Diffstat (limited to 'third_party/aom')
-rw-r--r--  third_party/aom/.clang-format | 5
-rw-r--r--  third_party/aom/CMakeLists.txt | 326
-rw-r--r--  third_party/aom/README.md | 52
-rw-r--r--  third_party/aom/aom/aom.h | 34
-rw-r--r--  third_party/aom/aom/aom_decoder.h | 4
-rw-r--r--  third_party/aom/aom/aom_encoder.h | 102
-rw-r--r--  third_party/aom/aom/aom_image.h | 52
-rw-r--r--  third_party/aom/aom/aomcx.h | 60
-rw-r--r--  third_party/aom/aom/exports_com | 2
-rw-r--r--  third_party/aom/aom/src/aom_decoder.c | 3
-rw-r--r--  third_party/aom/aom/src/aom_image.c | 14
-rw-r--r--  third_party/aom/aom_dsp/aom_dsp.cmake | 124
-rw-r--r--  third_party/aom/aom_dsp/aom_dsp.mk | 20
-rw-r--r--  third_party/aom/aom_dsp/aom_dsp_common.h | 7
-rwxr-xr-x  third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl | 1761
-rw-r--r--  third_party/aom/aom_dsp/arm/intrapred_neon.c | 225
-rw-r--r--  third_party/aom/aom_dsp/arm/intrapred_neon_asm.asm | 346
-rw-r--r--  third_party/aom/aom_dsp/binary_codes_reader.c | 49
-rw-r--r--  third_party/aom/aom_dsp/binary_codes_reader.h | 5
-rw-r--r--  third_party/aom/aom_dsp/binary_codes_writer.c | 60
-rw-r--r--  third_party/aom/aom_dsp/binary_codes_writer.h | 5
-rw-r--r--  third_party/aom/aom_dsp/bitreader.h | 15
-rw-r--r--  third_party/aom/aom_dsp/bitwriter.h | 17
-rw-r--r--  third_party/aom/aom_dsp/buf_ans.c | 3
-rw-r--r--  third_party/aom/aom_dsp/buf_ans.h | 3
-rw-r--r--  third_party/aom/aom_dsp/daalaboolreader.c | 2
-rw-r--r--  third_party/aom/aom_dsp/daalaboolreader.h | 5
-rw-r--r--  third_party/aom/aom_dsp/daalaboolwriter.c | 4
-rw-r--r--  third_party/aom/aom_dsp/daalaboolwriter.h | 4
-rw-r--r--  third_party/aom/aom_dsp/entcode.h | 8
-rw-r--r--  third_party/aom/aom_dsp/entdec.c | 55
-rw-r--r--  third_party/aom/aom_dsp/entdec.h | 6
-rw-r--r--  third_party/aom/aom_dsp/entenc.c | 41
-rw-r--r--  third_party/aom/aom_dsp/intrapred.c | 84
-rw-r--r--  third_party/aom/aom_dsp/intrapred_common.h | 51
-rw-r--r--  third_party/aom/aom_dsp/inv_txfm.c | 49
-rw-r--r--  third_party/aom/aom_dsp/inv_txfm.h | 9
-rw-r--r--  third_party/aom/aom_dsp/loopfilter.c | 326
-rw-r--r--  third_party/aom/aom_dsp/mips/convolve8_avg_dspr2.c | 5
-rw-r--r--  third_party/aom/aom_dsp/mips/convolve8_dspr2.c | 7
-rw-r--r--  third_party/aom/aom_dsp/mips/intrapred16_dspr2.c | 2
-rw-r--r--  third_party/aom/aom_dsp/mips/intrapred4_dspr2.c | 145
-rw-r--r--  third_party/aom/aom_dsp/mips/intrapred8_dspr2.c | 455
-rw-r--r--  third_party/aom/aom_dsp/mips/intrapred_msa.c | 190
-rw-r--r--  third_party/aom/aom_dsp/mips/inv_txfm_dspr2.h | 4
-rw-r--r--  third_party/aom/aom_dsp/prob.h | 22
-rw-r--r--  third_party/aom/aom_dsp/psnr.c | 34
-rw-r--r--  third_party/aom/aom_dsp/psnr.h | 2
-rw-r--r--  third_party/aom/aom_dsp/quantize.c | 781
-rw-r--r--  third_party/aom/aom_dsp/quantize.h | 77
-rw-r--r--  third_party/aom/aom_dsp/sad.c | 44
-rw-r--r--  third_party/aom/aom_dsp/ssim.c | 56
-rw-r--r--  third_party/aom/aom_dsp/txfm_common.h | 659
-rw-r--r--  third_party/aom/aom_dsp/variance.c | 60
-rw-r--r--  third_party/aom/aom_dsp/variance.h | 6
-rw-r--r--  third_party/aom/aom_dsp/x86/aom_subpixel_8t_ssse3.asm | 8
-rw-r--r--  third_party/aom/aom_dsp/x86/common_avx2.h | 147
-rw-r--r--  third_party/aom/aom_dsp/x86/fwd_txfm_avx2.h | 24
-rw-r--r--  third_party/aom/aom_dsp/x86/fwd_txfm_sse2.h | 20
-rw-r--r--  third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c | 239
-rw-r--r--  third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.asm | 197
-rw-r--r--  third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.c | 1256
-rw-r--r--  third_party/aom/aom_dsp/x86/highbd_intrapred_ssse3.c | 521
-rw-r--r--  third_party/aom/aom_dsp/x86/highbd_loopfilter_avx2.c | 873
-rw-r--r--  third_party/aom/aom_dsp/x86/highbd_loopfilter_sse2.c | 1010
-rw-r--r--  third_party/aom/aom_dsp/x86/highbd_sad4d_sse2.asm | 2
-rw-r--r--  third_party/aom/aom_dsp/x86/highbd_sad_sse2.asm | 7
-rw-r--r--  third_party/aom/aom_dsp/x86/highbd_subtract_sse2.c | 249
-rw-r--r--  third_party/aom/aom_dsp/x86/highbd_variance_sse2.c | 10
-rw-r--r--  third_party/aom/aom_dsp/x86/intrapred_avx2.c | 413
-rw-r--r--  third_party/aom/aom_dsp/x86/intrapred_sse2.asm | 146
-rw-r--r--  third_party/aom/aom_dsp/x86/intrapred_sse2.c | 684
-rw-r--r--  third_party/aom/aom_dsp/x86/intrapred_ssse3.c | 885
-rw-r--r--  third_party/aom/aom_dsp/x86/inv_txfm_common_avx2.h | 22
-rw-r--r--  third_party/aom/aom_dsp/x86/inv_txfm_sse2.h | 12
-rw-r--r--  third_party/aom/aom_dsp/x86/loopfilter_sse2.c | 144
-rw-r--r--  third_party/aom/aom_dsp/x86/lpf_common_sse2.h | 130
-rw-r--r--  third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.c | 16
-rw-r--r--  third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c | 14
-rw-r--r--  third_party/aom/aom_dsp/x86/obmc_sad_sse4.c | 4
-rw-r--r--  third_party/aom/aom_dsp/x86/obmc_variance_sse4.c | 16
-rw-r--r--  third_party/aom/aom_dsp/x86/quantize_sse2.c | 38
-rw-r--r--  third_party/aom/aom_dsp/x86/sad4d_sse2.asm | 2
-rw-r--r--  third_party/aom/aom_dsp/x86/sad_sse2.asm | 6
-rw-r--r--  third_party/aom/aom_dsp/x86/txfm_common_avx2.h | 130
-rw-r--r--  third_party/aom/aom_dsp/x86/txfm_common_intrin.h | 20
-rw-r--r--  third_party/aom/aom_dsp/x86/variance_sse2.c | 30
-rw-r--r--  third_party/aom/aom_ports/aom_ports.cmake | 10
-rw-r--r--  third_party/aom/aom_ports/mem.h | 6
-rw-r--r--  third_party/aom/aom_scale/aom_scale.cmake | 2
-rw-r--r--  third_party/aom/aom_scale/aom_scale_rtcd.pl | 24
-rw-r--r--  third_party/aom/aom_scale/generic/aom_scale.c | 76
-rw-r--r--  third_party/aom/aom_scale/generic/yv12config.c | 13
-rw-r--r--  third_party/aom/aom_scale/generic/yv12extend.c | 167
-rw-r--r--  third_party/aom/aom_scale/yv12config.h | 70
-rw-r--r--  third_party/aom/aomdec.c | 5
-rw-r--r--  third_party/aom/aomenc.c | 196
-rw-r--r--  third_party/aom/args.c | 25
-rw-r--r--  third_party/aom/args.h | 1
-rw-r--r--  third_party/aom/av1/av1.cmake | 160
-rw-r--r--  third_party/aom/av1/av1_common.mk | 45
-rw-r--r--  third_party/aom/av1/av1_cx.mk | 16
-rw-r--r--  third_party/aom/av1/av1_cx_iface.c | 270
-rw-r--r--  third_party/aom/av1/av1_dx.mk | 6
-rw-r--r--  third_party/aom/av1/av1_dx_iface.c | 56
-rw-r--r--  third_party/aom/av1/av1_iface_common.h | 9
-rw-r--r--  third_party/aom/av1/common/alloccommon.c | 90
-rw-r--r--  third_party/aom/av1/common/alloccommon.h | 1
-rw-r--r--  third_party/aom/av1/common/arm/neon/iht4x4_add_neon.c | 10
-rw-r--r--  third_party/aom/av1/common/arm/neon/iht8x8_add_neon.c | 10
-rw-r--r--  third_party/aom/av1/common/av1_fwd_txfm1d.c | 10
-rw-r--r--  third_party/aom/av1/common/av1_fwd_txfm1d.h | 6
-rw-r--r--  third_party/aom/av1/common/av1_fwd_txfm1d_cfg.h | 69
-rw-r--r--  third_party/aom/av1/common/av1_fwd_txfm2d.c | 206
-rw-r--r--  third_party/aom/av1/common/av1_inv_txfm1d.c | 54
-rw-r--r--  third_party/aom/av1/common/av1_inv_txfm1d.h | 6
-rw-r--r--  third_party/aom/av1/common/av1_inv_txfm1d_cfg.h | 200
-rw-r--r--  third_party/aom/av1/common/av1_inv_txfm2d.c | 244
-rw-r--r--  third_party/aom/av1/common/av1_loopfilter.c | 1060
-rw-r--r--  third_party/aom/av1/common/av1_loopfilter.h | 59
-rwxr-xr-x  third_party/aom/av1/common/av1_rtcd_defs.pl | 575
-rw-r--r--  third_party/aom/av1/common/av1_txfm.h | 197
-rw-r--r--  third_party/aom/av1/common/blockd.h | 782
-rw-r--r--  third_party/aom/av1/common/cdef.c | 349
-rw-r--r--  third_party/aom/av1/common/cdef.h | 31
-rw-r--r--  third_party/aom/av1/common/cdef_block.c | 584
-rw-r--r--  third_party/aom/av1/common/cdef_block.h | 90
-rw-r--r--  third_party/aom/av1/common/cdef_block_avx2.c | 14
-rw-r--r--  third_party/aom/av1/common/cdef_block_neon.c (renamed from third_party/aom/av1/common/od_dering_neon.c) | 2
-rw-r--r--  third_party/aom/av1/common/cdef_block_simd.h | 1214
-rw-r--r--  third_party/aom/av1/common/cdef_block_sse2.c (renamed from third_party/aom/av1/common/od_dering_sse2.c) | 2
-rw-r--r--  third_party/aom/av1/common/cdef_block_sse4.c (renamed from third_party/aom/av1/common/od_dering_sse4.c) | 2
-rw-r--r--  third_party/aom/av1/common/cdef_block_ssse3.c (renamed from third_party/aom/av1/common/od_dering_ssse3.c) | 2
-rw-r--r--  third_party/aom/av1/common/cdef_simd.h | 27
-rw-r--r--  third_party/aom/av1/common/cfl.c | 609
-rw-r--r--  third_party/aom/av1/common/cfl.h | 75
-rw-r--r--  third_party/aom/av1/common/clpf_simd.h | 12
-rw-r--r--  third_party/aom/av1/common/common.h | 4
-rw-r--r--  third_party/aom/av1/common/common_data.h | 946
-rw-r--r--  third_party/aom/av1/common/convolve.c | 742
-rw-r--r--  third_party/aom/av1/common/convolve.h | 91
-rw-r--r--  third_party/aom/av1/common/daala_tx.c | 3742
-rw-r--r--  third_party/aom/av1/common/daala_tx.h | 42
-rw-r--r--  third_party/aom/av1/common/entropy.c | 5464
-rw-r--r--  third_party/aom/av1/common/entropy.h | 84
-rw-r--r--  third_party/aom/av1/common/entropymode.c | 6232
-rw-r--r--  third_party/aom/av1/common/entropymode.h | 297
-rw-r--r--  third_party/aom/av1/common/entropymv.c | 39
-rw-r--r--  third_party/aom/av1/common/entropymv.h | 6
-rw-r--r--  third_party/aom/av1/common/enums.h | 358
-rw-r--r--  third_party/aom/av1/common/filter.c | 94
-rw-r--r--  third_party/aom/av1/common/filter.h | 47
-rw-r--r--  third_party/aom/av1/common/idct.c | 1395
-rw-r--r--  third_party/aom/av1/common/idct.h | 53
-rw-r--r--  third_party/aom/av1/common/mips/dspr2/av1_itrans16_dspr2.c | 97
-rw-r--r--  third_party/aom/av1/common/mips/dspr2/av1_itrans4_dspr2.c | 91
-rw-r--r--  third_party/aom/av1/common/mips/dspr2/av1_itrans8_dspr2.c | 86
-rw-r--r--  third_party/aom/av1/common/mips/msa/av1_idct16x16_msa.c | 2
-rw-r--r--  third_party/aom/av1/common/mips/msa/av1_idct4x4_msa.c | 2
-rw-r--r--  third_party/aom/av1/common/mips/msa/av1_idct8x8_msa.c | 2
-rw-r--r--  third_party/aom/av1/common/mv.h | 83
-rw-r--r--  third_party/aom/av1/common/mvref_common.c | 1479
-rw-r--r--  third_party/aom/av1/common/mvref_common.h | 85
-rw-r--r--  third_party/aom/av1/common/ncobmc_kernels.c | 1181
-rw-r--r--  third_party/aom/av1/common/ncobmc_kernels.h | 22
-rw-r--r--  third_party/aom/av1/common/obmc.h | 96
-rw-r--r--  third_party/aom/av1/common/od_dering.c | 416
-rw-r--r--  third_party/aom/av1/common/od_dering.h | 51
-rw-r--r--  third_party/aom/av1/common/od_dering_simd.h | 390
-rw-r--r--  third_party/aom/av1/common/onyxc_int.h | 471
-rw-r--r--  third_party/aom/av1/common/pred_common.c | 297
-rw-r--r--  third_party/aom/av1/common/pred_common.h | 31
-rw-r--r--  third_party/aom/av1/common/pvq.c | 4
-rw-r--r--  third_party/aom/av1/common/pvq.h | 4
-rw-r--r--  third_party/aom/av1/common/quant_common.c | 25
-rw-r--r--  third_party/aom/av1/common/quant_common.h | 6
-rw-r--r--  third_party/aom/av1/common/reconinter.c | 2181
-rw-r--r--  third_party/aom/av1/common/reconinter.h | 484
-rw-r--r--  third_party/aom/av1/common/reconintra.c | 710
-rw-r--r--  third_party/aom/av1/common/reconintra.h | 62
-rw-r--r--  third_party/aom/av1/common/resize.c | 614
-rw-r--r--  third_party/aom/av1/common/resize.h | 28
-rw-r--r--  third_party/aom/av1/common/restoration.c | 825
-rw-r--r--  third_party/aom/av1/common/restoration.h | 189
-rw-r--r--  third_party/aom/av1/common/scale.h | 2
-rw-r--r--  third_party/aom/av1/common/scan.c | 1823
-rw-r--r--  third_party/aom/av1/common/scan.h | 34
-rw-r--r--  third_party/aom/av1/common/seg_common.c | 11
-rw-r--r--  third_party/aom/av1/common/seg_common.h | 27
-rw-r--r--  third_party/aom/av1/common/thread_common.c | 29
-rw-r--r--  third_party/aom/av1/common/thread_common.h | 7
-rw-r--r--  third_party/aom/av1/common/tile_common.c | 184
-rw-r--r--  third_party/aom/av1/common/tile_common.h | 23
-rw-r--r--  third_party/aom/av1/common/token_cdfs.h | 5253
-rw-r--r--  third_party/aom/av1/common/txb_common.c | 176
-rw-r--r--  third_party/aom/av1/common/txb_common.h | 351
-rw-r--r--  third_party/aom/av1/common/warped_motion.c | 464
-rw-r--r--  third_party/aom/av1/common/warped_motion.h | 5
-rw-r--r--  third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c | 645
-rw-r--r--  third_party/aom/av1/common/x86/av1_fwd_txfm2d_sse4.c | 10
-rw-r--r--  third_party/aom/av1/common/x86/convolve_2d_sse2.c | 26
-rw-r--r--  third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c | 26
-rw-r--r--  third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c | 2
-rw-r--r--  third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c | 6
-rw-r--r--  third_party/aom/av1/common/x86/highbd_warp_plane_ssse3.c | 152
-rw-r--r--  third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c | 2
-rw-r--r--  third_party/aom/av1/common/x86/idct_intrin_sse2.c | 18
-rw-r--r--  third_party/aom/av1/common/x86/intra_edge_sse4.c | 318
-rw-r--r--  third_party/aom/av1/common/x86/selfguided_sse4.c | 176
-rw-r--r--  third_party/aom/av1/common/x86/warp_plane_sse2.c | 146
-rw-r--r--  third_party/aom/av1/common/x86/warp_plane_ssse3.c | 140
-rw-r--r--  third_party/aom/av1/decoder/decodeframe.c | 2275
-rw-r--r--  third_party/aom/av1/decoder/decodeframe.h | 21
-rw-r--r--  third_party/aom/av1/decoder/decodemv.c | 1039
-rw-r--r--  third_party/aom/av1/decoder/decoder.c | 139
-rw-r--r--  third_party/aom/av1/decoder/decoder.h | 21
-rw-r--r--  third_party/aom/av1/decoder/decodetxb.c | 418
-rw-r--r--  third_party/aom/av1/decoder/decodetxb.h | 5
-rw-r--r--  third_party/aom/av1/decoder/detokenize.c | 190
-rw-r--r--  third_party/aom/av1/decoder/detokenize.h | 2
-rw-r--r--  third_party/aom/av1/decoder/dthread.c | 5
-rw-r--r--  third_party/aom/av1/decoder/inspection.c | 32
-rw-r--r--  third_party/aom/av1/decoder/symbolrate.h | 88
-rw-r--r--  third_party/aom/av1/encoder/aq_cyclicrefresh.c | 3
-rw-r--r--  third_party/aom/av1/encoder/aq_variance.c | 4
-rw-r--r--  third_party/aom/av1/encoder/arm/neon/dct_neon.c | 36
-rw-r--r--  third_party/aom/av1/encoder/av1_quantize.c | 562
-rw-r--r--  third_party/aom/av1/encoder/bgsprite.c | 887
-rw-r--r--  third_party/aom/av1/encoder/bitstream.c | 2446
-rw-r--r--  third_party/aom/av1/encoder/bitstream.h | 9
-rw-r--r--  third_party/aom/av1/encoder/block.h | 172
-rw-r--r--  third_party/aom/av1/encoder/context_tree.c | 143
-rw-r--r--  third_party/aom/av1/encoder/context_tree.h | 6
-rw-r--r--  third_party/aom/av1/encoder/dct.c | 888
-rw-r--r--  third_party/aom/av1/encoder/encodeframe.c | 1852
-rw-r--r--  third_party/aom/av1/encoder/encodeframe.h | 1
-rw-r--r--  third_party/aom/av1/encoder/encodemb.c | 797
-rw-r--r--  third_party/aom/av1/encoder/encodemb.h | 15
-rw-r--r--  third_party/aom/av1/encoder/encodemv.c | 115
-rw-r--r--  third_party/aom/av1/encoder/encoder.c | 1709
-rw-r--r--  third_party/aom/av1/encoder/encoder.h | 208
-rw-r--r--  third_party/aom/av1/encoder/encodetxb.c | 1458
-rw-r--r--  third_party/aom/av1/encoder/encodetxb.h | 18
-rw-r--r--  third_party/aom/av1/encoder/ethread.c | 33
-rw-r--r--  third_party/aom/av1/encoder/firstpass.c | 1269
-rw-r--r--  third_party/aom/av1/encoder/firstpass.h | 38
-rw-r--r--  third_party/aom/av1/encoder/global_motion.c | 30
-rw-r--r--  third_party/aom/av1/encoder/hash.c | 69
-rw-r--r--  third_party/aom/av1/encoder/hash.h | 42
-rw-r--r--  third_party/aom/av1/encoder/hash_motion.c | 380
-rw-r--r--  third_party/aom/av1/encoder/hash_motion.h | 72
-rw-r--r--  third_party/aom/av1/encoder/hybrid_fwd_txfm.c | 141
-rw-r--r--  third_party/aom/av1/encoder/k_means_template.h | 137
-rw-r--r--  third_party/aom/av1/encoder/mbgraph.c | 36
-rw-r--r--  third_party/aom/av1/encoder/mcomp.c | 362
-rw-r--r--  third_party/aom/av1/encoder/mcomp.h | 21
-rw-r--r--  third_party/aom/av1/encoder/palette.c | 118
-rw-r--r--  third_party/aom/av1/encoder/palette.h | 40
-rw-r--r--  third_party/aom/av1/encoder/pickcdef.c | 161
-rw-r--r--  third_party/aom/av1/encoder/picklpf.c | 386
-rw-r--r--  third_party/aom/av1/encoder/picklpf.h | 7
-rw-r--r--  third_party/aom/av1/encoder/pickrst.c | 1184
-rw-r--r--  third_party/aom/av1/encoder/random.h | 29
-rw-r--r--  third_party/aom/av1/encoder/ransac.c | 7
-rw-r--r--  third_party/aom/av1/encoder/ratectrl.c | 215
-rw-r--r--  third_party/aom/av1/encoder/ratectrl.h | 31
-rw-r--r--  third_party/aom/av1/encoder/rd.c | 779
-rw-r--r--  third_party/aom/av1/encoder/rd.h | 190
-rw-r--r--  third_party/aom/av1/encoder/rdopt.c | 4221
-rw-r--r--  third_party/aom/av1/encoder/rdopt.h | 23
-rw-r--r--  third_party/aom/av1/encoder/segmentation.c | 115
-rw-r--r--  third_party/aom/av1/encoder/segmentation.h | 2
-rw-r--r--  third_party/aom/av1/encoder/speed_features.c | 17
-rw-r--r--  third_party/aom/av1/encoder/speed_features.h | 35
-rw-r--r--  third_party/aom/av1/encoder/subexp.c | 41
-rw-r--r--  third_party/aom/av1/encoder/temporal_filter.c | 132
-rw-r--r--  third_party/aom/av1/encoder/temporal_filter.h | 2
-rw-r--r--  third_party/aom/av1/encoder/tokenize.c | 175
-rw-r--r--  third_party/aom/av1/encoder/tokenize.h | 25
-rw-r--r--  third_party/aom/av1/encoder/x86/av1_quantize_avx2.c | 49
-rw-r--r--  third_party/aom/av1/encoder/x86/av1_quantize_sse2.c | 74
-rw-r--r--  third_party/aom/av1/encoder/x86/dct_intrin_sse2.c | 461
-rw-r--r--  third_party/aom/av1/encoder/x86/dct_ssse3.c | 469
-rw-r--r--  third_party/aom/av1/encoder/x86/error_intrin_avx2.c | 17
-rw-r--r--  third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c | 6
-rw-r--r--  third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c | 4
-rw-r--r--  third_party/aom/build/cmake/aom_config.c.cmake | 2
-rw-r--r--  third_party/aom/build/cmake/aom_config_defaults.cmake | 316
-rw-r--r--  third_party/aom/build/cmake/aom_configure.cmake | 298
-rw-r--r--  third_party/aom/build/cmake/aom_experiment_deps.cmake | 139
-rw-r--r--  third_party/aom/build/cmake/aom_optimization.cmake | 23
-rw-r--r--  third_party/aom/build/cmake/compiler_flags.cmake | 346
-rw-r--r--  third_party/aom/build/cmake/compiler_tests.cmake | 46
-rw-r--r--  third_party/aom/build/cmake/dist.cmake | 32
-rw-r--r--  third_party/aom/build/cmake/pkg_config.cmake | 57
-rw-r--r--  third_party/aom/build/cmake/sanitizers.cmake | 29
-rw-r--r--  third_party/aom/build/cmake/toolchains/mips32-linux-gcc.cmake | 6
-rw-r--r--  third_party/aom/build/cmake/toolchains/mips64-linux-gcc.cmake | 6
-rw-r--r--  third_party/aom/build/cmake/toolchains/x86-linux.cmake | 5
-rw-r--r--  third_party/aom/build/cmake/toolchains/x86-mingw-gcc.cmake | 29
-rw-r--r--  third_party/aom/build/cmake/toolchains/x86_64-mingw-gcc.cmake | 27
-rw-r--r--  third_party/aom/build/cmake/util.cmake | 56
-rw-r--r--  third_party/aom/build/cmake/version.cmake | 55
-rwxr-xr-x  third_party/aom/build/cmake/version.pl (renamed from third_party/aom/build/cmake/aom_version.pl) | 18
-rwxr-xr-x  third_party/aom/configure | 218
-rw-r--r--  third_party/aom/examples.mk | 20
-rw-r--r--  third_party/aom/examples/aom_cx_set_ref.c | 8
-rw-r--r--  third_party/aom/examples/inspect.c | 73
-rw-r--r--  third_party/aom/examples/lightfield_decoder.c | 188
-rw-r--r--  third_party/aom/examples/lightfield_encoder.c | 480
-rw-r--r--  third_party/aom/test/ans_test.cc | 6
-rw-r--r--  third_party/aom/test/aq_segment_test.cc | 2
-rw-r--r--  third_party/aom/test/av1_convolve_2d_test.cc | 4
-rw-r--r--  third_party/aom/test/av1_convolve_2d_test_util.cc | 49
-rw-r--r--  third_party/aom/test/av1_convolve_2d_test_util.h | 4
-rw-r--r--  third_party/aom/test/av1_convolve_scale_test.cc | 469
-rw-r--r--  third_party/aom/test/av1_convolve_test.cc | 32
-rw-r--r--  third_party/aom/test/av1_dct_test.cc | 3
-rw-r--r--  third_party/aom/test/av1_fht16x16_test.cc | 164
-rw-r--r--  third_party/aom/test/av1_fht16x32_test.cc | 99
-rw-r--r--  third_party/aom/test/av1_fht16x8_test.cc | 90
-rw-r--r--  third_party/aom/test/av1_fht32x16_test.cc | 99
-rw-r--r--  third_party/aom/test/av1_fht32x32_test.cc | 96
-rw-r--r--  third_party/aom/test/av1_fht4x4_test.cc | 99
-rw-r--r--  third_party/aom/test/av1_fht4x8_test.cc | 80
-rw-r--r--  third_party/aom/test/av1_fht64x64_test.cc | 49
-rw-r--r--  third_party/aom/test/av1_fht8x16_test.cc | 90
-rw-r--r--  third_party/aom/test/av1_fht8x4_test.cc | 80
-rw-r--r--  third_party/aom/test/av1_fht8x8_test.cc | 99
-rw-r--r--  third_party/aom/test/av1_fwd_txfm1d_test.cc | 30
-rw-r--r--  third_party/aom/test/av1_fwd_txfm2d_test.cc | 26
-rw-r--r--  third_party/aom/test/av1_highbd_iht_test.cc | 37
-rw-r--r--  third_party/aom/test/av1_inv_txfm1d_test.cc | 64
-rw-r--r--  third_party/aom/test/av1_inv_txfm2d_test.cc | 93
-rw-r--r--  third_party/aom/test/av1_inv_txfm_test.cc | 16
-rw-r--r--  third_party/aom/test/av1_quantize_test.cc | 6
-rw-r--r--  third_party/aom/test/av1_txfm_test.cc | 34
-rw-r--r--  third_party/aom/test/av1_txfm_test.h | 65
-rw-r--r--  third_party/aom/test/boolcoder_test.cc | 8
-rw-r--r--  third_party/aom/test/cdef_test.cc | 417
-rw-r--r--  third_party/aom/test/clpf_test.cc | 10
-rw-r--r--  third_party/aom/test/coding_path_sync.cc | 95
-rw-r--r--  third_party/aom/test/convolve_round_test.cc | 45
-rw-r--r--  third_party/aom/test/convolve_test.cc | 44
-rw-r--r--  third_party/aom/test/datarate_test.cc | 4
-rw-r--r--  third_party/aom/test/dct16x16_test.cc | 115
-rw-r--r--  third_party/aom/test/dct32x32_test.cc | 42
-rw-r--r--  third_party/aom/test/decode_api_test.cc | 3
-rw-r--r--  third_party/aom/test/decode_test_driver.cc | 11
-rw-r--r--  third_party/aom/test/dering_test.cc | 191
-rw-r--r--  third_party/aom/test/encode_test_driver.cc | 36
-rw-r--r--  third_party/aom/test/end_to_end_test.cc | 2
-rw-r--r--  third_party/aom/test/fdct4x4_test.cc | 147
-rw-r--r--  third_party/aom/test/fdct8x8_test.cc | 127
-rw-r--r--  third_party/aom/test/hiprec_convolve_test.cc | 2
-rw-r--r--  third_party/aom/test/hiprec_convolve_test_util.cc | 12
-rw-r--r--  third_party/aom/test/intrapred_test.cc | 366
-rw-r--r--  third_party/aom/test/ivf_video_source.h | 4
-rw-r--r--  third_party/aom/test/lpf_8_test.cc | 155
-rw-r--r--  third_party/aom/test/minmax_test.cc | 8
-rw-r--r--  third_party/aom/test/quantize_func_test.cc | 4
-rw-r--r--  third_party/aom/test/register_state_check.h | 25
-rw-r--r--  third_party/aom/test/resize_test.cc | 35
-rw-r--r--  third_party/aom/test/scan_test.cc | 33
-rw-r--r--  third_party/aom/test/selfguided_filter_test.cc | 174
-rw-r--r--  third_party/aom/test/simd_cmp_impl.h | 8
-rw-r--r--  third_party/aom/test/subtract_test.cc | 58
-rw-r--r--  third_party/aom/test/test-data.mk | 4
-rw-r--r--  third_party/aom/test/test-data.sha1 | 1
-rw-r--r--  third_party/aom/test/test.cmake | 110
-rw-r--r--  third_party/aom/test/test.mk | 16
-rw-r--r--  third_party/aom/test/test_data_util.cmake | 49
-rw-r--r--  third_party/aom/test/test_intra_pred_speed.cc | 1569
-rw-r--r--  third_party/aom/test/warp_filter_test_util.cc | 145
-rw-r--r--  third_party/aom/test/webm_video_source.h | 4
-rw-r--r--  third_party/aom/test/y4m_video_source.h | 4
-rw-r--r--  third_party/aom/test/yuv_video_source.h | 4
-rw-r--r--  third_party/aom/third_party/fastfeat/README.libvpx | 1
-rw-r--r--  third_party/aom/third_party/fastfeat/fast_9.c | 10382
-rw-r--r--  third_party/aom/third_party/googletest/README.libaom | 2
-rw-r--r--  third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h | 13
-rw-r--r--  third_party/aom/third_party/googletest/src/googletest/src/gtest.cc | 1
-rw-r--r--  third_party/aom/third_party/libwebm/README.libaom | 2
-rw-r--r--  third_party/aom/third_party/libwebm/common/file_util.cc | 15
-rw-r--r--  third_party/aom/third_party/libwebm/common/file_util.h | 3
-rw-r--r--  third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.cc | 4
-rw-r--r--  third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc | 1
-rw-r--r--  third_party/aom/third_party/libwebm/mkvparser/mkvparser.cc | 16
-rw-r--r--  third_party/aom/third_party/vector/vector.c | 543
-rw-r--r--  third_party/aom/third_party/vector/vector.h | 159
-rw-r--r--  third_party/aom/tools/aom_entropy_optimizer.c | 219
-rw-r--r--  third_party/aom/tools_common.c | 16
-rw-r--r--  third_party/aom/tools_common.h | 4
394 files changed, 70521 insertions, 37989 deletions
diff --git a/third_party/aom/.clang-format b/third_party/aom/.clang-format
index 7837b7704..c1483199e 100644
--- a/third_party/aom/.clang-format
+++ b/third_party/aom/.clang-format
@@ -1,7 +1,7 @@
---
Language: Cpp
# BasedOnStyle: Google
-# Generated with clang-format 3.9.1
+# Generated with clang-format 4.0.1
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
@@ -60,6 +60,8 @@ IncludeIsMainRegex: '([-_](test|unittest))?$'
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
@@ -78,6 +80,7 @@ PointerAlignment: Right
ReflowComments: true
SortIncludes: false
SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
diff --git a/third_party/aom/CMakeLists.txt b/third_party/aom/CMakeLists.txt
index dfafc8c64..59338b8b5 100644
--- a/third_party/aom/CMakeLists.txt
+++ b/third_party/aom/CMakeLists.txt
@@ -12,17 +12,22 @@ cmake_minimum_required(VERSION 3.5)
if (NOT EMSCRIPTEN)
if (NOT CMAKE_BUILD_TYPE)
- set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE
+ set(CMAKE_BUILD_TYPE "Release" CACHE
"Build type: Debug, Release, RelWithDebInfo or MinSizeRel" STRING FORCE)
endif ()
endif ()
+option(ENABLE_ADOPTED_EXPERIMENTS "Enable adopted experiments." ON)
option(ENABLE_CCACHE "Enable ccache support." OFF)
option(ENABLE_DISTCC "Enable distcc support." OFF)
option(ENABLE_DOCS "Enable documentation generation (doxygen required)." ON)
-option(ENABLE_NASM "Use nasm instead of yasm for x86 assembly." OFF)
+option(ENABLE_EXAMPLES "Enables build of example code." ON)
+option(ENABLE_GOMA "Enable goma support." OFF)
option(ENABLE_IDE_TEST_HOSTING
"Enables running tests within IDEs like Visual Studio and Xcode." OFF)
+option(ENABLE_NASM "Use nasm instead of yasm for x86 assembly." OFF)
+option(ENABLE_TOOLS "Enable applications in tools sub directory." ON)
+option(ENABLE_WERROR "Converts warnings to errors at compile time." OFF)
# $BUILD_SHARED_LIBS is a CMake built-in-- it's listed here for visibility.
option(BUILD_SHARED_LIBS "CMake should generate a shared library build." OFF)
@@ -47,6 +52,7 @@ include("${AOM_ROOT}/aom_scale/aom_scale.cmake")
include("${AOM_ROOT}/aom_util/aom_util.cmake")
include("${AOM_ROOT}/av1/av1.cmake")
include("${AOM_ROOT}/test/test.cmake")
+include("${AOM_ROOT}/build/cmake/sanitizers.cmake")
include("${AOM_ROOT}/build/cmake/util.cmake")
set(AOM_RTCD_SOURCES
@@ -160,6 +166,10 @@ set(AOM_ENCODER_STATS_SOURCES
"${AOM_ROOT}/rate_hist.c"
"${AOM_ROOT}/rate_hist.h")
+set(AOM_PKG_CONFIG_SOURCES "${AOM_CONFIG_DIR}/aom.pc")
+
+set(AOM_VERSION_SOURCES "${AOM_CONFIG_DIR}/aom_version.h")
+
set(AOM_WEBM_DECODER_SOURCES
"${AOM_ROOT}/webmdec.cc"
"${AOM_ROOT}/webmdec.h")
@@ -171,6 +181,48 @@ set(AOM_WEBM_ENCODER_SOURCES
include_directories(${AOM_ROOT} ${AOM_CONFIG_DIR})
# Targets
+add_library(aom_version ${AOM_VERSION_SOURCES})
+add_dummy_source_file_to_target(aom_version c)
+add_custom_command(
+ OUTPUT "${AOM_CONFIG_DIR}/aom_version.h"
+ COMMAND ${CMAKE_COMMAND}
+ ARGS -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
+ -DAOM_ROOT=${AOM_ROOT}
+ -DGIT_EXECUTABLE=${GIT_EXECUTABLE}
+ -DPERL_EXECUTABLE=${PERL_EXECUTABLE}
+ -P "${AOM_ROOT}/build/cmake/version.cmake"
+ COMMENT "Writing aom_version.h"
+ VERBATIM)
+
+add_custom_target(aom_version_check
+ COMMAND ${CMAKE_COMMAND}
+ -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
+ -DAOM_ROOT=${AOM_ROOT}
+ -DGIT_EXECUTABLE=${GIT_EXECUTABLE}
+ -DPERL_EXECUTABLE=${PERL_EXECUTABLE}
+ -P "${AOM_ROOT}/build/cmake/version.cmake"
+ COMMENT "Updating version info if necessary."
+ VERBATIM)
+add_dependencies(aom_version aom_version_check)
+
+if (NOT MSVC)
+ add_library(aom_pc ${AOM_PKG_CONFIG_SOURCES})
+ add_dummy_source_file_to_target(aom_pc c)
+ add_custom_command(
+ OUTPUT "${AOM_CONFIG_DIR}/aom.pc"
+ COMMAND ${CMAKE_COMMAND}
+ ARGS -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
+ -DAOM_ROOT=${AOM_ROOT}
+ -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+ -DCMAKE_PROJECT_NAME=${CMAKE_PROJECT_NAME}
+ -DCONFIG_MULTITHREAD=${CONFIG_MULTITHREAD}
+ -DHAVE_PTHREAD_H=${HAVE_PTHREAD_H}
+ -P "${AOM_ROOT}/build/cmake/pkg_config.cmake"
+ COMMENT "Writing aom.pc"
+ VERBATIM)
+ add_dependencies(aom_pc aom_version)
+endif ()
+
# TODO(tomfinegan): Move rtcd target setup where it belongs for each rtcd
# source.
add_rtcd_build_step("${AOM_ROOT}/aom_dsp/aom_dsp_rtcd_defs.pl"
@@ -187,9 +239,15 @@ add_rtcd_build_step("${AOM_ROOT}/av1/common/av1_rtcd_defs.pl"
"av1_rtcd")
add_library(aom_rtcd OBJECT ${AOM_RTCD_SOURCES})
+add_dependencies(aom_rtcd aom_version)
+
add_library(aom_encoder_stats OBJECT ${AOM_ENCODER_STATS_SOURCES})
add_library(aom ${AOM_SOURCES} $<TARGET_OBJECTS:aom_rtcd>)
+if (NOT MSVC AND NOT APPLE)
+ target_link_libraries(aom ${AOM_LIB_LINK_TYPE} m)
+endif ()
+
# List of object and static library targets.
set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} aom_rtcd aom_encoder_stats aom_mem
aom_scale aom)
@@ -209,29 +267,30 @@ foreach (aom_lib ${AOM_LIB_TARGETS})
endif ()
endforeach ()
+# Generate a stub file containing the C function usage_exit(). Users of the
+# aom_common_app_util library must define this function. This is a convenience
+# to allow omission of the function from applications that might want to use
+# other pieces of the util support without defining the usage_exit().
+file(WRITE "${AOM_CONFIG_DIR}/usage_exit.c" "void usage_exit(void) {}")
+
#
# Application and application support targets.
#
-add_library(aom_common_app_util OBJECT ${AOM_COMMON_APP_UTIL_SOURCES})
-
+if (CONFIG_UNIT_TESTS OR ENABLE_EXAMPLES OR ENABLE_TOOLS)
+ add_library(aom_common_app_util OBJECT ${AOM_COMMON_APP_UTIL_SOURCES})
+ if (CONFIG_AV1_DECODER)
+ add_library(aom_decoder_app_util OBJECT ${AOM_DECODER_APP_UTIL_SOURCES})
+ endif ()
+ if (CONFIG_AV1_ENCODER)
+ add_library(aom_encoder_app_util OBJECT ${AOM_ENCODER_APP_UTIL_SOURCES})
+ endif ()
+endif ()
-if (CONFIG_AV1_DECODER)
- add_library(aom_decoder_app_util OBJECT ${AOM_DECODER_APP_UTIL_SOURCES})
+if (CONFIG_AV1_DECODER AND ENABLE_EXAMPLES)
add_executable(aomdec
"${AOM_ROOT}/aomdec.c"
$<TARGET_OBJECTS:aom_common_app_util>
$<TARGET_OBJECTS:aom_decoder_app_util>)
-
- if (CONFIG_ANALYZER)
- add_executable(analyzer
- "${AOM_ROOT}/examples/analyzer.cc"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_decoder_app_util>)
- target_link_libraries(analyzer ${AOM_LIB_LINK_TYPE} ${wxWidgets_LIBRARIES})
- set(AOM_APP_TARGETS ${AOM_APP_TARGETS} analyzer)
- set(AOM_DECODER_EXAMPLE_TARGETS ${AOM_DECODER_EXAMPLE_TARGETS} analyzer)
- endif ()
-
add_executable(decode_to_md5
"${AOM_ROOT}/examples/decode_to_md5.c"
$<TARGET_OBJECTS:aom_common_app_util>
@@ -245,6 +304,17 @@ if (CONFIG_AV1_DECODER)
$<TARGET_OBJECTS:aom_common_app_util>
$<TARGET_OBJECTS:aom_decoder_app_util>)
+ if (CONFIG_ANALYZER)
+ add_executable(analyzer
+ "${AOM_ROOT}/examples/analyzer.cc"
+ $<TARGET_OBJECTS:aom_common_app_util>
+ $<TARGET_OBJECTS:aom_decoder_app_util>)
+ target_link_libraries(analyzer
+ ${AOM_LIB_LINK_TYPE} ${wxWidgets_LIBRARIES})
+ set(AOM_APP_TARGETS ${AOM_APP_TARGETS} analyzer)
+ set(AOM_DECODER_EXAMPLE_TARGETS ${AOM_DECODER_EXAMPLE_TARGETS} analyzer)
+ endif ()
+
if (CONFIG_INSPECTION)
add_executable(inspect
"${AOM_ROOT}/examples/inspect.c"
@@ -269,50 +339,81 @@ if (CONFIG_AV1_DECODER)
endif ()
endif ()
- # Maintain lists of example and app targets.
+ # Maintain a list of decoder example targets.
set(AOM_DECODER_EXAMPLE_TARGETS ${AOM_DECODER_EXAMPLE_TARGETS}
- decode_to_md5 decode_with_drops simple_decoder)
- set(AOM_APP_TARGETS ${AOM_APP_TARGETS} aomdec ${AOM_DECODER_EXAMPLE_TARGETS})
-endif ()
+ aomdec decode_to_md5 decode_with_drops simple_decoder)
+ # Add decoder examples to the app targets list.
+ set(AOM_APP_TARGETS ${AOM_APP_TARGETS} ${AOM_DECODER_EXAMPLE_TARGETS})
+endif ()
if (CONFIG_AV1_ENCODER)
- add_library(aom_encoder_app_util OBJECT ${AOM_ENCODER_APP_UTIL_SOURCES})
- add_executable(aomenc
- "${AOM_ROOT}/aomenc.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>
- $<TARGET_OBJECTS:aom_encoder_stats>)
- add_executable(lossless_encoder
- "${AOM_ROOT}/examples/lossless_encoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
- add_executable(set_maps
- "${AOM_ROOT}/examples/set_maps.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
- add_executable(simple_encoder
- "${AOM_ROOT}/examples/simple_encoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
- add_executable(twopass_encoder
- "${AOM_ROOT}/examples/twopass_encoder.c"
- $<TARGET_OBJECTS:aom_common_app_util>
- $<TARGET_OBJECTS:aom_encoder_app_util>)
+ if (ENABLE_EXAMPLES)
+ add_executable(aomenc
+ "${AOM_ROOT}/aomenc.c"
+ $<TARGET_OBJECTS:aom_common_app_util>
+ $<TARGET_OBJECTS:aom_encoder_app_util>
+ $<TARGET_OBJECTS:aom_encoder_stats>)
+ add_executable(lossless_encoder
+ "${AOM_ROOT}/examples/lossless_encoder.c"
+ $<TARGET_OBJECTS:aom_common_app_util>
+ $<TARGET_OBJECTS:aom_encoder_app_util>)
+ add_executable(set_maps
+ "${AOM_ROOT}/examples/set_maps.c"
+ $<TARGET_OBJECTS:aom_common_app_util>
+ $<TARGET_OBJECTS:aom_encoder_app_util>)
+ add_executable(simple_encoder
+ "${AOM_ROOT}/examples/simple_encoder.c"
+ $<TARGET_OBJECTS:aom_common_app_util>
+ $<TARGET_OBJECTS:aom_encoder_app_util>)
+ add_executable(twopass_encoder
+ "${AOM_ROOT}/examples/twopass_encoder.c"
+ $<TARGET_OBJECTS:aom_common_app_util>
+ $<TARGET_OBJECTS:aom_encoder_app_util>)
+
+ # Maintain a list of encoder example targets.
+ set(AOM_ENCODER_EXAMPLE_TARGETS
+ aomenc lossless_encoder set_maps simple_encoder twopass_encoder)
+
+ # Add encoder examples to app target list.
+ set(AOM_APP_TARGETS ${AOM_APP_TARGETS} ${AOM_ENCODER_EXAMPLE_TARGETS})
+ endif ()
+
+ if (ENABLE_TOOLS AND CONFIG_ENTROPY_STATS)
+ # TODO(tomfinegan): Sort out why a simple link command with
+ # aom_entropy_optimizer.c won't work on macos, but dragging in all the
+ # helper machinery allows the link to succeed.
+ add_executable(aom_entropy_optimizer
+ "${AOM_CONFIG_DIR}/usage_exit.c"
+ "${AOM_ROOT}/tools/aom_entropy_optimizer.c"
+ $<TARGET_OBJECTS:aom_common_app_util>
+ $<TARGET_OBJECTS:aom_encoder_app_util>)
+
+ # Maintain a list of encoder tool targets.
+ set(AOM_ENCODER_TOOL_TARGETS
+ ${AOM_ENCODER_TOOL_TARGETS} aom_entropy_optimizer)
+
+ # Add encoder tools to app target list.
+ set(AOM_APP_TARGETS ${AOM_APP_TARGETS} ${AOM_ENCODER_TOOL_TARGETS})
+ endif ()
+endif ()
- # Add encoder apps and examples to target lists.
- set(AOM_ENCODER_EXAMPLE_TARGETS
- lossless_encoder set_maps simple_encoder twopass_encoder)
- set(AOM_APP_TARGETS ${AOM_APP_TARGETS} aomenc ${AOM_ENCODER_EXAMPLE_TARGETS})
+if (ENABLE_EXAMPLES)
+ # Maintain a separate variable listing only the examples to facilitate
+ # installation of example programs into an examples sub directory of
+ # $AOM_DIST_DIR/bin when building the dist target.
+ set(AOM_EXAMPLE_TARGETS
+ ${AOM_DECODER_EXAMPLE_TARGETS} ${AOM_ENCODER_EXAMPLE_TARGETS})
endif ()
-# Maintain a separate variable listing only the examples to facilitate
-# installation of example programs into an examples sub directory of
-# $AOM_DIST_DIR/bin when building the dist target.
-set(AOM_EXAMPLE_TARGETS
- ${AOM_DECODER_EXAMPLE_TARGETS} ${AOM_ENCODER_EXAMPLE_TARGETS})
+if (ENABLE_TOOLS)
+  # Maintain a separate variable listing only the tools to facilitate
+  # installation of tool programs into a tools sub directory of
+ # $AOM_DIST_DIR/bin when building the dist target.
+ set(AOM_TOOL_TARGETS ${AOM_DECODER_TOOL_TARGETS} ${AOM_ENCODER_TOOL_TARGETS})
+endif ()
-if (CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
+if (ENABLE_EXAMPLES AND CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
add_executable(aom_cx_set_ref
"${AOM_ROOT}/examples/aom_cx_set_ref.c"
$<TARGET_OBJECTS:aom_common_app_util>
@@ -325,41 +426,45 @@ foreach (aom_app ${AOM_APP_TARGETS})
target_link_libraries(${aom_app} ${AOM_LIB_LINK_TYPE} aom)
endforeach ()
-if (CONFIG_LIBYUV)
- add_library(yuv OBJECT ${AOM_LIBYUV_SOURCES})
- if (NOT MSVC)
- target_compile_options(yuv PRIVATE -Wno-unused-parameter)
+if (CONFIG_UNIT_TESTS OR ENABLE_EXAMPLES OR ENABLE_TOOLS)
+ if (CONFIG_LIBYUV)
+ add_library(yuv OBJECT ${AOM_LIBYUV_SOURCES})
+ if (NOT MSVC)
+ target_compile_options(yuv PRIVATE -Wno-unused-parameter)
+ endif ()
+ include_directories("${AOM_ROOT}/third_party/libyuv/include")
+
+ # Add to existing targets.
+ foreach (aom_app ${AOM_APP_TARGETS})
+ target_sources(${aom_app} PRIVATE $<TARGET_OBJECTS:yuv>)
+ set_property(TARGET ${aom_app} PROPERTY LINKER_LANGUAGE CXX)
+ endforeach ()
endif ()
- include_directories("${AOM_ROOT}/third_party/libyuv/include")
- # Add to existing targets.
- foreach (aom_app ${AOM_APP_TARGETS})
- target_sources(${aom_app} PRIVATE $<TARGET_OBJECTS:yuv>)
- set_property(TARGET ${aom_app} PROPERTY LINKER_LANGUAGE CXX)
- endforeach ()
-endif ()
+ if (CONFIG_WEBM_IO)
+ add_library(webm OBJECT ${AOM_LIBWEBM_SOURCES})
+ include_directories("${AOM_ROOT}/third_party/libwebm")
+ target_compile_definitions(webm PRIVATE __STDC_CONSTANT_MACROS)
+ target_compile_definitions(webm PRIVATE __STDC_LIMIT_MACROS)
-if (CONFIG_WEBM_IO)
- add_library(webm OBJECT ${AOM_LIBWEBM_SOURCES})
- include_directories("${AOM_ROOT}/third_party/libwebm")
+ if (NOT MSVC)
+ target_compile_options(webm PRIVATE -Wno-shadow)
+ endif ()
- if (NOT MSVC)
- target_compile_options(webm PRIVATE -Wno-shadow)
- endif ()
+ # Add to existing targets.
+ if (CONFIG_AV1_DECODER)
+ target_sources(aom_decoder_app_util PRIVATE ${AOM_WEBM_DECODER_SOURCES})
+ endif ()
- # Add to existing targets.
- if (CONFIG_AV1_DECODER)
- target_sources(aom_decoder_app_util PRIVATE ${AOM_WEBM_DECODER_SOURCES})
- endif ()
+ if (CONFIG_AV1_ENCODER)
+ target_sources(aom_encoder_app_util PRIVATE ${AOM_WEBM_ENCODER_SOURCES})
+ endif ()
- if (CONFIG_AV1_ENCODER)
- target_sources(aom_encoder_app_util PRIVATE ${AOM_WEBM_ENCODER_SOURCES})
+ foreach (aom_app ${AOM_APP_TARGETS})
+ target_sources(${aom_app} PRIVATE $<TARGET_OBJECTS:webm>)
+ set_property(TARGET ${aom_app} PROPERTY LINKER_LANGUAGE CXX)
+ endforeach ()
endif ()
-
- foreach (aom_app ${AOM_APP_TARGETS})
- target_sources(${aom_app} PRIVATE $<TARGET_OBJECTS:webm>)
- set_property(TARGET ${aom_app} PROPERTY LINKER_LANGUAGE CXX)
- endforeach ()
endif ()
if (CONFIG_UNIT_TESTS)
@@ -390,12 +495,25 @@ if (XCODE)
endif ()
endif ()
-if ("${CMAKE_GENERATOR}" MATCHES "Makefiles$" )
+if (ENABLE_EXAMPLES AND "${CMAKE_GENERATOR}" MATCHES "Makefiles$")
# Users of the configure build expect the example targets to be built in the
# examples sub directory of the configured build directory after running make.
file(MAKE_DIRECTORY "${AOM_CONFIG_DIR}/examples")
- set_target_properties(${AOM_EXAMPLE_TARGETS} PROPERTIES
- RUNTIME_OUTPUT_DIRECTORY "${AOM_CONFIG_DIR}/examples")
+
+ foreach (target ${AOM_EXAMPLE_TARGETS})
+ if (NOT "${target}" MATCHES "aomdec\|aomenc")
+ set_target_properties(${target} PROPERTIES
+ RUNTIME_OUTPUT_DIRECTORY
+ "${AOM_CONFIG_DIR}/examples")
+ endif ()
+ endforeach ()
+
+ if (ENABLE_TOOLS AND AOM_TOOL_TARGETS)
+ # The same expectation is true for tool targets.
+ file(MAKE_DIRECTORY "${AOM_CONFIG_DIR}/tools")
+ set_target_properties(${AOM_TOOL_TARGETS} PROPERTIES
+ RUNTIME_OUTPUT_DIRECTORY "${AOM_CONFIG_DIR}/tools")
+ endif ()
endif ()
if (BUILD_SHARED_LIBS)
@@ -404,6 +522,9 @@ if (BUILD_SHARED_LIBS)
set_target_properties(aom PROPERTIES SOVERSION 0)
endif ()
+# Handle user supplied compile and link flags last to ensure they're obeyed.
+set_user_flags()
+
# Aomedia documentation rule.
if (ENABLE_DOCS)
include(FindDoxygen)
@@ -426,7 +547,10 @@ set(AOM_INSTALL_INCS
"${AOM_ROOT}/aom/aom.h")
if (CONFIG_AV1_DECODER)
- set(AOM_INSTALL_BINS ${AOM_INSTALL_BINS} aomdec)
+ if (ENABLE_EXAMPLES)
+ set(AOM_INSTALL_BINS ${AOM_INSTALL_BINS} aomdec)
+ endif ()
+
set(AOM_INSTALL_INCS
${AOM_INSTALL_INCS}
"${AOM_ROOT}/aom/aom_decoder.h"
@@ -434,11 +558,14 @@ if (CONFIG_AV1_DECODER)
endif ()
if (CONFIG_AV1_ENCODER)
+ if (ENABLE_EXAMPLES)
+ set(AOM_INSTALL_BINS ${AOM_INSTALL_BINS} aomenc)
+ endif ()
+
set(AOM_INSTALL_INCS
${AOM_INSTALL_INCS}
"${AOM_ROOT}/aom/aomcx.h"
"${AOM_ROOT}/aom/aom_encoder.h")
- set(AOM_INSTALL_BINS ${AOM_INSTALL_BINS} aomenc)
endif ()
set(AOM_INSTALL_LIBS aom)
@@ -448,19 +575,30 @@ install(FILES ${AOM_INSTALL_INCS}
install(FILES "${AOM_CONFIG_DIR}/aom.pc"
DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/pkgconfig")
install(TARGETS ${AOM_INSTALL_LIBS} DESTINATION "${CMAKE_INSTALL_PREFIX}/lib")
-install(TARGETS ${AOM_INSTALL_BINS} DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
+
+if (ENABLE_EXAMPLES)
+ install(TARGETS ${AOM_INSTALL_BINS} DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
+endif ()
# Aomedia dist rule.
-if (CONFIG_AV1_DECODER)
+if (CONFIG_AV1_DECODER AND ENABLE_EXAMPLES)
set(AOM_DIST_APPS ${AOM_DIST_APPS} $<TARGET_FILE:aomdec>)
endif ()
-if (CONFIG_AV1_ENCODER)
+if (CONFIG_AV1_ENCODER AND ENABLE_EXAMPLES)
set(AOM_DIST_APPS ${AOM_DIST_APPS} $<TARGET_FILE:aomenc>)
endif ()
-foreach (example ${AOM_EXAMPLE_TARGETS})
- list(APPEND AOM_DIST_EXAMPLES $<TARGET_FILE:${example}>)
-endforeach ()
+if (ENABLE_EXAMPLES)
+ foreach (example ${AOM_EXAMPLE_TARGETS})
+ list(APPEND AOM_DIST_EXAMPLES $<TARGET_FILE:${example}>)
+ endforeach ()
+endif ()
+
+if (ENABLE_TOOLS)
+ foreach (tool ${AOM_TOOL_TARGETS})
+ list(APPEND AOM_DIST_TOOLS $<TARGET_FILE:${tool}>)
+ endforeach ()
+endif ()
if (NOT AOM_DIST_DIR)
set(AOM_DIST_DIR "${AOM_CONFIG_DIR}/dist")
@@ -473,12 +611,14 @@ add_custom_target(dist
-DAOM_DIST_DIR=${AOM_DIST_DIR}
-DAOM_DIST_APPS="${AOM_DIST_APPS}"
-DAOM_DIST_EXAMPLES="${AOM_DIST_EXAMPLES}"
+ -DAOM_DIST_TOOLS="${AOM_DIST_TOOLS}"
-DAOM_DIST_INCLUDES="${AOM_INSTALL_INCS}"
-DAOM_DIST_LIBS=$<TARGET_FILE:aom>
-DENABLE_DOCS=${ENABLE_DOCS}
-P "${AOM_ROOT}/build/cmake/dist.cmake"
DEPENDS ${AOM_INSTALL_BINS} ${AOM_INSTALL_LIBS}
- ${AOM_INSTALL_INCS} ${AOM_EXAMPLE_TARGETS})
+ ${AOM_INSTALL_INCS} ${AOM_EXAMPLE_TARGETS}
+ ${AOM_TOOL_TARGETS})
if (ENABLE_DOCS)
add_dependencies(dist docs)
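The usage_exit.c stub generated above is a convenience only: every application
that links aom_common_app_util is expected to define the function itself. A
minimal sketch of an application-side definition, assuming the declaration
comes from libaom's tools_common.h:

~~~
/* Sketch: an application's own usage_exit(); the generated
 * ${AOM_CONFIG_DIR}/usage_exit.c stub is only a fallback for tools
 * that have no usage text to print. "my_tool" is hypothetical. */
#include <stdio.h>
#include <stdlib.h>

void usage_exit(void) {
  fprintf(stderr, "Usage: my_tool <infile> <outfile>\n");
  exit(EXIT_FAILURE);
}
~~~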
diff --git a/third_party/aom/README.md b/third_party/aom/README.md
index 9de9c8849..acedb105c 100644
--- a/third_party/aom/README.md
+++ b/third_party/aom/README.md
@@ -63,6 +63,35 @@ CMake built in variable `BUILD_SHARED_LIBS`:
This is currently only supported on non-Windows targets.
+### Debugging
+
+Depending on the generator used there are multiple ways of going about
+debugging AV1 components. For single configuration generators like the Unix
+Makefiles generator, setting `CMAKE_BUILD_TYPE` to Debug is sufficient:
+
+~~~
+ $ cmake path/to/aom -DCMAKE_BUILD_TYPE=Debug
+~~~
+
+For Xcode, mainly because configuration controls for Xcode builds are buried two
+configuration windows deep and must be set for each subproject within the Xcode
+IDE individually, `CMAKE_CONFIGURATION_TYPES` should be set to Debug:
+
+~~~
+ $ cmake path/to/aom -G Xcode -DCMAKE_CONFIGURATION_TYPES=Debug
+~~~
+
+For Visual Studio the in-IDE configuration controls should be used. Simply set
+the IDE project configuration to Debug to allow for stepping through the code.
+
+In addition to the above it can sometimes be useful to debug only C and C++
+code. To disable all assembly code and intrinsics set `AOM_TARGET_CPU` to
+generic at generation time:
+
+~~~
+ $ cmake path/to/aom -DAOM_TARGET_CPU=generic
+~~~
+
### Cross compiling
For the purposes of building the AV1 codec and applications and relative to the
@@ -81,7 +110,9 @@ The toolchain files available at the time of this writing are:
- x86-ios-simulator.cmake
- x86-linux.cmake
- x86-macos.cmake
+ - x86-mingw-gcc.cmake
- x86\_64-ios-simulator.cmake
+ - x86\_64-mingw-gcc.cmake
The following example demonstrates use of the x86-macos.cmake toolchain file on
a x86\_64 MacOS host:
@@ -109,6 +140,20 @@ In addition to the above it's important to note that the toolchain files
suffixed with gcc behave differently than the others. These toolchain files
attempt to obey the $CROSS environment variable.
+### Sanitizers
+
+Sanitizer integration is built-in to the CMake build system. To enable a
+sanitizer, add `-DSANITIZE=<type>` to the CMake command line. For example, to
+enable address sanitizer:
+
+~~~
+ $ cmake path/to/aom -DSANITIZE=address
+ $ make
+~~~
+
+Sanitizers available vary by platform, target, and compiler. Consult your
+compiler documentation to determine which, if any, are available.
+
### Microsoft Visual Studio builds
Building the AV1 codec library in Microsoft Visual Studio is supported. The
@@ -249,11 +294,8 @@ test jobs. Sharded test runs can be achieved in a couple of ways.
# Set the environment variable GTEST_TOTAL_SHARDS to 9 to run 10 test shards
# (GTEST shard indexing is 0 based).
$ export GTEST_TOTAL_SHARDS=9
- $ for shard in $(seq 0 ${GTEST_TOTAL_SHARDS}); do \
- [ ${shard} -lt ${GTEST_TOTAL_SHARDS} ] \
- && GTEST_SHARD_INDEX=${shard} ./test_libaom & \
- done
-
+ $ seq 0 $(( $GTEST_TOTAL_SHARDS - 1 )) \
+ | xargs -n 1 -P 0 -I{} env GTEST_SHARD_INDEX={} ./test_libaom
~~~
To create a test shard for each CPU core available on the current system set
diff --git a/third_party/aom/aom/aom.h b/third_party/aom/aom/aom.h
index 98366b870..fecbeaf56 100644
--- a/third_party/aom/aom/aom.h
+++ b/third_party/aom/aom/aom.h
@@ -45,9 +45,7 @@ extern "C" {
enum aom_com_control_id {
/*!\brief pass in an external frame into decoder to be used as reference frame
*/
- AOM_SET_REFERENCE = 1,
- AOM_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
- AOM_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
+ AOM_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
AOM_SET_DBG_COLOR_REF_FRAME =
4, /**< set the reference frames to color for each macroblock */
AOM_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
@@ -59,6 +57,9 @@ enum aom_com_control_id {
* AOM_DECODER_CTRL_ID_START range next time we're ready to break the ABI.
*/
AV1_GET_REFERENCE = 128, /**< get a pointer to a reference frame */
+ AV1_SET_REFERENCE = 129, /**< write a frame into a reference buffer */
+ AV1_COPY_REFERENCE =
+ 130, /**< get a copy of reference frame from the decoder */
AOM_COMMON_CTRL_ID_MAX,
AV1_GET_NEW_FRAME_IMAGE = 192, /**< get a pointer to the new frame */
@@ -98,25 +99,6 @@ typedef struct aom_postproc_cfg {
int noise_level; /**< the strength of additive noise, valid range [0, 16] */
} aom_postproc_cfg_t;
-/*!\brief reference frame type
- *
- * The set of macros define the type of AOM reference frames
- */
-typedef enum aom_ref_frame_type {
- AOM_LAST_FRAME = 1,
- AOM_GOLD_FRAME = 2,
- AOM_ALTR_FRAME = 4
-} aom_ref_frame_type_t;
-
-/*!\brief reference frame data struct
- *
- * Define the data struct to access aom reference frames.
- */
-typedef struct aom_ref_frame {
- aom_ref_frame_type_t frame_type; /**< which reference frame */
- aom_image_t img; /**< reference frame data in image format */
-} aom_ref_frame_t;
-
/*!\brief AV1 specific reference frame data struct
*
* Define the data struct to access av1 reference frames.
@@ -131,10 +113,6 @@ typedef struct av1_ref_frame {
*
* defines the data type for each of AOM decoder control function requires
*/
-AOM_CTRL_USE_TYPE(AOM_SET_REFERENCE, aom_ref_frame_t *)
-#define AOM_CTRL_AOM_SET_REFERENCE
-AOM_CTRL_USE_TYPE(AOM_COPY_REFERENCE, aom_ref_frame_t *)
-#define AOM_CTRL_AOM_COPY_REFERENCE
AOM_CTRL_USE_TYPE(AOM_SET_POSTPROC, aom_postproc_cfg_t *)
#define AOM_CTRL_AOM_SET_POSTPROC
AOM_CTRL_USE_TYPE(AOM_SET_DBG_COLOR_REF_FRAME, int)
@@ -147,6 +125,10 @@ AOM_CTRL_USE_TYPE(AOM_SET_DBG_DISPLAY_MV, int)
#define AOM_CTRL_AOM_SET_DBG_DISPLAY_MV
AOM_CTRL_USE_TYPE(AV1_GET_REFERENCE, av1_ref_frame_t *)
#define AOM_CTRL_AV1_GET_REFERENCE
+AOM_CTRL_USE_TYPE(AV1_SET_REFERENCE, av1_ref_frame_t *)
+#define AOM_CTRL_AV1_SET_REFERENCE
+AOM_CTRL_USE_TYPE(AV1_COPY_REFERENCE, av1_ref_frame_t *)
+#define AOM_CTRL_AV1_COPY_REFERENCE
AOM_CTRL_USE_TYPE(AV1_GET_NEW_FRAME_IMAGE, aom_image_t *)
#define AOM_CTRL_AV1_GET_NEW_FRAME_IMAGE
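With the generic AOM_SET_REFERENCE and AOM_COPY_REFERENCE controls removed,
callers move to the AV1-specific controls added above. A minimal sketch of the
replacement call path, assuming the idx/img fields of av1_ref_frame_t as
declared in this header:

~~~
#include "aom/aom.h"
#include "aom/aom_decoder.h"

/* Sketch: fetch a copy of reference buffer 0 from an open decoder. */
static aom_codec_err_t copy_ref0(aom_codec_ctx_t *decoder, aom_image_t *out) {
  av1_ref_frame_t ref;
  ref.idx = 0;    /* which reference buffer to read */
  ref.img = *out; /* image that receives the copy */
  return aom_codec_control(decoder, AV1_COPY_REFERENCE, &ref);
}
~~~

AV1_SET_REFERENCE takes the same struct in the other direction, writing
ref.img into the reference buffer named by ref.idx.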
diff --git a/third_party/aom/aom/aom_decoder.h b/third_party/aom/aom/aom_decoder.h
index 509b875d3..ceab93453 100644
--- a/third_party/aom/aom/aom_decoder.h
+++ b/third_party/aom/aom/aom_decoder.h
@@ -55,8 +55,6 @@ extern "C" {
#define AOM_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */
#define AOM_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */
#define AOM_CODEC_CAP_POSTPROC 0x40000 /**< Can postprocess decoded frame */
-/*!\brief Can conceal errors due to packet loss */
-#define AOM_CODEC_CAP_ERROR_CONCEALMENT 0x80000
/*!\brief Can receive encoded frames one fragment at a time */
#define AOM_CODEC_CAP_INPUT_FRAGMENTS 0x100000
@@ -73,8 +71,6 @@ extern "C" {
#define AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000
#define AOM_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */
-/*!\brief Conceal errors in decoded frames */
-#define AOM_CODEC_USE_ERROR_CONCEALMENT 0x20000
/*!\brief The input frame should be passed to the decoder one fragment at a
* time */
#define AOM_CODEC_USE_INPUT_FRAGMENTS 0x40000
diff --git a/third_party/aom/aom/aom_encoder.h b/third_party/aom/aom/aom_encoder.h
index 39f1ae3e5..208ba011f 100644
--- a/third_party/aom/aom/aom_encoder.h
+++ b/third_party/aom/aom/aom_encoder.h
@@ -372,21 +372,21 @@ typedef struct aom_codec_enc_cfg {
*/
unsigned int rc_resize_mode;
- /*!\brief Frame resize numerator.
+ /*!\brief Frame resize denominator.
*
- * The numerator for resize to use, assuming 16 as the denominator.
+ * The denominator for resize to use, assuming 8 as the numerator.
*
- * Valid numerators are 8 - 16 for now.
+ * Valid denominators are 8 - 16 for now.
*/
- unsigned int rc_resize_numerator;
+ unsigned int rc_resize_denominator;
- /*!\brief Keyframe resize numerator.
+ /*!\brief Keyframe resize denominator.
*
- * The numerator for resize to use, assuming 16 as the denominator.
+ * The denominator for resize to use, assuming 8 as the numerator.
*
- * Valid numerators are 8 - 16 for now.
+ * Valid denominators are 8 - 16 for now.
*/
- unsigned int rc_resize_kf_numerator;
+ unsigned int rc_resize_kf_denominator;
/*!\brief Frame super-resolution scaling mode.
*
@@ -394,32 +394,50 @@ typedef struct aom_codec_enc_cfg {
* upscaling after the encode/decode process. Taking control of upscaling and
* using restoration filters should allow it to outperform normal resizing.
*
- * Mode 0 is SUPERRES_NONE, mode 1 is SUPERRES_FIXED, and mode 2 is
- * SUPERRES_DYNAMIC.
+ * Mode 0 is SUPERRES_NONE, mode 1 is SUPERRES_FIXED, mode 2 is
+ * SUPERRES_RANDOM and mode 3 is SUPERRES_QTHRESH.
*/
unsigned int rc_superres_mode;
- /*!\brief Frame super-resolution numerator.
+ /*!\brief Frame super-resolution denominator.
*
- * The numerator for superres to use. If fixed it will only change if the
+ * The denominator for superres to use. If fixed it will only change if the
* cumulative scale change over resizing and superres is greater than 1/2;
* this forces superres to reduce scaling.
*
- * Valid numerators are 8 to 16.
+ * Valid denominators are 8 to 16.
*
- * Ignored by SUPERRES_DYNAMIC.
+ * Used only by SUPERRES_FIXED.
*/
- unsigned int rc_superres_numerator;
+ unsigned int rc_superres_denominator;
- /*!\brief Keyframe super-resolution numerator.
+ /*!\brief Keyframe super-resolution denominator.
*
- * The numerator for superres to use. If fixed it will only change if the
+ * The denominator for superres to use. If fixed it will only change if the
* cumulative scale change over resizing and superres is greater than 1/2;
* this forces superres to reduce scaling.
*
- * Valid numerators are 8 - 16 for now.
+ * Valid denominators are 8 - 16 for now.
*/
- unsigned int rc_superres_kf_numerator;
+ unsigned int rc_superres_kf_denominator;
+
+ /*!\brief Frame super-resolution q threshold.
+ *
+ * The q level threshold after which superres is used.
+ * Valid values are 1 to 63.
+ *
+ * Used only by SUPERRES_QTHRESH
+ */
+ unsigned int rc_superres_qthresh;
+
+ /*!\brief Keyframe super-resolution q threshold.
+ *
+ * The q level threshold after which superres is used for key frames.
+ * Valid values are 1 to 63.
+ *
+ * Used only by SUPERRES_QTHRESH
+ */
+ unsigned int rc_superres_kf_qthresh;
/*!\brief Rate control algorithm to use.
*
@@ -601,6 +619,48 @@ typedef struct aom_codec_enc_cfg {
* implies a large-scale tile coding.
*/
unsigned int large_scale_tile;
+
+ /*!\brief Number of explicit tile widths specified
+ *
+ * This value indicates the number of tile widths specified
+ * A value of 0 implies no tile widths are specified.
+ * Tile widths are given in the array tile_widths[]
+ */
+ int tile_width_count;
+
+ /*!\brief Number of explicit tile heights specified
+ *
+ * This value indicates the number of tile heights specified
+ * A value of 0 implies no tile heights are specified.
+ * Tile heights are given in the array tile_heights[]
+ */
+ int tile_height_count;
+
+/*!\brief Maximum number of tile widths in tile widths array
+ *
+ * This define gives the maximum number of elements in the tile_widths array.
+ */
+#define MAX_TILE_WIDTHS 64 // maximum tile width array length
+
+ /*!\brief Array of specified tile widths
+ *
+ * This array specifies tile widths (and may be empty)
+ * The number of widths specified is given by tile_width_count
+ */
+ int tile_widths[MAX_TILE_WIDTHS];
+
+/*!\brief Maximum number of tile heights in tile heights array.
+ *
+ * This define gives the maximum number of elements in the tile_heights array.
+ */
+#define MAX_TILE_HEIGHTS 64 // maximum tile height array length
+
+ /*!\brief Array of specified tile heights
+ *
+ * This array specifies tile heights (and may be empty)
+ * The number of heights specified is given by tile_height_count
+ */
+ int tile_heights[MAX_TILE_HEIGHTS];
} aom_codec_enc_cfg_t; /**< alias for struct aom_codec_enc_cfg */
/*!\brief Initialize an encoder instance
@@ -616,7 +676,7 @@ typedef struct aom_codec_enc_cfg {
*
* \param[in] ctx Pointer to this instance's context.
* \param[in] iface Pointer to the algorithm interface to use.
- * \param[in] cfg Configuration to use, if known. May be NULL.
+ * \param[in] cfg Configuration to use, if known.
* \param[in] flags Bitfield of AOM_CODEC_USE_* flags
* \param[in] ver ABI version number. Must be set to
* AOM_ENCODER_ABI_VERSION
@@ -646,7 +706,7 @@ aom_codec_err_t aom_codec_enc_init_ver(aom_codec_ctx_t *ctx,
*
* \param[in] ctx Pointer to this instance's context.
* \param[in] iface Pointer to the algorithm interface to use.
- * \param[in] cfg Configuration to use, if known. May be NULL.
+ * \param[in] cfg Configuration to use, if known.
* \param[in] num_enc Total number of encoders.
* \param[in] flags Bitfield of AOM_CODEC_USE_* flags
* \param[in] dsf Pointer to down-sampling factors.
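The renamed rate-control fields are easiest to read side by side. A sketch of
configuring q-threshold superres (mode 3, SUPERRES_QTHRESH, per the comments
above) via the standard aom_codec_enc_config_default() entry point; the
resize-mode value of 1 meaning fixed resize is an assumption:

~~~
#include "aom/aomcx.h"

/* Sketch: scaling is now a denominator over a fixed numerator of 8. */
static void configure_superres(aom_codec_enc_cfg_t *cfg) {
  aom_codec_enc_config_default(aom_codec_av1_cx(), cfg, 0);
  cfg->rc_resize_mode = 1;         /* assumed: fixed-resize mode */
  cfg->rc_resize_denominator = 12; /* scale by 8/12; valid range 8..16 */
  cfg->rc_superres_mode = 3;       /* SUPERRES_QTHRESH */
  cfg->rc_superres_qthresh = 43;   /* superres once q exceeds this; 1..63 */
  cfg->rc_superres_kf_qthresh = 43;
}
~~~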
diff --git a/third_party/aom/aom/aom_image.h b/third_party/aom/aom/aom_image.h
index 34cf71552..776794960 100644
--- a/third_party/aom/aom/aom_image.h
+++ b/third_party/aom/aom/aom_image.h
@@ -35,8 +35,6 @@ extern "C" {
#define AOM_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel. */
#define AOM_IMG_FMT_HIGHBITDEPTH 0x800 /**< Image uses 16bit framebuffer. */
-#include "./aom_config.h"
-
/*!\brief List of supported image formats */
typedef enum aom_img_fmt {
AOM_IMG_FMT_NONE,
@@ -71,25 +69,19 @@ typedef enum aom_img_fmt {
/*!\brief List of supported color spaces */
typedef enum aom_color_space {
- AOM_CS_UNKNOWN = 0, /**< Unknown */
- AOM_CS_BT_601 = 1, /**< BT.601 */
- AOM_CS_BT_709 = 2, /**< BT.709 */
- AOM_CS_SMPTE_170 = 3, /**< SMPTE.170 */
- AOM_CS_SMPTE_240 = 4, /**< SMPTE.240 */
-#if CONFIG_COLORSPACE_HEADERS
+ AOM_CS_UNKNOWN = 0, /**< Unknown */
+ AOM_CS_BT_601 = 1, /**< BT.601 */
+ AOM_CS_BT_709 = 2, /**< BT.709 */
+ AOM_CS_SMPTE_170 = 3, /**< SMPTE.170 */
+ AOM_CS_SMPTE_240 = 4, /**< SMPTE.240 */
AOM_CS_BT_2020_NCL = 5, /**< BT.2020 non-constant luminance (BT.2100) */
AOM_CS_BT_2020_CL = 6, /**< BT.2020 constant luminance */
AOM_CS_SRGB = 7, /**< sRGB */
AOM_CS_ICTCP = 8, /**< ICtCp, ITU-R BT.2100 */
AOM_CS_RESERVED = 9 /**< Values 9..31 are reserved */
-#else
- AOM_CS_BT_2020 = 5, /**< BT.2020 */
- AOM_CS_RESERVED = 6, /**< Reserved */
- AOM_CS_SRGB = 7 /**< sRGB */
-#endif
-} aom_color_space_t; /**< alias for enum aom_color_space */
+} aom_color_space_t; /**< alias for enum aom_color_space */
-#if CONFIG_COLORSPACE_HEADERS
+/*!\brief List of supported transfer functions */
typedef enum aom_transfer_function {
AOM_TF_UNKNOWN = 0, /**< Unknown */
AOM_TF_BT_709 = 1, /**< BT.709 */
@@ -97,7 +89,6 @@ typedef enum aom_transfer_function {
AOM_TF_HLG = 3, /**< Hybrid Log-Gamma */
AOM_TF_RESERVED = 4 /**< Values 4..31 are reserved */
} aom_transfer_function_t; /**< alias for enum aom_transfer_function */
-#endif
/*!\brief List of supported color range */
typedef enum aom_color_range {
@@ -105,7 +96,7 @@ typedef enum aom_color_range {
AOM_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */
} aom_color_range_t; /**< alias for enum aom_color_range */
-#if CONFIG_COLORSPACE_HEADERS
+/*!\brief List of chroma sample positions */
typedef enum aom_chroma_sample_position {
AOM_CSP_UNKNOWN = 0, /**< Unknown */
AOM_CSP_VERTICAL = 1, /**< Horizontally co-located with luma(0, 0)*/
@@ -113,17 +104,14 @@ typedef enum aom_chroma_sample_position {
AOM_CSP_COLOCATED = 2, /**< Co-located with luma(0, 0) sample */
AOM_CSP_RESERVED = 3 /**< Reserved value */
} aom_chroma_sample_position_t; /**< alias for enum aom_transfer_function */
-#endif
/**\brief Image Descriptor */
typedef struct aom_image {
- aom_img_fmt_t fmt; /**< Image Format */
- aom_color_space_t cs; /**< Color Space */
-#if CONFIG_COLORSPACE_HEADERS
+ aom_img_fmt_t fmt; /**< Image Format */
+ aom_color_space_t cs; /**< Color Space */
aom_transfer_function_t tf; /**< transfer function */
aom_chroma_sample_position_t csp; /**< chroma sample position */
-#endif
- aom_color_range_t range; /**< Color Range */
+ aom_color_range_t range; /**< Color Range */
/* Image storage dimensions */
unsigned int w; /**< Stored image width */
@@ -252,6 +240,24 @@ void aom_img_flip(aom_image_t *img);
*/
void aom_img_free(aom_image_t *img);
+/*!\brief Get the width of a plane
+ *
+ * Get the width of a plane of an image
+ *
+ * \param[in] img Image descriptor
+ * \param[in] plane Plane index
+ */
+int aom_img_plane_width(const aom_image_t *img, int plane);
+
+/*!\brief Get the height of a plane
+ *
+ * Get the height of a plane of an image
+ *
+ * \param[in] img Image descriptor
+ * \param[in] plane Plane index
+ */
+int aom_img_plane_height(const aom_image_t *img, int plane);
+
#ifdef __cplusplus
} // extern "C"
#endif
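The new plane accessors fold the chroma subsampling arithmetic into a single
call. A sketch of walking an image plane by plane, assuming an 8-bit image and
the existing planes[]/stride[] fields of aom_image_t:

~~~
#include <stdio.h>
#include "aom/aom_image.h"

/* Sketch: dump each plane's visible samples row by row (8-bit data). */
static void write_planes(const aom_image_t *img, FILE *out) {
  for (int plane = 0; plane < 3; ++plane) {
    const unsigned char *buf = img->planes[plane];
    const int stride = img->stride[plane];
    const int w = aom_img_plane_width(img, plane); /* already subsampled */
    const int h = aom_img_plane_height(img, plane);
    for (int y = 0; y < h; ++y) fwrite(buf + y * stride, 1, (size_t)w, out);
  }
}
~~~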
diff --git a/third_party/aom/aom/aomcx.h b/third_party/aom/aom/aomcx.h
index 2b87a71d8..e573f986d 100644
--- a/third_party/aom/aom/aomcx.h
+++ b/third_party/aom/aom/aomcx.h
@@ -341,7 +341,6 @@ enum aome_enc_control_id {
*/
AV1E_SET_COLOR_SPACE,
-#if CONFIG_COLORSPACE_HEADERS
/*!\brief Codec control function to set transfer function info.
* \note Valid ranges: 0..4, default is "UNKNOWN".
* 0 = UNKNOWN,
@@ -360,7 +359,6 @@ enum aome_enc_control_id {
* 3 = RESERVED
*/
AV1E_SET_CHROMA_SAMPLE_POSITION,
-#endif
/*!\brief Codec control function to set minimum interval between GF/ARF frames
*
@@ -458,6 +456,21 @@ enum aome_enc_control_id {
*/
AV1E_SET_QM_MAX,
+ /*!\brief Codec control function to encode with dist_8x8.
+ *
+ * dist_8x8 is enabled automatically for tuning metrics that require
+ * measuring distortion at the 8x8 level. This control also allows
+ * measuring distortion at the 8x8 level for other tuning options
+ * (e.g., PSNR), for testing purposes.
+ * 0 = do not use dist_8x8
+ * 1 = use dist_8x8
+ *
+ * By default, the encoder does not use dist_8x8.
+ *
+ * Experiment: DIST_8X8
+ */
+ AV1E_SET_ENABLE_DIST_8X8,
+
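
A minimal usage sketch for the new control, assuming an initialized encoder context; the helper name is hypothetical, while aom_codec_control() and the control id are from the API above:

#include "aom/aom_codec.h"
#include "aom/aomcx.h"

// Hypothetical helper: force 8x8-level distortion measurement, e.g. when
// tuning for PSNR but wanting dist_8x8 numbers for testing.
static aom_codec_err_t enable_dist_8x8(aom_codec_ctx_t *codec) {
  return aom_codec_control(codec, AV1E_SET_ENABLE_DIST_8X8, 1);  // 0 disables
}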
/*!\brief Codec control function to set a maximum number of tile groups.
*
* This will set the maximum number of tile groups. This will be
@@ -567,24 +580,31 @@ typedef enum aom_scaling_mode_1d {
AOME_ONETWO = 3
} AOM_SCALING_MODE;
+/*!\brief Max number of segments
+ *
+ * This is the maximum number of segments allowed within a frame.
+ *
+ * Currently the same as "MAX_SEGMENTS" in AV1, i.e. the most AV1 supports.
+ *
+ */
+#define AOM_MAX_SEGMENTS 8
+
/*!\brief aom region of interest map
*
 * This defines the data structure for the region-of-interest map
*
+ * TODO(yaowu): create a unit test for ROI map related APIs
+ *
*/
-
typedef struct aom_roi_map {
- /*! An id between 0 and 3 for each 16x16 region within a frame. */
+ /*! An id between 0 and 7 for each 8x8 region within a frame. */
unsigned char *roi_map;
- unsigned int rows; /**< Number of rows. */
- unsigned int cols; /**< Number of columns. */
- // TODO(paulwilkins): broken for AV1 which has 8 segments
- // q and loop filter deltas for each segment
- // (see MAX_MB_SEGMENTS)
- int delta_q[4]; /**< Quantizer deltas. */
- int delta_lf[4]; /**< Loop filter deltas. */
+ unsigned int rows; /**< Number of rows. */
+ unsigned int cols; /**< Number of columns. */
+ int delta_q[AOM_MAX_SEGMENTS]; /**< Quantizer deltas. */
+ int delta_lf[AOM_MAX_SEGMENTS]; /**< Loop filter deltas. */
/*! Static breakout threshold for each segment. */
- unsigned int static_threshold[4];
+ unsigned int static_threshold[AOM_MAX_SEGMENTS];
} aom_roi_map_t;
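
A sketch of populating the widened structure, assuming an initialized encoder context. The helper name is hypothetical, and whether the encoder copies the map synchronously before the control returns is an assumption here:

#include <stdlib.h>
#include <string.h>
#include "aom/aom_codec.h"
#include "aom/aomcx.h"

// Hypothetical helper: build a flat ROI map (one segment id per 8x8 cell)
// and hand it to the encoder. rows/cols are the frame size in 8x8 units.
static aom_codec_err_t set_flat_roi(aom_codec_ctx_t *codec, unsigned int rows,
                                    unsigned int cols) {
  aom_roi_map_t roi;
  memset(&roi, 0, sizeof(roi));
  roi.rows = rows;
  roi.cols = cols;
  roi.roi_map = (unsigned char *)calloc(rows * cols, 1);  // all segment 0
  if (!roi.roi_map) return AOM_CODEC_MEM_ERROR;
  for (int i = 0; i < AOM_MAX_SEGMENTS; ++i) {
    roi.delta_q[i] = 0;   // per-segment quantizer delta
    roi.delta_lf[i] = 0;  // per-segment loop filter delta
  }
  const aom_codec_err_t res = aom_codec_control(codec, AOME_SET_ROI_MAP, &roi);
  free(roi.roi_map);  // assumes the encoder consumed the map synchronously
  return res;
}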
/*!\brief aom active region map
@@ -622,7 +642,14 @@ typedef enum {
* Changes the encoder to tune for certain types of input material.
*
*/
-typedef enum { AOM_TUNE_PSNR, AOM_TUNE_SSIM } aom_tune_metric;
+typedef enum {
+ AOM_TUNE_PSNR,
+ AOM_TUNE_SSIM,
+#ifdef CONFIG_DIST_8X8
+ AOM_TUNE_CDEF_DIST,
+ AOM_TUNE_DAALA_DIST
+#endif
+} aom_tune_metric;
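
Because the new members are conditionally compiled, callers must guard their use the same way. A short hedged sketch (helper name hypothetical; the metric would be passed to the encoder via the AOME_SET_TUNING control):

#include "aom/aomcx.h"

// Hypothetical helper: pick an 8x8-distortion tuning metric when the
// DIST_8X8 experiment is compiled in, falling back to PSNR otherwise.
static aom_tune_metric pick_tune_metric(void) {
#if CONFIG_DIST_8X8
  return AOM_TUNE_DAALA_DIST;
#else
  return AOM_TUNE_PSNR;
#endif
}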
/*!\cond */
/*!\brief Encoder control function parameter type
@@ -632,7 +659,7 @@ typedef enum { AOM_TUNE_PSNR, AOM_TUNE_SSIM } aom_tune_metric;
*
*/
-AOM_CTRL_USE_TYPE_DEPRECATED(AOME_USE_REFERENCE, int)
+AOM_CTRL_USE_TYPE(AOME_USE_REFERENCE, int)
#define AOM_CTRL_AOME_USE_REFERENCE
AOM_CTRL_USE_TYPE(AOME_SET_ROI_MAP, aom_roi_map_t *)
#define AOM_CTRL_AOME_SET_ROI_MAP
@@ -693,6 +720,9 @@ AOM_CTRL_USE_TYPE(AV1E_SET_LOSSLESS, unsigned int)
AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_QM, unsigned int)
#define AOM_CTRL_AV1E_SET_ENABLE_QM
+AOM_CTRL_USE_TYPE(AV1E_SET_ENABLE_DIST_8X8, unsigned int)
+#define AOM_CTRL_AV1E_SET_ENABLE_DIST_8X8
+
AOM_CTRL_USE_TYPE(AV1E_SET_QM_MIN, unsigned int)
#define AOM_CTRL_AV1E_SET_QM_MIN
@@ -728,13 +758,11 @@ AOM_CTRL_USE_TYPE(AV1E_SET_TUNE_CONTENT, int) /* aom_tune_content */
AOM_CTRL_USE_TYPE(AV1E_SET_COLOR_SPACE, int)
#define AOM_CTRL_AV1E_SET_COLOR_SPACE
-#if CONFIG_COLORSPACE_HEADERS
AOM_CTRL_USE_TYPE(AV1E_SET_TRANSFER_FUNCTION, int)
#define AOM_CTRL_AV1E_SET_TRANSFER_FUNCTION
AOM_CTRL_USE_TYPE(AV1E_SET_CHROMA_SAMPLE_POSITION, int)
#define AOM_CTRL_AV1E_SET_CHROMA_SAMPLE_POSITION
-#endif
AOM_CTRL_USE_TYPE(AV1E_SET_MIN_GF_INTERVAL, unsigned int)
#define AOM_CTRL_AV1E_SET_MIN_GF_INTERVAL
diff --git a/third_party/aom/aom/exports_com b/third_party/aom/aom/exports_com
index 0c79fa124..897b712a7 100644
--- a/third_party/aom/aom/exports_com
+++ b/third_party/aom/aom/exports_com
@@ -12,5 +12,7 @@ text aom_codec_version_str
text aom_img_alloc
text aom_img_flip
text aom_img_free
+text aom_img_plane_width
+text aom_img_plane_height
text aom_img_set_rect
text aom_img_wrap
diff --git a/third_party/aom/aom/src/aom_decoder.c b/third_party/aom/aom/src/aom_decoder.c
index 57a9a25d2..75eb81089 100644
--- a/third_party/aom/aom/src/aom_decoder.c
+++ b/third_party/aom/aom/src/aom_decoder.c
@@ -37,9 +37,6 @@ aom_codec_err_t aom_codec_dec_init_ver(aom_codec_ctx_t *ctx,
else if ((flags & AOM_CODEC_USE_POSTPROC) &&
!(iface->caps & AOM_CODEC_CAP_POSTPROC))
res = AOM_CODEC_INCAPABLE;
- else if ((flags & AOM_CODEC_USE_ERROR_CONCEALMENT) &&
- !(iface->caps & AOM_CODEC_CAP_ERROR_CONCEALMENT))
- res = AOM_CODEC_INCAPABLE;
else if ((flags & AOM_CODEC_USE_INPUT_FRAGMENTS) &&
!(iface->caps & AOM_CODEC_CAP_INPUT_FRAGMENTS))
res = AOM_CODEC_INCAPABLE;
diff --git a/third_party/aom/aom/src/aom_image.c b/third_party/aom/aom/src/aom_image.c
index 0d54fd46d..e1176bd45 100644
--- a/third_party/aom/aom/src/aom_image.c
+++ b/third_party/aom/aom/src/aom_image.c
@@ -238,3 +238,17 @@ void aom_img_free(aom_image_t *img) {
if (img->self_allocd) free(img);
}
}
+
+int aom_img_plane_width(const aom_image_t *img, int plane) {
+ if (plane > 0 && img->x_chroma_shift > 0)
+ return (img->d_w + 1) >> img->x_chroma_shift;
+ else
+ return img->d_w;
+}
+
+int aom_img_plane_height(const aom_image_t *img, int plane) {
+ if (plane > 0 && img->y_chroma_shift > 0)
+ return (img->d_h + 1) >> img->y_chroma_shift;
+ else
+ return img->d_h;
+}
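
A worked example of the new helpers, following directly from the code above: on a 4:2:0 image the chroma shifts are 1, and the `+ 1` rounds up so odd luma dimensions lose no samples. Minimal sketch, error handling elided:

#include <assert.h>
#include "aom/aom_image.h"

static void check_plane_dims(void) {
  aom_image_t img;
  if (!aom_img_alloc(&img, AOM_IMG_FMT_I420, 1919, 1079, 1)) return;
  assert(aom_img_plane_width(&img, 0) == 1919);  // luma: full size
  assert(aom_img_plane_height(&img, 0) == 1079);
  assert(aom_img_plane_width(&img, 1) == 960);   // (1919 + 1) >> 1
  assert(aom_img_plane_height(&img, 1) == 540);  // (1079 + 1) >> 1
  aom_img_free(&img);
}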
diff --git a/third_party/aom/aom_dsp/aom_dsp.cmake b/third_party/aom/aom_dsp/aom_dsp.cmake
index 3ce6761ca..11b55caa7 100644
--- a/third_party/aom/aom_dsp/aom_dsp.cmake
+++ b/third_party/aom/aom_dsp/aom_dsp.cmake
@@ -23,6 +23,7 @@ set(AOM_DSP_COMMON_SOURCES
"${AOM_ROOT}/aom_dsp/blend_a64_mask.c"
"${AOM_ROOT}/aom_dsp/blend_a64_vmask.c"
"${AOM_ROOT}/aom_dsp/intrapred.c"
+ "${AOM_ROOT}/aom_dsp/intrapred_common.h"
"${AOM_ROOT}/aom_dsp/loopfilter.c"
"${AOM_ROOT}/aom_dsp/prob.c"
"${AOM_ROOT}/aom_dsp/prob.h"
@@ -45,7 +46,9 @@ set(AOM_DSP_COMMON_ASM_SSE2
set(AOM_DSP_COMMON_INTRIN_SSE2
"${AOM_ROOT}/aom_dsp/x86/aom_asm_stubs.c"
"${AOM_ROOT}/aom_dsp/x86/convolve.h"
+ "${AOM_ROOT}/aom_dsp/x86/intrapred_sse2.c"
"${AOM_ROOT}/aom_dsp/x86/txfm_common_sse2.h"
+ "${AOM_ROOT}/aom_dsp/x86/lpf_common_sse2.h"
"${AOM_ROOT}/aom_dsp/x86/loopfilter_sse2.c")
set(AOM_DSP_COMMON_ASM_SSSE3
@@ -55,6 +58,7 @@ set(AOM_DSP_COMMON_ASM_SSSE3
set(AOM_DSP_COMMON_INTRIN_SSSE3
"${AOM_ROOT}/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c"
+ "${AOM_ROOT}/aom_dsp/x86/intrapred_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/inv_txfm_ssse3.c")
set(AOM_DSP_COMMON_INTRIN_SSE4_1
@@ -64,16 +68,28 @@ set(AOM_DSP_COMMON_INTRIN_SSE4_1
set(AOM_DSP_COMMON_INTRIN_AVX2
"${AOM_ROOT}/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c"
- "${AOM_ROOT}/aom_dsp/x86/loopfilter_avx2.c"
+ "${AOM_ROOT}/aom_dsp/x86/intrapred_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/inv_txfm_avx2.c"
+ "${AOM_ROOT}/aom_dsp/x86/common_avx2.h"
"${AOM_ROOT}/aom_dsp/x86/inv_txfm_common_avx2.h"
"${AOM_ROOT}/aom_dsp/x86/txfm_common_avx2.h")
+if (NOT CONFIG_PARALLEL_DEBLOCKING)
+ set(AOM_DSP_COMMON_INTRIN_AVX2
+ ${AOM_DSP_COMMON_INTRIN_AVX2}
+ "${AOM_ROOT}/aom_dsp/x86/loopfilter_avx2.c")
+endif ()
+
+if (NOT CONFIG_EXT_PARTITION)
+ set(AOM_DSP_COMMON_ASM_NEON
+ "${AOM_ROOT}/aom_dsp/arm/aom_convolve8_avg_neon_asm.asm"
+ "${AOM_ROOT}/aom_dsp/arm/aom_convolve8_neon_asm.asm"
+ "${AOM_ROOT}/aom_dsp/arm/aom_convolve_avg_neon_asm.asm"
+ "${AOM_ROOT}/aom_dsp/arm/aom_convolve_copy_neon_asm.asm")
+endif ()
+
set(AOM_DSP_COMMON_ASM_NEON
- "${AOM_ROOT}/aom_dsp/arm/aom_convolve8_avg_neon_asm.asm"
- "${AOM_ROOT}/aom_dsp/arm/aom_convolve8_neon_asm.asm"
- "${AOM_ROOT}/aom_dsp/arm/aom_convolve_avg_neon_asm.asm"
- "${AOM_ROOT}/aom_dsp/arm/aom_convolve_copy_neon_asm.asm"
+ ${AOM_DSP_COMMON_ASM_NEON}
"${AOM_ROOT}/aom_dsp/arm/idct16x16_1_add_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/idct16x16_add_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/idct32x32_1_add_neon.asm"
@@ -83,33 +99,53 @@ set(AOM_DSP_COMMON_ASM_NEON
"${AOM_ROOT}/aom_dsp/arm/idct8x8_1_add_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/idct8x8_add_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/intrapred_neon_asm.asm"
- "${AOM_ROOT}/aom_dsp/arm/loopfilter_16_neon.asm"
- "${AOM_ROOT}/aom_dsp/arm/loopfilter_4_neon.asm"
- "${AOM_ROOT}/aom_dsp/arm/loopfilter_8_neon.asm"
- "${AOM_ROOT}/aom_dsp/arm/loopfilter_mb_neon.asm"
"${AOM_ROOT}/aom_dsp/arm/save_reg_neon.asm")
+if (NOT CONFIG_PARALLEL_DEBLOCKING)
+ set(AOM_DSP_COMMON_ASM_NEON
+ ${AOM_DSP_COMMON_ASM_NEON}
+ "${AOM_ROOT}/aom_dsp/arm/loopfilter_16_neon.asm"
+ "${AOM_ROOT}/aom_dsp/arm/loopfilter_4_neon.asm"
+ "${AOM_ROOT}/aom_dsp/arm/loopfilter_8_neon.asm"
+ "${AOM_ROOT}/aom_dsp/arm/loopfilter_mb_neon.asm")
+endif ()
+
+if (NOT CONFIG_EXT_PARTITION)
+ set(AOM_DSP_COMMON_INTRIN_NEON
+ "${AOM_ROOT}/aom_dsp/arm/aom_convolve_neon.c")
+endif ()
+
set(AOM_DSP_COMMON_INTRIN_NEON
- "${AOM_ROOT}/aom_dsp/arm/aom_convolve_neon.c"
+ ${AOM_DSP_COMMON_INTRIN_NEON}
"${AOM_ROOT}/aom_dsp/arm/avg_neon.c"
"${AOM_ROOT}/aom_dsp/arm/fwd_txfm_neon.c"
"${AOM_ROOT}/aom_dsp/arm/hadamard_neon.c"
"${AOM_ROOT}/aom_dsp/arm/idct16x16_neon.c"
"${AOM_ROOT}/aom_dsp/arm/intrapred_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/loopfilter_neon.c"
"${AOM_ROOT}/aom_dsp/arm/sad4d_neon.c"
"${AOM_ROOT}/aom_dsp/arm/sad_neon.c"
"${AOM_ROOT}/aom_dsp/arm/subpel_variance_neon.c"
"${AOM_ROOT}/aom_dsp/arm/subtract_neon.c"
"${AOM_ROOT}/aom_dsp/arm/variance_neon.c")
+if (NOT CONFIG_PARALLEL_DEBLOCKING)
+ set(AOM_DSP_COMMON_INTRIN_NEON
+ ${AOM_DSP_COMMON_INTRIN_NEON}
+ "${AOM_ROOT}/aom_dsp/arm/loopfilter_neon.c")
+endif ()
+
if ("${AOM_TARGET_CPU}" STREQUAL "arm64")
+ if (NOT CONFIG_EXT_PARTITION)
+ set(AOM_DSP_COMMON_INTRIN_NEON
+ ${AOM_DSP_COMMON_INTRIN_NEON}
+ "${AOM_ROOT}/aom_dsp/arm/aom_convolve8_avg_neon.c"
+ "${AOM_ROOT}/aom_dsp/arm/aom_convolve8_neon.c"
+ "${AOM_ROOT}/aom_dsp/arm/aom_convolve_avg_neon.c"
+ "${AOM_ROOT}/aom_dsp/arm/aom_convolve_copy_neon.c")
+ endif ()
+
set(AOM_DSP_COMMON_INTRIN_NEON
${AOM_DSP_COMMON_INTRIN_NEON}
- "${AOM_ROOT}/aom_dsp/arm/aom_convolve8_avg_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/aom_convolve8_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/aom_convolve_avg_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/aom_convolve_copy_neon.c"
"${AOM_ROOT}/aom_dsp/arm/idct16x16_1_add_neon.c"
"${AOM_ROOT}/aom_dsp/arm/idct16x16_add_neon.c"
"${AOM_ROOT}/aom_dsp/arm/idct32x32_1_add_neon.c"
@@ -118,10 +154,15 @@ if ("${AOM_TARGET_CPU}" STREQUAL "arm64")
"${AOM_ROOT}/aom_dsp/arm/idct4x4_add_neon.c"
"${AOM_ROOT}/aom_dsp/arm/idct8x8_1_add_neon.c"
"${AOM_ROOT}/aom_dsp/arm/idct8x8_add_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/intrapred_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/loopfilter_16_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/loopfilter_4_neon.c"
- "${AOM_ROOT}/aom_dsp/arm/loopfilter_8_neon.c")
+ "${AOM_ROOT}/aom_dsp/arm/intrapred_neon.c")
+
+ if (NOT CONFIG_PARALLEL_DEBLOCKING)
+ set(AOM_DSP_COMMON_INTRIN_NEON
+ ${AOM_DSP_COMMON_INTRIN_NEON}
+ "${AOM_ROOT}/aom_dsp/arm/loopfilter_16_neon.c"
+ "${AOM_ROOT}/aom_dsp/arm/loopfilter_4_neon.c"
+ "${AOM_ROOT}/aom_dsp/arm/loopfilter_8_neon.c")
+ endif ()
endif ()
set(AOM_DSP_COMMON_INTRIN_DSPR2
@@ -141,14 +182,19 @@ set(AOM_DSP_COMMON_INTRIN_DSPR2
"${AOM_ROOT}/aom_dsp/mips/intrapred16_dspr2.c"
"${AOM_ROOT}/aom_dsp/mips/intrapred4_dspr2.c"
"${AOM_ROOT}/aom_dsp/mips/intrapred8_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/inv_txfm_dspr2.h"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_filters_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_filters_dspr2.h"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_macros_dspr2.h"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_masks_dspr2.h"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_vert_dspr2.c")
+ "${AOM_ROOT}/aom_dsp/mips/inv_txfm_dspr2.h")
+
+if (NOT CONFIG_PARALLEL_DEBLOCKING)
+ set(AOM_DSP_COMMON_INTRIN_DSPR2
+ ${AOM_DSP_COMMON_INTRIN_DSPR2}
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_filters_dspr2.c"
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_filters_dspr2.h"
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_macros_dspr2.h"
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_masks_dspr2.h"
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_dspr2.c"
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c"
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_mb_vert_dspr2.c")
+endif ()
set(AOM_DSP_COMMON_INTRIN_MSA
"${AOM_ROOT}/aom_dsp/mips/aom_convolve8_avg_horiz_msa.c"
@@ -169,13 +215,18 @@ set(AOM_DSP_COMMON_INTRIN_MSA
"${AOM_ROOT}/aom_dsp/mips/idct8x8_msa.c"
"${AOM_ROOT}/aom_dsp/mips/intrapred_msa.c"
"${AOM_ROOT}/aom_dsp/mips/inv_txfm_msa.h"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_16_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_4_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_8_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/loopfilter_msa.h"
"${AOM_ROOT}/aom_dsp/mips/macros_msa.h"
"${AOM_ROOT}/aom_dsp/mips/txfm_macros_msa.h")
+if (NOT CONFIG_PARALLEL_DEBLOCKING)
+ set(AOM_DSP_COMMON_INTRIN_MSA
+ ${AOM_DSP_COMMON_INTRIN_MSA}
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_16_msa.c"
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_4_msa.c"
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_8_msa.c"
+ "${AOM_ROOT}/aom_dsp/mips/loopfilter_msa.h")
+endif ()
+
if (CONFIG_HIGHBITDEPTH)
set(AOM_DSP_COMMON_ASM_SSE2
${AOM_DSP_COMMON_ASM_SSE2}
@@ -185,11 +236,18 @@ if (CONFIG_HIGHBITDEPTH)
set(AOM_DSP_COMMON_INTRIN_SSE2
${AOM_DSP_COMMON_INTRIN_SSE2}
+ "${AOM_ROOT}/aom_dsp/x86/highbd_intrapred_sse2.c"
"${AOM_ROOT}/aom_dsp/x86/highbd_loopfilter_sse2.c")
+ set(AOM_DSP_COMMON_INTRIN_SSSE3
+ ${AOM_DSP_COMMON_INTRIN_SSSE3}
+ "${AOM_ROOT}/aom_dsp/x86/highbd_intrapred_ssse3.c")
+
set(AOM_DSP_COMMON_INTRIN_AVX2
${AOM_DSP_COMMON_INTRIN_AVX2}
- "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c")
+ "${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c"
+ "${AOM_ROOT}/aom_dsp/x86/highbd_intrapred_avx2.c"
+ "${AOM_ROOT}/aom_dsp/x86/highbd_loopfilter_avx2.c")
else ()
set(AOM_DSP_COMMON_INTRIN_DSPR2
${AOM_DSP_COMMON_INTRIN_DSPR2}
@@ -332,12 +390,10 @@ if (CONFIG_AV1_ENCODER)
"${AOM_ROOT}/aom_dsp/mips/variance_msa.c"
"${AOM_ROOT}/aom_dsp/mips/sub_pixel_variance_msa.c")
- if (CONFIG_EXT_INTER)
set(AOM_DSP_ENCODER_INTRIN_SSSE3
${AOM_DSP_ENCODER_INTRIN_SSSE3}
"${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.c")
- endif ()
if (CONFIG_HIGHBITDEPTH)
set(AOM_DSP_ENCODER_INTRIN_SSE2
diff --git a/third_party/aom/aom_dsp/aom_dsp.mk b/third_party/aom/aom_dsp/aom_dsp.mk
index f9d675ac0..950db0216 100644
--- a/third_party/aom/aom_dsp/aom_dsp.mk
+++ b/third_party/aom/aom_dsp/aom_dsp.mk
@@ -64,6 +64,7 @@ endif
# intra predictions
DSP_SRCS-yes += intrapred.c
+DSP_SRCS-yes += intrapred_common.h
ifneq ($(CONFIG_ANS),yes)
DSP_SRCS-yes += entcode.c
@@ -75,9 +76,16 @@ DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/aom_subpixel_8t_ssse3.asm
+DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.c
+DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.c
+DSP_SRCS-$(HAVE_AVX2) += x86/intrapred_avx2.c
+
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE) += x86/highbd_intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm
+DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.c
+DSP_SRCS-$(HAVE_SSSE3) += x86/highbd_intrapred_ssse3.c
+DSP_SRCS-$(HAVE_AVX2) += x86/highbd_intrapred_avx2.c
endif # CONFIG_HIGHBITDEPTH
DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM)
@@ -120,6 +128,7 @@ DSP_SRCS-$(HAVE_AVX2) += x86/highbd_convolve_avx2.c
endif
DSP_SRCS-$(HAVE_SSE2) += x86/aom_convolve_copy_sse2.asm
+ifneq ($(CONFIG_EXT_PARTITION),yes)
ifeq ($(HAVE_NEON_ASM),yes)
DSP_SRCS-yes += arm/aom_convolve_copy_neon_asm$(ASM)
DSP_SRCS-yes += arm/aom_convolve8_avg_neon_asm$(ASM)
@@ -135,6 +144,7 @@ DSP_SRCS-yes += arm/aom_convolve_avg_neon.c
DSP_SRCS-yes += arm/aom_convolve_neon.c
endif # HAVE_NEON
endif # HAVE_NEON_ASM
+endif # CONFIG_EXT_PARTITION
# common (msa)
DSP_SRCS-$(HAVE_MSA) += mips/aom_convolve8_avg_horiz_msa.c
@@ -164,7 +174,10 @@ DSP_SRCS-$(HAVE_DSPR2) += mips/convolve8_vert_dspr2.c
DSP_SRCS-yes += loopfilter.c
DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/loopfilter_sse2.c
-DSP_SRCS-$(HAVE_AVX2) += x86/loopfilter_avx2.c
+DSP_SRCS-$(HAVE_SSE2) += x86/lpf_common_sse2.h
+
+ifneq ($(CONFIG_PARALLEL_DEBLOCKING),yes)
+DSP_SRCS-$(HAVE_AVX2) += x86/loopfilter_avx2.c
DSP_SRCS-$(HAVE_NEON) += arm/loopfilter_neon.c
ifeq ($(HAVE_NEON_ASM),yes)
@@ -191,13 +204,16 @@ DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_masks_dspr2.h
DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_mb_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_mb_horiz_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_mb_vert_dspr2.c
+endif # !CONFIG_PARALLEL_DEBLOCKING
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_loopfilter_sse2.c
+DSP_SRCS-$(HAVE_AVX2) += x86/highbd_loopfilter_avx2.c
endif # CONFIG_HIGHBITDEPTH
DSP_SRCS-yes += txfm_common.h
DSP_SRCS-yes += x86/txfm_common_intrin.h
+DSP_SRCS-$(HAVE_AVX2) += x86/common_avx2.h
DSP_SRCS-$(HAVE_SSE2) += x86/txfm_common_sse2.h
DSP_SRCS-$(HAVE_SSSE3) += x86/obmc_intrinsic_ssse3.h
DSP_SRCS-$(HAVE_MSA) += mips/txfm_macros_msa.h
@@ -343,10 +359,8 @@ DSP_SRCS-$(HAVE_AVX2) += x86/sad_highbd_avx2.c
endif
ifeq ($(CONFIG_AV1_ENCODER),yes)
-ifeq ($(CONFIG_EXT_INTER),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/masked_sad_intrin_ssse3.c
DSP_SRCS-$(HAVE_SSSE3) += x86/masked_variance_intrin_ssse3.c
-endif #CONFIG_EXT_INTER
ifeq ($(CONFIG_MOTION_VAR),yes)
DSP_SRCS-$(HAVE_SSE4_1) += x86/obmc_sad_sse4.c
DSP_SRCS-$(HAVE_SSE4_1) += x86/obmc_variance_sse4.c
diff --git a/third_party/aom/aom_dsp/aom_dsp_common.h b/third_party/aom/aom_dsp/aom_dsp_common.h
index 5b104321b..3d3bcba37 100644
--- a/third_party/aom/aom_dsp/aom_dsp_common.h
+++ b/third_party/aom/aom_dsp/aom_dsp_common.h
@@ -52,10 +52,9 @@ extern "C" {
#define UNLIKELY(v) (v)
#endif
-#if CONFIG_AOM_QM
typedef uint16_t qm_val_t;
#define AOM_QM_BITS 5
-#endif
+
#if CONFIG_HIGHBITDEPTH
// Note:
// tran_low_t is the datatype used for final transform coefficients.
@@ -78,6 +77,10 @@ static INLINE int clamp(int value, int low, int high) {
return value < low ? low : (value > high ? high : value);
}
+static INLINE uint32_t clamp32u(uint32_t value, uint32_t low, uint32_t high) {
+ return value < low ? low : (value > high ? high : value);
+}
+
static INLINE int64_t clamp64(int64_t value, int64_t low, int64_t high) {
return value < low ? low : (value > high ? high : value);
}
diff --git a/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl b/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl
index 0c0356870..f4f6c64d4 100755
--- a/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -40,11 +40,17 @@ foreach $w (@block_widths) {
push @block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w) ;
}
}
-if (aom_config("CONFIG_EXT_PARTITION_TYPES")) {
+if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
push @block_sizes, [4, 16];
push @block_sizes, [16, 4];
push @block_sizes, [8, 32];
push @block_sizes, [32, 8];
+ push @block_sizes, [16, 64];
+ push @block_sizes, [64, 16];
+ if (aom_config("CONFIG_EXT_PARTITION") eq "yes") {
+ push @block_sizes, [32, 128];
+ push @block_sizes, [128, 32];
+ }
}
@tx_dims = (2, 4, 8, 16, 32);
@@ -60,14 +66,9 @@ foreach $w (@tx_dims) {
}
}
-@pred_names = qw/dc dc_top dc_left dc_128 v h d207e d63e d45e d117 d135 d153/;
-if (aom_config("CONFIG_ALT_INTRA") eq "yes") {
- push @pred_names, qw/paeth smooth/;
- if (aom_config("CONFIG_SMOOTH_HV") eq "yes") {
- push @pred_names, qw/smooth_v smooth_h/;
- }
-} else {
- push @pred_names, 'tm';
+@pred_names = qw/dc dc_top dc_left dc_128 v h d207e d63e d45e d117 d135 d153 paeth smooth/;
+if (aom_config("CONFIG_SMOOTH_HV") eq "yes") {
+ push @pred_names, qw/smooth_v smooth_h/;
}
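
For readers unfamiliar with the RTCD definitions that follow: each add_proto/specialize pair expands, roughly, into a function pointer bound to the best available kernel at runtime. A simplified, self-contained illustration in C — the names, stub bodies, and setup function here are hypothetical stand-ins, not the generated code:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

// Prototype shared by the intra predictors declared via `add_proto`.
typedef void (*pred_fn_t)(uint8_t *dst, ptrdiff_t stride,
                          const uint8_t *above, const uint8_t *left);

static void dc_predictor_c(uint8_t *dst, ptrdiff_t stride,
                           const uint8_t *above, const uint8_t *left) {
  (void)above;
  (void)left;
  for (int r = 0; r < 4; ++r) memset(dst + r * stride, 128, 4);  // stub
}

// In the real build this would be the SSE2 kernel from intrapred_sse2.c.
static void dc_predictor_sse2(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left) {
  dc_predictor_c(dst, stride, above, left);  // stub
}

// `add_proto` declares the pointer; `specialize ... sse2` makes the rtcd
// setup code rebind it when SSE2 is detected at runtime.
static pred_fn_t aom_dc_predictor_4x4 = dc_predictor_c;

static void setup_rtcd(int have_sse2) {
  if (have_sse2) aom_dc_predictor_4x4 = dc_predictor_sse2;
}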
#
@@ -86,70 +87,185 @@ foreach (@tx_sizes) {
}
}
-specialize qw/aom_d63e_predictor_4x4 ssse3/;
-specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
-specialize qw/aom_d135_predictor_4x4 neon/;
-specialize qw/aom_d153_predictor_4x4 ssse3/;
-specialize qw/aom_v_predictor_4x4 neon msa sse2/;
-if (aom_config("CONFIG_ALT_INTRA") eq "") {
- specialize qw/aom_tm_predictor_4x4 neon dspr2 msa sse2/;
-} # CONFIG_ALT_INTRA
-specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
specialize qw/aom_dc_top_predictor_4x4 msa neon sse2/;
+specialize qw/aom_dc_top_predictor_4x8 sse2/;
+specialize qw/aom_dc_top_predictor_8x4 sse2/;
+specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
+specialize qw/aom_dc_top_predictor_8x16 sse2/;
+specialize qw/aom_dc_top_predictor_16x8 sse2/;
+specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
+specialize qw/aom_dc_top_predictor_16x32 sse2/;
+specialize qw/aom_dc_top_predictor_32x16 sse2 avx2/;
+specialize qw/aom_dc_top_predictor_32x32 msa neon sse2 avx2/;
specialize qw/aom_dc_left_predictor_4x4 msa neon sse2/;
+specialize qw/aom_dc_left_predictor_4x8 sse2/;
+specialize qw/aom_dc_left_predictor_8x4 sse2/;
+specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
+specialize qw/aom_dc_left_predictor_8x16 sse2/;
+specialize qw/aom_dc_left_predictor_16x8 sse2/;
+specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
+specialize qw/aom_dc_left_predictor_16x32 sse2/;
+specialize qw/aom_dc_left_predictor_32x16 sse2 avx2/;
+specialize qw/aom_dc_left_predictor_32x32 msa neon sse2 avx2/;
specialize qw/aom_dc_128_predictor_4x4 msa neon sse2/;
+specialize qw/aom_dc_128_predictor_4x8 sse2/;
+specialize qw/aom_dc_128_predictor_8x4 sse2/;
+specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;
+specialize qw/aom_dc_128_predictor_8x16 sse2/;
+specialize qw/aom_dc_128_predictor_16x8 sse2/;
+specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;
+specialize qw/aom_dc_128_predictor_16x32 sse2/;
+specialize qw/aom_dc_128_predictor_32x16 sse2 avx2/;
+specialize qw/aom_dc_128_predictor_32x32 msa neon sse2 avx2/;
+specialize qw/aom_v_predictor_4x4 neon msa sse2/;
+specialize qw/aom_v_predictor_4x8 sse2/;
+specialize qw/aom_v_predictor_8x4 sse2/;
+specialize qw/aom_v_predictor_8x8 neon msa sse2/;
+specialize qw/aom_v_predictor_8x16 sse2/;
+specialize qw/aom_v_predictor_16x8 sse2/;
+specialize qw/aom_v_predictor_16x16 neon msa sse2/;
+specialize qw/aom_v_predictor_16x32 sse2/;
+specialize qw/aom_v_predictor_32x16 sse2 avx2/;
+specialize qw/aom_v_predictor_32x32 neon msa sse2 avx2/;
+specialize qw/aom_h_predictor_4x8 sse2/;
+specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
+specialize qw/aom_h_predictor_8x4 sse2/;
specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
+specialize qw/aom_h_predictor_8x16 sse2/;
+specialize qw/aom_h_predictor_16x8 sse2/;
+specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
+specialize qw/aom_h_predictor_16x32 sse2/;
+specialize qw/aom_h_predictor_32x16 sse2/;
+specialize qw/aom_h_predictor_32x32 neon msa sse2 avx2/;
+specialize qw/aom_paeth_predictor_4x4 ssse3/;
+specialize qw/aom_paeth_predictor_4x8 ssse3/;
+specialize qw/aom_paeth_predictor_8x4 ssse3/;
+specialize qw/aom_paeth_predictor_8x8 ssse3/;
+specialize qw/aom_paeth_predictor_8x16 ssse3/;
+specialize qw/aom_paeth_predictor_16x8 ssse3 avx2/;
+specialize qw/aom_paeth_predictor_16x16 ssse3 avx2/;
+specialize qw/aom_paeth_predictor_16x32 ssse3 avx2/;
+specialize qw/aom_paeth_predictor_32x16 ssse3 avx2/;
+specialize qw/aom_paeth_predictor_32x32 ssse3 avx2/;
+specialize qw/aom_smooth_predictor_4x4 ssse3/;
+specialize qw/aom_smooth_predictor_4x8 ssse3/;
+specialize qw/aom_smooth_predictor_8x4 ssse3/;
+specialize qw/aom_smooth_predictor_8x8 ssse3/;
+specialize qw/aom_smooth_predictor_8x16 ssse3/;
+specialize qw/aom_smooth_predictor_16x8 ssse3/;
+specialize qw/aom_smooth_predictor_16x16 ssse3/;
+specialize qw/aom_smooth_predictor_16x32 ssse3/;
+specialize qw/aom_smooth_predictor_32x16 ssse3/;
+specialize qw/aom_smooth_predictor_32x32 ssse3/;
+
+specialize qw/aom_d63e_predictor_4x4 ssse3/;
+specialize qw/aom_d135_predictor_4x4 neon/;
+specialize qw/aom_d153_predictor_4x4 ssse3/;
+specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
+specialize qw/aom_dc_predictor_4x8 sse2/;
specialize qw/aom_d153_predictor_8x8 ssse3/;
-specialize qw/aom_v_predictor_8x8 neon msa sse2/;
-if (aom_config("CONFIG_ALT_INTRA") eq "") {
- specialize qw/aom_tm_predictor_8x8 neon dspr2 msa sse2/;
-} # CONFIG_ALT_INTRA
+specialize qw/aom_dc_predictor_8x4 sse2/;
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
-specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
-specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
-specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;
-specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
+specialize qw/aom_dc_predictor_8x16 sse2/;
specialize qw/aom_d153_predictor_16x16 ssse3/;
-specialize qw/aom_v_predictor_16x16 neon msa sse2/;
-if (aom_config("CONFIG_ALT_INTRA") eq "") {
- specialize qw/aom_tm_predictor_16x16 neon msa sse2/;
-} # CONFIG_ALT_INTRA
+specialize qw/aom_dc_predictor_16x8 sse2/;
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
-specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
-specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
-specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;
-specialize qw/aom_h_predictor_32x32 neon msa sse2/;
+specialize qw/aom_dc_predictor_16x32 sse2/;
specialize qw/aom_d153_predictor_32x32 ssse3/;
-specialize qw/aom_v_predictor_32x32 neon msa sse2/;
-if (aom_config("CONFIG_ALT_INTRA") eq "") {
- specialize qw/aom_tm_predictor_32x32 neon msa sse2/;
-} # CONFIG_ALT_INTRA
-specialize qw/aom_dc_predictor_32x32 msa neon sse2/;
-specialize qw/aom_dc_top_predictor_32x32 msa neon sse2/;
-specialize qw/aom_dc_left_predictor_32x32 msa neon sse2/;
-specialize qw/aom_dc_128_predictor_32x32 msa neon sse2/;
+
+specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
+specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
specialize qw/aom_highbd_v_predictor_4x4 sse2/;
- if (aom_config("CONFIG_ALT_INTRA") eq "") {
- specialize qw/aom_highbd_tm_predictor_4x4 sse2/;
- } # CONFIG_ALT_INTRA
- specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
+ specialize qw/aom_highbd_v_predictor_4x8 sse2/;
+ specialize qw/aom_highbd_v_predictor_8x4 sse2/;
specialize qw/aom_highbd_v_predictor_8x8 sse2/;
- if (aom_config("CONFIG_ALT_INTRA") eq "") {
- specialize qw/aom_highbd_tm_predictor_8x8 sse2/;
- } # CONFIG_ALT_INTRA
- specialize qw/aom_highbd_dc_predictor_8x8 sse2/;;
+ specialize qw/aom_highbd_v_predictor_8x16 sse2/;
+ specialize qw/aom_highbd_v_predictor_16x8 sse2/;
specialize qw/aom_highbd_v_predictor_16x16 sse2/;
- if (aom_config("CONFIG_ALT_INTRA") eq "") {
- specialize qw/aom_highbd_tm_predictor_16x16 sse2/;
- } # CONFIG_ALT_INTRA
- specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
+ specialize qw/aom_highbd_v_predictor_16x32 sse2/;
+ specialize qw/aom_highbd_v_predictor_32x16 sse2/;
specialize qw/aom_highbd_v_predictor_32x32 sse2/;
- if (aom_config("CONFIG_ALT_INTRA") eq "") {
- specialize qw/aom_highbd_tm_predictor_32x32 sse2/;
- } # CONFIG_ALT_INTRA
+ specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
+ specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
+  specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
+  specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
+  specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
+ specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
+ specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
+ specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
+ specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
specialize qw/aom_highbd_dc_predictor_32x32 sse2/;
+ specialize qw/aom_highbd_h_predictor_4x4 sse2/;
+ specialize qw/aom_highbd_h_predictor_4x8 sse2/;
+ specialize qw/aom_highbd_h_predictor_8x4 sse2/;
+ specialize qw/aom_highbd_h_predictor_8x8 sse2/;
+ specialize qw/aom_highbd_h_predictor_8x16 sse2/;
+ specialize qw/aom_highbd_h_predictor_16x8 sse2/;
+ specialize qw/aom_highbd_h_predictor_16x16 sse2/;
+ specialize qw/aom_highbd_h_predictor_16x32 sse2/;
+ specialize qw/aom_highbd_h_predictor_32x16 sse2/;
+ specialize qw/aom_highbd_h_predictor_32x32 sse2/;
+ specialize qw/aom_highbd_dc_left_predictor_4x4 sse2/;
+ specialize qw/aom_highbd_dc_top_predictor_4x4 sse2/;
+ specialize qw/aom_highbd_dc_128_predictor_4x4 sse2/;
+ specialize qw/aom_highbd_dc_left_predictor_4x8 sse2/;
+ specialize qw/aom_highbd_dc_top_predictor_4x8 sse2/;
+ specialize qw/aom_highbd_dc_128_predictor_4x8 sse2/;
+ specialize qw/aom_highbd_dc_left_predictor_8x4 sse2/;
+ specialize qw/aom_highbd_dc_top_predictor_8x4 sse2/;
+ specialize qw/aom_highbd_dc_128_predictor_8x4 sse2/;
+ specialize qw/aom_highbd_dc_left_predictor_8x8 sse2/;
+ specialize qw/aom_highbd_dc_top_predictor_8x8 sse2/;
+ specialize qw/aom_highbd_dc_128_predictor_8x8 sse2/;
+ specialize qw/aom_highbd_dc_left_predictor_8x16 sse2/;
+ specialize qw/aom_highbd_dc_top_predictor_8x16 sse2/;
+ specialize qw/aom_highbd_dc_128_predictor_8x16 sse2/;
+ specialize qw/aom_highbd_dc_left_predictor_16x8 sse2/;
+ specialize qw/aom_highbd_dc_top_predictor_16x8 sse2/;
+ specialize qw/aom_highbd_dc_128_predictor_16x8 sse2/;
+ specialize qw/aom_highbd_dc_left_predictor_16x16 sse2/;
+ specialize qw/aom_highbd_dc_top_predictor_16x16 sse2/;
+ specialize qw/aom_highbd_dc_128_predictor_16x16 sse2/;
+ specialize qw/aom_highbd_dc_left_predictor_16x32 sse2/;
+ specialize qw/aom_highbd_dc_top_predictor_16x32 sse2/;
+ specialize qw/aom_highbd_dc_128_predictor_16x32 sse2/;
+ specialize qw/aom_highbd_dc_left_predictor_32x16 sse2/;
+ specialize qw/aom_highbd_dc_top_predictor_32x16 sse2/;
+ specialize qw/aom_highbd_dc_128_predictor_32x16 sse2/;
+ specialize qw/aom_highbd_dc_left_predictor_32x32 sse2/;
+ specialize qw/aom_highbd_dc_top_predictor_32x32 sse2/;
+ specialize qw/aom_highbd_dc_128_predictor_32x32 sse2/;
+
+ specialize qw/aom_highbd_d117_predictor_4x4 sse2/;
+ specialize qw/aom_highbd_d117_predictor_8x8 ssse3/;
+ specialize qw/aom_highbd_d117_predictor_16x16 ssse3/;
+ specialize qw/aom_highbd_d117_predictor_32x32 ssse3/;
+ specialize qw/aom_highbd_d135_predictor_4x4 sse2/;
+ specialize qw/aom_highbd_d135_predictor_8x8 ssse3/;
+ specialize qw/aom_highbd_d135_predictor_16x16 ssse3/;
+ specialize qw/aom_highbd_d135_predictor_32x32 ssse3/;
+ specialize qw/aom_highbd_d153_predictor_4x4 sse2/;
+ specialize qw/aom_highbd_d153_predictor_8x8 ssse3/;
+ specialize qw/aom_highbd_d153_predictor_16x16 ssse3/;
+ specialize qw/aom_highbd_d153_predictor_32x32 ssse3/;
+
+ specialize qw/aom_highbd_d45e_predictor_4x4 sse2/;
+ specialize qw/aom_highbd_d45e_predictor_4x8 sse2/;
+ specialize qw/aom_highbd_d45e_predictor_8x4 sse2/;
+ specialize qw/aom_highbd_d45e_predictor_8x8 sse2/;
+ specialize qw/aom_highbd_d45e_predictor_8x16 sse2/;
+ specialize qw/aom_highbd_d45e_predictor_16x8 avx2/;
+ specialize qw/aom_highbd_d45e_predictor_16x16 avx2/;
+ specialize qw/aom_highbd_d45e_predictor_16x32 avx2/;
+ specialize qw/aom_highbd_d45e_predictor_32x16 avx2/;
+ specialize qw/aom_highbd_d45e_predictor_32x32 avx2/;
} # CONFIG_HIGHBITDEPTH
#
@@ -257,83 +373,121 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
# Loopfilter
#
add_proto qw/void aom_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
-$aom_lpf_vertical_16_neon_asm=aom_lpf_vertical_16_neon;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
+ specialize qw/aom_lpf_vertical_16 sse2/;
+} else {
+ specialize qw/aom_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
+ $aom_lpf_vertical_16_neon_asm=aom_lpf_vertical_16_neon;
+}
add_proto qw/void aom_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
-$aom_lpf_vertical_16_dual_neon_asm=aom_lpf_vertical_16_dual_neon;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
+ specialize qw/aom_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
+ $aom_lpf_vertical_16_dual_neon_asm=aom_lpf_vertical_16_dual_neon;
+}
add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_vertical_8 sse2 neon dspr2 msa/;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
+ specialize qw/aom_lpf_vertical_8 sse2/;
+} else {
+ specialize qw/aom_lpf_vertical_8 sse2 neon dspr2 msa/;
+}
add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
-$aom_lpf_vertical_8_dual_neon_asm=aom_lpf_vertical_8_dual_neon;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
+ specialize qw/aom_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
+ $aom_lpf_vertical_8_dual_neon_asm=aom_lpf_vertical_8_dual_neon;
+}
add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_vertical_4 sse2 neon dspr2 msa/;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
+ specialize qw/aom_lpf_vertical_4 sse2/;
+} else {
+ specialize qw/aom_lpf_vertical_4 sse2 neon dspr2 msa/;
+}
add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_vertical_4_dual sse2 neon dspr2 msa/;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
+ specialize qw/aom_lpf_vertical_4_dual sse2 neon dspr2 msa/;
+}
add_proto qw/void aom_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
-$aom_lpf_horizontal_edge_8_neon_asm=aom_lpf_horizontal_edge_8_neon;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
+ specialize qw/aom_lpf_horizontal_edge_8 sse2/;
+} else {
+ specialize qw/aom_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
+ $aom_lpf_horizontal_edge_8_neon_asm=aom_lpf_horizontal_edge_8_neon;
+}
add_proto qw/void aom_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
-$aom_lpf_horizontal_edge_16_neon_asm=aom_lpf_horizontal_edge_16_neon;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
+ specialize qw/aom_lpf_horizontal_edge_16 sse2/;
+} else {
+ specialize qw/aom_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
+ $aom_lpf_horizontal_edge_16_neon_asm=aom_lpf_horizontal_edge_16_neon;
+}
add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_horizontal_8 sse2 neon dspr2 msa/;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
+ specialize qw/aom_lpf_horizontal_8 sse2/;
+} else {
+ specialize qw/aom_lpf_horizontal_8 sse2 neon dspr2 msa/;
+}
add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
-$aom_lpf_horizontal_8_dual_neon_asm=aom_lpf_horizontal_8_dual_neon;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
+ specialize qw/aom_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
+ $aom_lpf_horizontal_8_dual_neon_asm=aom_lpf_horizontal_8_dual_neon;
+}
add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/aom_lpf_horizontal_4 sse2 neon dspr2 msa/;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
+ specialize qw/aom_lpf_horizontal_4 sse2/;
+} else {
+ specialize qw/aom_lpf_horizontal_4 sse2 neon dspr2 msa/;
+}
add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
+if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
+ specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
+}
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_16 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/aom_highbd_lpf_vertical_16_dual sse2/;
+ specialize qw/aom_highbd_lpf_vertical_16_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_8 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/aom_highbd_lpf_vertical_8_dual sse2/;
+ specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_vertical_4 sse2/;
add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/aom_highbd_lpf_vertical_4_dual sse2/;
+ specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_edge_8 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/aom_highbd_lpf_horizontal_edge_16 sse2/;
+ specialize qw/aom_highbd_lpf_horizontal_edge_16 sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_8 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/aom_highbd_lpf_horizontal_8_dual sse2/;
+ specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;
add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/aom_highbd_lpf_horizontal_4 sse2/;
add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/aom_highbd_lpf_horizontal_4_dual sse2/;
+ specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
} # CONFIG_HIGHBITDEPTH
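
A minimal calling sketch for the loop filter entry points declared above. The prototype matches the `add_proto` line; the threshold values are placeholders, and the replicated 16-byte layout of the limit tables is an assumption based on the vpx/aom convention rather than something stated in this diff:

#include <stdint.h>
#include <string.h>

// Normally declared in the generated aom_dsp_rtcd.h.
void aom_lpf_horizontal_4(uint8_t *s, int pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh);

static void filter_one_edge(uint8_t *edge_pixel, int pitch) {
  uint8_t blimit[16], limit[16], thresh[16];
  // Assumption: the SIMD kernels expect each threshold replicated across
  // a 16-byte vector, hence the memsets.
  memset(blimit, 16, sizeof(blimit));
  memset(limit, 8, sizeof(limit));
  memset(thresh, 4, sizeof(thresh));
  // `edge_pixel` points at the first pixel of the row below the edge.
  aom_lpf_horizontal_4(edge_pixel, pitch, blimit, limit, thresh);
}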
#
@@ -412,51 +566,48 @@ if (aom_config("CONFIG_AV1") eq "yes") {
add_proto qw/void aom_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- {
- add_proto qw/void aom_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct4x4_16_add sse2/;
+ add_proto qw/void aom_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct4x4_16_add sse2/;
- add_proto qw/void aom_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct4x4_1_add sse2/;
+ add_proto qw/void aom_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct4x4_1_add sse2/;
- add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct8x8_64_add sse2 ssse3/;
+ add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct8x8_64_add sse2 ssse3/;
- add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct8x8_12_add sse2 ssse3/;
+ add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct8x8_12_add sse2 ssse3/;
- add_proto qw/void aom_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct8x8_1_add sse2/;
+ add_proto qw/void aom_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct8x8_1_add sse2/;
- add_proto qw/void aom_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct16x16_256_add sse2 avx2/;
+ add_proto qw/void aom_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct16x16_256_add sse2 avx2/;
- add_proto qw/void aom_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct16x16_38_add avx2/;
+ add_proto qw/void aom_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct16x16_38_add avx2/;
- add_proto qw/void aom_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct16x16_10_add sse2 avx2/;
+ add_proto qw/void aom_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct16x16_10_add sse2 avx2/;
- add_proto qw/void aom_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct16x16_1_add sse2 avx2/;
+ add_proto qw/void aom_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct16x16_1_add sse2 avx2/;
- add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2/;
+ add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2/;
- add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2/;
- # Need to add 135 eob idct32x32 implementations.
- $aom_idct32x32_135_add_sse2=aom_idct32x32_1024_add_sse2;
+ add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2/;
+ # Need to add 135 eob idct32x32 implementations.
+ $aom_idct32x32_135_add_sse2=aom_idct32x32_1024_add_sse2;
- add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2/;
+ add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2/;
- add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct32x32_1_add sse2 avx2/;
- }
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
-} else {
- {
+ add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/aom_idct32x32_1_add sse2 avx2/;
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ } else {
add_proto qw/void aom_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/aom_idct4x4_1_add sse2 neon dspr2 msa/;
@@ -508,48 +659,32 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/aom_iwht4x4_16_add msa sse2/;
- }
-} # CONFIG_HIGHBITDEPTH
+ } # CONFIG_HIGHBITDEPTH
} # CONFIG_AV1
#
# Quantization
#
-if (aom_config("CONFIG_AOM_QM") eq "yes") {
- if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
-
- } # CONFIG_AV1_ENCODER
-} else {
- if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
- add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
+if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+ add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
- add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
+ add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
- add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+} # CONFIG_AV1_ENCODER
- add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b sse2 avx2/;
+if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+ add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_highbd_quantize_b sse2 avx2/;
- add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b_32x32 sse2/;
+ add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_highbd_quantize_b_32x32 sse2/;
- add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- } # CONFIG_AV1_ENCODER
-} # CONFIG_AOM_QM
+} # CONFIG_AV1_ENCODER
if (aom_config("CONFIG_AV1") eq "yes") {
#
# Alpha blending with mask
@@ -575,147 +710,146 @@ if (aom_config("CONFIG_AV1") eq "yes") {
} # CONFIG_AV1
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
-#
-# Block subtraction
-#
-add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
-specialize qw/aom_subtract_block neon msa sse2/;
-
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
-#
-# Sum of Squares
-#
-add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
-specialize qw/aom_sum_squares_2d_i16 sse2/;
-
-add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
-specialize qw/aom_sum_squares_i16 sse2/;
-}
-
-
-#
-# Avg
-#
-if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#
- # Avg
+ # Block subtraction
#
- specialize qw/aom_avg_8x8 sse2 neon msa/;
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
- specialize qw/aom_highbd_subtract_block sse2/;
+ add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
+ specialize qw/aom_subtract_block neon msa sse2/;
+
+ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+ #
+ # Sum of Squares
+ #
+ add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
+ specialize qw/aom_sum_squares_2d_i16 sse2/;
+
+ add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
+ specialize qw/aom_sum_squares_i16 sse2/;
}
+
#
- # Minmax
+ # Avg
#
- add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
- specialize qw/aom_minmax_8x8 sse2 neon/;
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
- }
-
- add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
- specialize qw/aom_hadamard_8x8 sse2 neon/, "$ssse3_x86_64";
+ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
+ #
+ # Avg
+ #
+ specialize qw/aom_avg_8x8 sse2 neon msa/;
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
+ specialize qw/aom_highbd_subtract_block sse2/;
+ }
- add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
- specialize qw/aom_hadamard_16x16 sse2 neon/;
+ #
+ # Minmax
+ #
+ add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+ specialize qw/aom_minmax_8x8 sse2 neon/;
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+ }
- add_proto qw/int aom_satd/, "const int16_t *coeff, int length";
- specialize qw/aom_satd sse2 neon/;
+ add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
+ specialize qw/aom_hadamard_8x8 sse2 neon/, "$ssse3_x86_64";
- add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, int ref_stride, int height";
- specialize qw/aom_int_pro_row sse2 neon/;
+ add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
+ specialize qw/aom_hadamard_16x16 sse2 neon/;
- add_proto qw/int16_t aom_int_pro_col/, "const uint8_t *ref, int width";
- specialize qw/aom_int_pro_col sse2 neon/;
+ add_proto qw/int aom_satd/, "const int16_t *coeff, int length";
+ specialize qw/aom_satd sse2 neon/;
- add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
- specialize qw/aom_vector_var neon sse2/;
-} # CONFIG_AV1_ENCODER
+ add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, int ref_stride, int height";
+ specialize qw/aom_int_pro_row sse2 neon/;
-#
-# Single block SAD / Single block Avg SAD
-#
-foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-}
+ add_proto qw/int16_t aom_int_pro_col/, "const uint8_t *ref, int width";
+ specialize qw/aom_int_pro_col sse2 neon/;
-specialize qw/aom_sad128x128 avx2 sse2/;
-specialize qw/aom_sad128x64 avx2 sse2/;
-specialize qw/aom_sad64x128 avx2 sse2/;
-specialize qw/aom_sad64x64 avx2 neon msa sse2/;
-specialize qw/aom_sad64x32 avx2 msa sse2/;
-specialize qw/aom_sad32x64 avx2 msa sse2/;
-specialize qw/aom_sad32x32 avx2 neon msa sse2/;
-specialize qw/aom_sad32x16 avx2 msa sse2/;
-specialize qw/aom_sad16x32 msa sse2/;
-specialize qw/aom_sad16x16 neon msa sse2/;
-specialize qw/aom_sad16x8 neon msa sse2/;
-specialize qw/aom_sad8x16 neon msa sse2/;
-specialize qw/aom_sad8x8 neon msa sse2/;
-specialize qw/aom_sad8x4 msa sse2/;
-specialize qw/aom_sad4x8 msa sse2/;
-specialize qw/aom_sad4x4 neon msa sse2/;
-
-specialize qw/aom_sad128x128_avg avx2 sse2/;
-specialize qw/aom_sad128x64_avg avx2 sse2/;
-specialize qw/aom_sad64x128_avg avx2 sse2/;
-specialize qw/aom_sad64x64_avg avx2 msa sse2/;
-specialize qw/aom_sad64x32_avg avx2 msa sse2/;
-specialize qw/aom_sad32x64_avg avx2 msa sse2/;
-specialize qw/aom_sad32x32_avg avx2 msa sse2/;
-specialize qw/aom_sad32x16_avg avx2 msa sse2/;
-specialize qw/aom_sad16x32_avg msa sse2/;
-specialize qw/aom_sad16x16_avg msa sse2/;
-specialize qw/aom_sad16x8_avg msa sse2/;
-specialize qw/aom_sad8x16_avg msa sse2/;
-specialize qw/aom_sad8x8_avg msa sse2/;
-specialize qw/aom_sad8x4_avg msa sse2/;
-specialize qw/aom_sad4x8_avg msa sse2/;
-specialize qw/aom_sad4x4_avg msa sse2/;
+ add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
+ specialize qw/aom_vector_var neon sse2/;
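+
+  # Illustrative note (assumption, not part of this change): these
+  # helpers back the low-complexity mode-decision path. aom_hadamard_NxN
+  # transforms the residual in src_diff into coeff, and aom_satd then
+  # reduces it; the portable satd is roughly
+  #
+  #   int aom_satd_c(const int16_t *coeff, int length) {
+  #     int satd = 0;
+  #     for (int i = 0; i < length; ++i) satd += abs(coeff[i]);
+  #     return satd;
+  #   }
+  #
+  # while aom_int_pro_row/col accumulate row/column projections of a
+  # block and aom_vector_var scores how much two projections differ.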
+ } # CONFIG_AV1_ENCODER
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ #
+ # Single block SAD / Single block Avg SAD
+ #
foreach (@block_sizes) {
($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- if ($w != 128 && $h != 128 && $w != 4) {
- specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
- specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
+ add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ }
+
+ specialize qw/aom_sad128x128 avx2 sse2/;
+ specialize qw/aom_sad128x64 avx2 sse2/;
+ specialize qw/aom_sad64x128 avx2 sse2/;
+ specialize qw/aom_sad64x64 avx2 neon msa sse2/;
+ specialize qw/aom_sad64x32 avx2 msa sse2/;
+ specialize qw/aom_sad32x64 avx2 msa sse2/;
+ specialize qw/aom_sad32x32 avx2 neon msa sse2/;
+ specialize qw/aom_sad32x16 avx2 msa sse2/;
+ specialize qw/aom_sad16x32 msa sse2/;
+ specialize qw/aom_sad16x16 neon msa sse2/;
+ specialize qw/aom_sad16x8 neon msa sse2/;
+ specialize qw/aom_sad8x16 neon msa sse2/;
+ specialize qw/aom_sad8x8 neon msa sse2/;
+ specialize qw/aom_sad8x4 msa sse2/;
+ specialize qw/aom_sad4x8 msa sse2/;
+ specialize qw/aom_sad4x4 neon msa sse2/;
+
+ specialize qw/aom_sad128x128_avg avx2 sse2/;
+ specialize qw/aom_sad128x64_avg avx2 sse2/;
+ specialize qw/aom_sad64x128_avg avx2 sse2/;
+ specialize qw/aom_sad64x64_avg avx2 msa sse2/;
+ specialize qw/aom_sad64x32_avg avx2 msa sse2/;
+ specialize qw/aom_sad32x64_avg avx2 msa sse2/;
+ specialize qw/aom_sad32x32_avg avx2 msa sse2/;
+ specialize qw/aom_sad32x16_avg avx2 msa sse2/;
+ specialize qw/aom_sad16x32_avg msa sse2/;
+ specialize qw/aom_sad16x16_avg msa sse2/;
+ specialize qw/aom_sad16x8_avg msa sse2/;
+ specialize qw/aom_sad8x16_avg msa sse2/;
+ specialize qw/aom_sad8x8_avg msa sse2/;
+ specialize qw/aom_sad8x4_avg msa sse2/;
+ specialize qw/aom_sad4x8_avg msa sse2/;
+ specialize qw/aom_sad4x4_avg msa sse2/;
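+
+  # Illustrative sketch (assumption, not part of this change): the
+  # portable reference behind every aom_sadWxH prototype is a plain sum
+  # of absolute differences, roughly
+  #
+  #   static unsigned int sad(const uint8_t *src, int src_stride,
+  #                           const uint8_t *ref, int ref_stride,
+  #                           int width, int height) {
+  #     unsigned int s = 0;
+  #     for (int y = 0; y < height; ++y) {
+  #       for (int x = 0; x < width; ++x) s += abs(src[x] - ref[x]);
+  #       src += src_stride;
+  #       ref += ref_stride;
+  #     }
+  #     return s;
+  #   }
+  #
+  # and the _avg variants first average ref_ptr with second_pred
+  # (rounding up) before taking the same sum.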
+
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ if ($w != 128 && $h != 128 && $w != 4) {
+ specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
+ specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
+ }
}
+ specialize qw/aom_highbd_sad128x128 avx2/;
+ specialize qw/aom_highbd_sad128x64 avx2/;
+ specialize qw/aom_highbd_sad64x128 avx2/;
+ specialize qw/aom_highbd_sad64x64 avx2/;
+ specialize qw/aom_highbd_sad64x32 avx2/;
+ specialize qw/aom_highbd_sad32x64 avx2/;
+ specialize qw/aom_highbd_sad32x32 avx2/;
+ specialize qw/aom_highbd_sad32x16 avx2/;
+ specialize qw/aom_highbd_sad16x32 avx2/;
+ specialize qw/aom_highbd_sad16x16 avx2/;
+ specialize qw/aom_highbd_sad16x8 avx2/;
+
+ specialize qw/aom_highbd_sad128x128_avg avx2/;
+ specialize qw/aom_highbd_sad128x64_avg avx2/;
+ specialize qw/aom_highbd_sad64x128_avg avx2/;
+ specialize qw/aom_highbd_sad64x64_avg avx2/;
+ specialize qw/aom_highbd_sad64x32_avg avx2/;
+ specialize qw/aom_highbd_sad32x64_avg avx2/;
+ specialize qw/aom_highbd_sad32x32_avg avx2/;
+ specialize qw/aom_highbd_sad32x16_avg avx2/;
+ specialize qw/aom_highbd_sad16x32_avg avx2/;
+ specialize qw/aom_highbd_sad16x16_avg avx2/;
+ specialize qw/aom_highbd_sad16x8_avg avx2/;
}
- specialize qw/aom_highbd_sad128x128 avx2/;
- specialize qw/aom_highbd_sad128x64 avx2/;
- specialize qw/aom_highbd_sad64x128 avx2/;
- specialize qw/aom_highbd_sad64x64 avx2/;
- specialize qw/aom_highbd_sad64x32 avx2/;
- specialize qw/aom_highbd_sad32x64 avx2/;
- specialize qw/aom_highbd_sad32x32 avx2/;
- specialize qw/aom_highbd_sad32x16 avx2/;
- specialize qw/aom_highbd_sad16x32 avx2/;
- specialize qw/aom_highbd_sad16x16 avx2/;
- specialize qw/aom_highbd_sad16x8 avx2/;
-
- specialize qw/aom_highbd_sad128x128_avg avx2/;
- specialize qw/aom_highbd_sad128x64_avg avx2/;
- specialize qw/aom_highbd_sad64x128_avg avx2/;
- specialize qw/aom_highbd_sad64x64_avg avx2/;
- specialize qw/aom_highbd_sad64x32_avg avx2/;
- specialize qw/aom_highbd_sad32x64_avg avx2/;
- specialize qw/aom_highbd_sad32x32_avg avx2/;
- specialize qw/aom_highbd_sad32x16_avg avx2/;
- specialize qw/aom_highbd_sad16x32_avg avx2/;
- specialize qw/aom_highbd_sad16x16_avg avx2/;
- specialize qw/aom_highbd_sad16x8_avg avx2/;
-}
-#
-# Masked SAD
-#
-if (aom_config("CONFIG_EXT_INTER") eq "yes") {
+ #
+ # Masked SAD
+ #
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
@@ -729,318 +863,326 @@ if (aom_config("CONFIG_EXT_INTER") eq "yes") {
specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3/;
}
}
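+
+  # Illustrative note (assumption, not part of this change): a masked
+  # SAD first blends the two predictors with the 6-bit per-pixel mask,
+  # roughly
+  #
+  #   blended[x] = (m[x] * ref[x] + (64 - m[x]) * second_pred[x] + 32) >> 6;
+  #
+  # (invert_mask swaps which predictor gets m), then takes an ordinary
+  # SAD of the blend against src.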
-}
-
-#
-# OBMC SAD
-#
-if (aom_config("CONFIG_MOTION_VAR") eq "yes") {
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
- specialize "aom_obmc_sad${w}x${h}", qw/sse4_1/;
- }
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ #
+ # OBMC SAD
+ #
+ if (aom_config("CONFIG_MOTION_VAR") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
- specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
+ add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
+ if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
+ specialize "aom_obmc_sad${w}x${h}", qw/sse4_1/;
+ }
+ }
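+
+    # Illustrative note (assumption, not part of this change): the OBMC
+    # SAD compares a prediction against the pre-weighted overlapped
+    # source; wsrc and mask carry fixed-point per-pixel weights, and the
+    # reference accumulates roughly
+    #
+    #   sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
+    #
+    # per pixel, so no separate blending pass is required.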
+
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
+ if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
+ specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
+ }
+ }
}
}
-}
-#
-# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
-#
-# Blocks of 3
-foreach $s (@block_widths) {
- add_proto qw/void/, "aom_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-}
-specialize qw/aom_sad64x64x3 msa/;
-specialize qw/aom_sad32x32x3 msa/;
-specialize qw/aom_sad16x16x3 sse3 ssse3 msa/;
-specialize qw/aom_sad8x8x3 sse3 msa/;
-specialize qw/aom_sad4x4x3 sse3 msa/;
-
-add_proto qw/void/, "aom_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad16x8x3 sse3 ssse3 msa/;
-add_proto qw/void/, "aom_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad8x16x3 sse3 msa/;
-
-# Blocks of 8
-foreach $s (@block_widths) {
- add_proto qw/void/, "aom_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-}
-specialize qw/aom_sad64x64x8 msa/;
-specialize qw/aom_sad32x32x8 msa/;
-specialize qw/aom_sad16x16x8 sse4_1 msa/;
-specialize qw/aom_sad8x8x8 sse4_1 msa/;
-specialize qw/aom_sad4x4x8 sse4_1 msa/;
-
-add_proto qw/void/, "aom_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad16x8x8 sse4_1 msa/;
-add_proto qw/void/, "aom_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad8x16x8 sse4_1 msa/;
-add_proto qw/void/, "aom_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad8x4x8 msa/;
-add_proto qw/void/, "aom_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad4x8x8 msa/;
+ #
+ # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
+ #
+ # Blocks of 3
+ foreach $s (@block_widths) {
+ add_proto qw/void/, "aom_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ }
+ specialize qw/aom_sad64x64x3 msa/;
+ specialize qw/aom_sad32x32x3 msa/;
+ specialize qw/aom_sad16x16x3 sse3 ssse3 msa/;
+ specialize qw/aom_sad8x8x3 sse3 msa/;
+ specialize qw/aom_sad4x4x3 sse3 msa/;
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void/, "aom_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/aom_sad16x8x3 sse3 ssse3 msa/;
+ add_proto qw/void/, "aom_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/aom_sad8x16x3 sse3 msa/;
+
+ # Blocks of 8
foreach $s (@block_widths) {
+ add_proto qw/void/, "aom_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ }
+ specialize qw/aom_sad64x64x8 msa/;
+ specialize qw/aom_sad32x32x8 msa/;
+ specialize qw/aom_sad16x16x8 sse4_1 msa/;
+ specialize qw/aom_sad8x8x8 sse4_1 msa/;
+ specialize qw/aom_sad4x4x8 sse4_1 msa/;
+
+ add_proto qw/void/, "aom_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/aom_sad16x8x8 sse4_1 msa/;
+ add_proto qw/void/, "aom_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/aom_sad8x16x8 sse4_1 msa/;
+ add_proto qw/void/, "aom_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/aom_sad8x4x8 msa/;
+ add_proto qw/void/, "aom_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ specialize qw/aom_sad4x8x8 msa/;
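+
+  # Illustrative note (assumption, not part of this change): the xN
+  # forms score one source block against N candidate positions one
+  # pixel apart, roughly
+  #
+  #   for (int i = 0; i < N; ++i)
+  #     sad_array[i] = block_sad(src_ptr, src_stride, ref_ptr + i,
+  #                              ref_stride);
+  #
+  # (block_sad is a placeholder name); SIMD versions reuse the source
+  # loads across all N offsets.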
+
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ foreach $s (@block_widths) {
+ # Blocks of 3
+ add_proto qw/void/, "aom_highbd_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ # Blocks of 8
+ add_proto qw/void/, "aom_highbd_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ }
# Blocks of 3
- add_proto qw/void/, "aom_highbd_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "aom_highbd_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "aom_highbd_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
# Blocks of 8
- add_proto qw/void/, "aom_highbd_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "aom_highbd_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "aom_highbd_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "aom_highbd_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
+ add_proto qw/void/, "aom_highbd_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
}
- # Blocks of 3
- add_proto qw/void/, "aom_highbd_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- add_proto qw/void/, "aom_highbd_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- # Blocks of 8
- add_proto qw/void/, "aom_highbd_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- add_proto qw/void/, "aom_highbd_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- add_proto qw/void/, "aom_highbd_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
- add_proto qw/void/, "aom_highbd_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
-}
-
-#
-# Multi-block SAD, comparing a reference to N independent blocks
-#
-foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-}
-
-specialize qw/aom_sad128x128x4d avx2 sse2/;
-specialize qw/aom_sad128x64x4d avx2 sse2/;
-specialize qw/aom_sad64x128x4d avx2 sse2/;
-specialize qw/aom_sad64x64x4d avx2 neon msa sse2/;
-specialize qw/aom_sad64x32x4d avx2 msa sse2/;
-specialize qw/aom_sad32x64x4d avx2 msa sse2/;
-specialize qw/aom_sad32x32x4d avx2 neon msa sse2/;
-specialize qw/aom_sad32x16x4d msa sse2/;
-specialize qw/aom_sad16x32x4d msa sse2/;
-specialize qw/aom_sad16x16x4d neon msa sse2/;
-specialize qw/aom_sad16x8x4d msa sse2/;
-specialize qw/aom_sad8x16x4d msa sse2/;
-specialize qw/aom_sad8x8x4d msa sse2/;
-specialize qw/aom_sad8x4x4d msa sse2/;
-specialize qw/aom_sad4x8x4d msa sse2/;
-specialize qw/aom_sad4x4x4d msa sse2/;
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
#
# Multi-block SAD, comparing a reference to N independent blocks
#
foreach (@block_sizes) {
($w, $h) = @$_;
- add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
- if ($w != 128 && $h != 128) {
- specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
+ add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ }
+
+ specialize qw/aom_sad128x128x4d avx2 sse2/;
+ specialize qw/aom_sad128x64x4d avx2 sse2/;
+ specialize qw/aom_sad64x128x4d avx2 sse2/;
+ specialize qw/aom_sad64x64x4d avx2 neon msa sse2/;
+ specialize qw/aom_sad64x32x4d avx2 msa sse2/;
+ specialize qw/aom_sad32x64x4d avx2 msa sse2/;
+ specialize qw/aom_sad32x32x4d avx2 neon msa sse2/;
+ specialize qw/aom_sad32x16x4d msa sse2/;
+ specialize qw/aom_sad16x32x4d msa sse2/;
+ specialize qw/aom_sad16x16x4d neon msa sse2/;
+ specialize qw/aom_sad16x8x4d msa sse2/;
+ specialize qw/aom_sad8x16x4d msa sse2/;
+ specialize qw/aom_sad8x8x4d msa sse2/;
+ specialize qw/aom_sad8x4x4d msa sse2/;
+ specialize qw/aom_sad4x8x4d msa sse2/;
+ specialize qw/aom_sad4x4x4d msa sse2/;
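+
+  # Illustrative note (assumption, not part of this change): unlike the
+  # x3/x8 forms above, x4d takes four unrelated reference pointers,
+  # roughly
+  #
+  #   for (int i = 0; i < 4; ++i)
+  #     sad_array[i] = block_sad(src_ptr, src_stride, ref_ptr[i],
+  #                              ref_stride);
+  #
+  # (block_sad is a placeholder name), matching the several-candidates-
+  # at-once pattern of the encoder's motion search.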
+
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ #
+ # Multi-block SAD, comparing a reference to N independent blocks
+ #
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
+ if ($w != 128 && $h != 128) {
+ specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
+ }
}
+ specialize qw/aom_highbd_sad128x128x4d avx2/;
+ specialize qw/aom_highbd_sad128x64x4d avx2/;
+ specialize qw/aom_highbd_sad64x128x4d avx2/;
+ specialize qw/aom_highbd_sad64x64x4d avx2/;
+ specialize qw/aom_highbd_sad64x32x4d avx2/;
+ specialize qw/aom_highbd_sad32x64x4d avx2/;
+ specialize qw/aom_highbd_sad32x32x4d avx2/;
+ specialize qw/aom_highbd_sad32x16x4d avx2/;
+ specialize qw/aom_highbd_sad16x32x4d avx2/;
+ specialize qw/aom_highbd_sad16x16x4d avx2/;
+ specialize qw/aom_highbd_sad16x8x4d avx2/;
}
- specialize qw/aom_highbd_sad128x128x4d avx2/;
- specialize qw/aom_highbd_sad128x64x4d avx2/;
- specialize qw/aom_highbd_sad64x128x4d avx2/;
- specialize qw/aom_highbd_sad64x64x4d avx2/;
- specialize qw/aom_highbd_sad64x32x4d avx2/;
- specialize qw/aom_highbd_sad32x64x4d avx2/;
- specialize qw/aom_highbd_sad32x32x4d avx2/;
- specialize qw/aom_highbd_sad32x16x4d avx2/;
- specialize qw/aom_highbd_sad16x32x4d avx2/;
- specialize qw/aom_highbd_sad16x16x4d avx2/;
- specialize qw/aom_highbd_sad16x8x4d avx2/;
-}
-#
-# Structured Similarity (SSIM)
-#
-if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
- add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";
+ #
+ # Structured Similarity (SSIM)
+ #
+ if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
+ add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+ specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";
- add_proto qw/void aom_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/aom_ssim_parms_16x16/, "$sse2_x86_64";
+ add_proto qw/void aom_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+ specialize qw/aom_ssim_parms_16x16/, "$sse2_x86_64";
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+ }
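+
+    # Illustrative note (assumption, not part of this change): the parms
+    # helpers only gather the per-window sums (sum_s, sum_r, sum_sq_s,
+    # sum_sq_r, sum_sxr); the caller derives means, variances and the
+    # covariance from them and evaluates the standard SSIM ratio, roughly
+    #
+    #   ssim = ((2*mu_s*mu_r + c1) * (2*cov_sr + c2)) /
+    #          ((mu_s*mu_s + mu_r*mu_r + c1) * (var_s + var_r + c2));
+    #
+    # so only the summation step needs SIMD specializations.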
}
-}
} # CONFIG_AV1_ENCODER
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
-#
-# Specialty Variance
-#
-add_proto qw/void aom_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ #
+ # Specialty Variance
+ #
+ add_proto qw/void aom_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-add_proto qw/void aom_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void aom_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-specialize qw/aom_get16x16var sse2 avx2 neon msa/;
-specialize qw/aom_get8x8var sse2 neon msa/;
+ specialize qw/aom_get16x16var sse2 avx2 neon msa/;
+ specialize qw/aom_get8x8var sse2 neon msa/;
-add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-specialize qw/aom_mse16x16 sse2 avx2 neon msa/;
-specialize qw/aom_mse16x8 sse2 msa/;
-specialize qw/aom_mse8x16 sse2 msa/;
-specialize qw/aom_mse8x8 sse2 msa/;
+ specialize qw/aom_mse16x16 sse2 avx2 neon msa/;
+ specialize qw/aom_mse16x8 sse2 msa/;
+ specialize qw/aom_mse8x16 sse2 msa/;
+ specialize qw/aom_mse8x8 sse2 msa/;
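+
+  # Illustrative note (assumption, not part of this change): the get*var
+  # kernels return the raw sums from which variance is derived, roughly
+  #
+  #   variance = sse - (int64_t)sum * sum / (w * h);
+  #
+  # and the aom_mse kernels accumulate the plain sum of squared
+  # differences, so one SIMD pass can serve several metrics.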
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- foreach $bd (8, 10, 12) {
- add_proto qw/void/, "aom_highbd_${bd}_get16x16var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void/, "aom_highbd_${bd}_get8x8var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ foreach $bd (8, 10, 12) {
+ add_proto qw/void/, "aom_highbd_${bd}_get16x16var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void/, "aom_highbd_${bd}_get8x8var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize "aom_highbd_${bd}_mse16x16", qw/sse2/;
- specialize "aom_highbd_${bd}_mse8x8", qw/sse2/;
+ specialize "aom_highbd_${bd}_mse16x16", qw/sse2/;
+ specialize "aom_highbd_${bd}_mse8x8", qw/sse2/;
+ }
}
-}
-#
-# ...
-#
-add_proto qw/void aom_upsampled_pred/, "uint8_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
-specialize qw/aom_upsampled_pred sse2/;
-add_proto qw/void aom_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
-specialize qw/aom_comp_avg_upsampled_pred sse2/;
+ #
+ # ...
+ #
+ add_proto qw/void aom_upsampled_pred/, "uint8_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
+ specialize qw/aom_upsampled_pred sse2/;
+ add_proto qw/void aom_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
+ specialize qw/aom_comp_avg_upsampled_pred sse2/;
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void aom_highbd_upsampled_pred/, "uint16_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
- specialize qw/aom_highbd_upsampled_pred sse2/;
- add_proto qw/void aom_highbd_comp_avg_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
- specialize qw/aom_highbd_comp_avg_upsampled_pred sse2/;
-}
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void aom_highbd_upsampled_pred/, "uint16_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
+ specialize qw/aom_highbd_upsampled_pred sse2/;
+ add_proto qw/void aom_highbd_comp_avg_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
+ specialize qw/aom_highbd_comp_avg_upsampled_pred sse2/;
+ }
-#
-# ...
-#
-add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *";
-add_proto qw/unsigned int aom_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
+ #
+ # ...
+ #
+ add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *";
+ add_proto qw/unsigned int aom_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
-specialize qw/aom_get_mb_ss sse2 msa/;
-specialize qw/aom_get4x4sse_cs neon msa/;
+ specialize qw/aom_get_mb_ss sse2 msa/;
+ specialize qw/aom_get4x4sse_cs neon msa/;
-#
-# Variance / Subpixel Variance / Subpixel Avg Variance
-#
+ #
+ # Variance / Subpixel Variance / Subpixel Avg Variance
+ #
add_proto qw/unsigned int/, "aom_variance2x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
add_proto qw/unsigned int/, "aom_variance2x4", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
add_proto qw/unsigned int/, "aom_variance4x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
-}
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ }
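+
+  # Illustrative sketch (assumption, not part of this change): the
+  # sub_pixel variants first shift the first operand by (xoffset,
+  # yoffset) in eighth-pel steps with a separable 2-tap bilinear filter,
+  # then run the whole-pel variance on the result; the _avg forms also
+  # average in second_pred before measuring. Roughly:
+  #
+  #   bilinear_h(src_ptr, tmp1, xoffset);  // horizontal pass
+  #   bilinear_v(tmp1, tmp2, yoffset);     // vertical pass
+  #   return variance(tmp2, w, ref_ptr, ref_stride, sse);
+  #
+  # (bilinear_h/v, tmp1 and tmp2 are placeholder names).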
-specialize qw/aom_variance64x64 sse2 avx2 neon msa/;
-specialize qw/aom_variance64x32 sse2 avx2 neon msa/;
-specialize qw/aom_variance32x64 sse2 neon msa/;
-specialize qw/aom_variance32x32 sse2 avx2 neon msa/;
-specialize qw/aom_variance32x16 sse2 avx2 msa/;
-specialize qw/aom_variance16x32 sse2 msa/;
-specialize qw/aom_variance16x16 sse2 avx2 neon msa/;
-specialize qw/aom_variance16x8 sse2 neon msa/;
-specialize qw/aom_variance8x16 sse2 neon msa/;
-specialize qw/aom_variance8x8 sse2 neon msa/;
-specialize qw/aom_variance8x4 sse2 msa/;
-specialize qw/aom_variance4x8 sse2 msa/;
-specialize qw/aom_variance4x4 sse2 msa/;
-
-specialize qw/aom_sub_pixel_variance64x64 avx2 neon msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance64x32 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance32x64 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance32x32 avx2 neon msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance32x16 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance16x32 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance16x16 neon msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance16x8 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance8x16 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance8x8 neon msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance8x4 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance4x8 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_variance4x4 msa sse2 ssse3/;
-
-specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance64x32 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance32x64 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance32x16 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance16x32 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance16x16 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance16x8 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance8x16 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance8x8 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance8x4 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance4x8 msa sse2 ssse3/;
-specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
-
-if (aom_config("CONFIG_EXT_PARTITION_TYPES")) {
- specialize qw/aom_variance4x16 sse2/;
- specialize qw/aom_variance16x4 sse2/;
- specialize qw/aom_variance8x32 sse2/;
- specialize qw/aom_variance32x8 sse2/;
- specialize qw/aom_sub_pixel_variance4x16 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x4 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x32 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x8 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance4x16 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x4 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x32 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x8 sse2 ssse3/;
-}
+ specialize qw/aom_variance64x64 sse2 avx2 neon msa/;
+ specialize qw/aom_variance64x32 sse2 avx2 neon msa/;
+ specialize qw/aom_variance32x64 sse2 neon msa/;
+ specialize qw/aom_variance32x32 sse2 avx2 neon msa/;
+ specialize qw/aom_variance32x16 sse2 avx2 msa/;
+ specialize qw/aom_variance16x32 sse2 msa/;
+ specialize qw/aom_variance16x16 sse2 avx2 neon msa/;
+ specialize qw/aom_variance16x8 sse2 neon msa/;
+ specialize qw/aom_variance8x16 sse2 neon msa/;
+ specialize qw/aom_variance8x8 sse2 neon msa/;
+ specialize qw/aom_variance8x4 sse2 msa/;
+ specialize qw/aom_variance4x8 sse2 msa/;
+ specialize qw/aom_variance4x4 sse2 msa/;
+
+ specialize qw/aom_sub_pixel_variance64x64 avx2 neon msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance64x32 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance32x64 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance32x32 avx2 neon msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance32x16 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance16x32 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance16x16 neon msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance16x8 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance8x16 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance8x8 neon msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance8x4 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance4x8 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance4x4 msa sse2 ssse3/;
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- foreach $bd (8, 10, 12) {
- add_proto qw/unsigned int/, "aom_highbd_${bd}_variance2x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x32 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x64 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x16 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x32 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x16 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x8 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x16 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x8 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x4 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance4x8 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
+
+ if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
+ specialize qw/aom_variance4x16 sse2/;
+ specialize qw/aom_variance16x4 sse2/;
+ specialize qw/aom_variance8x32 sse2/;
+ specialize qw/aom_variance32x8 sse2/;
+ specialize qw/aom_variance16x64 sse2/;
+ specialize qw/aom_variance64x16 sse2/;
+ specialize qw/aom_sub_pixel_variance4x16 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance16x4 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance8x32 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance32x8 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance16x64 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance64x16 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance4x16 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x4 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x32 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x8 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x64 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x16 sse2 ssse3/;
+ }
- add_proto qw/unsigned int/, "aom_highbd_${bd}_variance2x4", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ foreach $bd (8, 10, 12) {
+ add_proto qw/unsigned int/, "aom_highbd_${bd}_variance2x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_highbd_${bd}_variance4x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "aom_highbd_${bd}_variance2x4", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- if ($w != 128 && $h != 128 && $w != 4 && $h != 4) {
- specialize "aom_highbd_${bd}_variance${w}x${h}", "sse2";
- }
- # TODO(david.barker): When ext-partition-types is enabled, we currenly
- # don't have vectorized 4x16 highbd variance functions
- if ($w == 4 && $h == 4) {
- specialize "aom_highbd_${bd}_variance${w}x${h}", "sse4_1";
- }
- if ($w != 128 && $h != 128 && $w != 4) {
- specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", qw/sse2/;
- specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", qw/sse2/;
- }
- if ($w == 4 && $h == 4) {
- specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "sse4_1";
- specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "sse4_1";
+ add_proto qw/unsigned int/, "aom_highbd_${bd}_variance4x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ if ($w != 128 && $h != 128 && $w != 4 && $h != 4) {
+ specialize "aom_highbd_${bd}_variance${w}x${h}", "sse2";
+ }
+ # TODO(david.barker): When ext-partition-types is enabled, we currently
+ # don't have vectorized 4x16 highbd variance functions
+ if ($w == 4 && $h == 4) {
+ specialize "aom_highbd_${bd}_variance${w}x${h}", "sse4_1";
+ }
+ if ($w != 128 && $h != 128 && $w != 4) {
+ specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", qw/sse2/;
+ specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", qw/sse2/;
+ }
+ if ($w == 4 && $h == 4) {
+ specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "sse4_1";
+ specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "sse4_1";
+ }
}
}
- }
-} # CONFIG_HIGHBITDEPTH
+ } # CONFIG_HIGHBITDEPTH
-if (aom_config("CONFIG_EXT_INTER") eq "yes") {
-#
-# Masked Variance / Masked Subpixel Variance
-#
+ #
+ # Masked Variance / Masked Subpixel Variance
+ #
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
@@ -1056,453 +1198,450 @@ if (aom_config("CONFIG_EXT_INTER") eq "yes") {
}
}
}
-}
-#
-# OBMC Variance / OBMC Subpixel Variance
-#
-if (aom_config("CONFIG_MOTION_VAR") eq "yes") {
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
- specialize "aom_obmc_variance${w}x${h}", q/sse4_1/;
- }
+ #
+ # OBMC Variance / OBMC Subpixel Variance
+ #
+ if (aom_config("CONFIG_MOTION_VAR") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
+ add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
+ specialize "aom_obmc_variance${w}x${h}", q/sse4_1/;
+ }
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- foreach $bd ("_", "_10_", "_12_") {
- foreach (@block_sizes) {
- ($w, $h) = @$_;
- add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
- add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
- specialize "aom_highbd${bd}obmc_variance${w}x${h}", qw/sse4_1/;
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ foreach $bd ("_", "_10_", "_12_") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
+ add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
+ specialize "aom_highbd${bd}obmc_variance${w}x${h}", qw/sse4_1/;
+ }
}
}
}
-}
-add_proto qw/uint32_t aom_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance64x32 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance32x64 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance32x16 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance16x32 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance16x16 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance16x8 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance8x16 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance8x8 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance8x4 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance4x8 msa sse2 ssse3/;
-add_proto qw/uint32_t aom_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
-#
-# Specialty Subpixel
-#
-add_proto qw/uint32_t aom_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+ #
+ # Specialty Subpixel
+ #
+ add_proto qw/uint32_t aom_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/aom_variance_halfpixvar16x16_h sse2/;
-add_proto qw/uint32_t aom_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t aom_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/aom_variance_halfpixvar16x16_v sse2/;
-add_proto qw/uint32_t aom_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t aom_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/aom_variance_halfpixvar16x16_hv sse2/;
-#
-# Comp Avg
-#
-add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/unsigned int aom_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance64x64 sse2/;
+ #
+ # Comp Avg
+ #
+ add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
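+
+  # Illustrative note (assumption, not part of this change): the
+  # reference comp-avg is a rounded per-pixel mean of the two
+  # predictions,
+  #
+  #   comp_pred[x] = ROUND_POWER_OF_TWO(pred[x] + ref[x], 1);
+  #
+  # i.e. (pred[x] + ref[x] + 1) >> 1, walked over width x height with
+  # the given ref_stride on the ref side.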
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/unsigned int aom_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_variance64x64 sse2/;
- add_proto qw/unsigned int aom_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance64x32 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_variance64x32 sse2/;
- add_proto qw/unsigned int aom_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance32x64 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_variance32x64 sse2/;
- add_proto qw/unsigned int aom_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance32x32 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_variance32x32 sse2/;
- add_proto qw/unsigned int aom_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance32x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_variance32x16 sse2/;
- add_proto qw/unsigned int aom_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance16x32 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_variance16x32 sse2/;
- add_proto qw/unsigned int aom_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance16x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_variance16x16 sse2/;
- add_proto qw/unsigned int aom_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance16x8 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_variance16x8 sse2/;
- add_proto qw/unsigned int aom_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance8x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_variance8x16 sse2/;
- add_proto qw/unsigned int aom_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_variance8x8 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_variance8x8 sse2/;
- add_proto qw/unsigned int aom_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance64x64 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_variance64x64 sse2/;
- add_proto qw/unsigned int aom_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance64x32 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_variance64x32 sse2/;
- add_proto qw/unsigned int aom_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance32x64 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_variance32x64 sse2/;
- add_proto qw/unsigned int aom_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance32x32 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_variance32x32 sse2/;
- add_proto qw/unsigned int aom_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance32x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_variance32x16 sse2/;
- add_proto qw/unsigned int aom_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance16x32 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_variance16x32 sse2/;
- add_proto qw/unsigned int aom_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance16x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_variance16x16 sse2/;
- add_proto qw/unsigned int aom_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance16x8 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_variance16x8 sse2/;
- add_proto qw/unsigned int aom_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance8x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_variance8x16 sse2/;
- add_proto qw/unsigned int aom_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_variance8x8 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_variance8x8 sse2/;
- add_proto qw/unsigned int aom_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance64x64 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_variance64x64 sse2/;
- add_proto qw/unsigned int aom_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance64x32 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_variance64x32 sse2/;
- add_proto qw/unsigned int aom_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance32x64 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_variance32x64 sse2/;
- add_proto qw/unsigned int aom_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance32x32 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_variance32x32 sse2/;
- add_proto qw/unsigned int aom_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance32x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_variance32x16 sse2/;
- add_proto qw/unsigned int aom_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance16x32 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_variance16x32 sse2/;
- add_proto qw/unsigned int aom_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance16x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_variance16x16 sse2/;
- add_proto qw/unsigned int aom_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance16x8 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_variance16x8 sse2/;
- add_proto qw/unsigned int aom_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance8x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_variance8x16 sse2/;
- add_proto qw/unsigned int aom_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_variance8x8 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_variance8x8 sse2/;
- add_proto qw/unsigned int aom_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
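
The 8/10/12 prefixes on these variance prototypes name the bit depth; the arithmetic is the same walk in every case, with the 10- and 12-bit flavors scaling their accumulators back to 8-bit range. A minimal sketch of the shared kernel, assuming the usual aom_dsp/variance.c structure (names here are illustrative; src/ref stand for the CONVERT_TO_SHORTPTR'd 16-bit views of the uint8_t * arguments):

    #include <stdint.h>

    /* Accumulate the signed sum and the sum of squared differences
     * over a w x h block. */
    static void highbd_variance_sketch(const uint16_t *src, int src_stride,
                                       const uint16_t *ref, int ref_stride,
                                       int w, int h, uint64_t *sse,
                                       int64_t *sum) {
      *sse = 0;
      *sum = 0;
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          const int diff = src[j] - ref[j];
          *sum += diff;
          *sse += (uint32_t)(diff * diff);
        }
        src += src_stride;
        ref += ref_stride;
      }
    }

The reported variance is then sse - sum * sum / (w * h); the 10-bit variants first apply a rounded right shift of 2 to sum and 4 to sse, and the 12-bit variants 4 and 8, so every depth reports at 8-bit scale. The get16x16var/get8x8var prototypes just below expose the raw (sse, sum) pair instead.
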
- add_proto qw/void aom_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void aom_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void aom_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void aom_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void aom_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void aom_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void aom_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void aom_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void aom_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/void aom_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void aom_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void aom_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- add_proto qw/unsigned int aom_highbd_8_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_mse16x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_mse16x16 sse2/;
- add_proto qw/unsigned int aom_highbd_8_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_8_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_8_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_8_mse8x8 sse2/;
+ add_proto qw/unsigned int aom_highbd_8_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_8_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_8_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/aom_highbd_8_mse8x8 sse2/;
- add_proto qw/unsigned int aom_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_mse16x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_mse16x16 sse2/;
- add_proto qw/unsigned int aom_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_10_mse8x8 sse2/;
+ add_proto qw/unsigned int aom_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/aom_highbd_10_mse8x8 sse2/;
- add_proto qw/unsigned int aom_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_mse16x16 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_mse16x16 sse2/;
- add_proto qw/unsigned int aom_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- add_proto qw/unsigned int aom_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_highbd_12_mse8x8 sse2/;
+ add_proto qw/unsigned int aom_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int aom_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/aom_highbd_12_mse8x8 sse2/;
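
The mse prototypes above are the same accumulation with the mean term dropped: the return value is the raw sum of squared differences, which is why there is no sum output. A sketch under the same assumptions as the variance kernel:

    /* MSE == SSE here: no sum*sum/(w*h) correction is subtracted. */
    static unsigned int highbd_mse_sketch(const uint16_t *src, int source_stride,
                                          const uint16_t *ref, int recon_stride,
                                          int w, int h) {
      unsigned int sse = 0;
      for (int i = 0; i < h; ++i, src += source_stride, ref += recon_stride)
        for (int j = 0; j < w; ++j) {
          const int d = src[j] - ref[j];
          sse += (unsigned int)(d * d);
        }
      return sse;
    }
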
- add_proto qw/void aom_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
+ add_proto qw/void aom_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
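
aom_highbd_comp_avg_pred fills comp_pred with the rounded average of an existing prediction and a reference block, the building block of compound prediction. A plausible shape for the C fallback, simplified to 16-bit pointers (the actual prototype takes uint8_t * handles that are converted first; treat this as a sketch, not the library's code):

    static void highbd_comp_avg_pred_sketch(uint16_t *comp_pred,
                                            const uint16_t *pred, int width,
                                            int height, const uint16_t *ref,
                                            int ref_stride) {
      for (int i = 0; i < height; ++i) {
        for (int j = 0; j < width; ++j)
          comp_pred[j] = (uint16_t)((pred[j] + ref[j] + 1) >> 1);  /* rounded avg */
        comp_pred += width;
        pred += width;
        ref += ref_stride;
      }
    }
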
- #
- # Subpixel Variance
- #
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance64x64 sse2/;
+ #
+ # Subpixel Variance
+ #
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance64x64 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance64x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance64x32 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance32x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance32x64 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance32x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance32x32 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance32x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance32x16 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance16x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance16x32 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance8x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance8x16 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance8x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance8x8 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance8x4 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_12_sub_pixel_variance8x4 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance64x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance64x64 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance64x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance64x32 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance32x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance32x64 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance32x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance32x32 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance32x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance32x16 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance16x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance16x32 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance8x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance8x16 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance8x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance8x8 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance8x4 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_10_sub_pixel_variance8x4 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance64x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance64x64 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance64x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance64x32 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance32x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance32x64 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance32x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance32x32 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance32x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance32x16 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance16x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance16x32 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance16x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance16x16 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance8x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance8x16 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance8x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance8x8 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance8x4 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ specialize qw/aom_highbd_8_sub_pixel_variance8x4 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
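
The sub_pixel variants take xoffset and yoffset in eighth-pel units (0..7), build a bilinearly interpolated copy of the source block in two passes (horizontal, then vertical), and run the plain variance against ref_ptr on the result. A sketch of one filter pass, assuming the conventional two-tap bilinear table (the weights for each phase sum to 128, so a rounded shift by 7 restores scale):

    static const uint8_t bilinear_sketch[8][2] = {
      { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
      { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
    };

    /* pixel_step is 1 for the horizontal pass and the row stride for
     * the vertical pass (names illustrative). */
    static void highbd_bil_pass_sketch(const uint16_t *src, int src_stride,
                                       uint16_t *out, int w, int h,
                                       int pixel_step, const uint8_t *f) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j)
          out[j] = (uint16_t)((src[j] * f[0] + src[j + pixel_step] * f[1] + 64) >> 7);
        src += src_stride;
        out += w;
      }
    }

The horizontal pass covers h + 1 rows so the vertical pass has the extra row it needs.
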
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4 sse2/;
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4 sse2/;
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4 sse2/;
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4 sse2/;
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4 sse2/;
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4 sse2/;
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
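
The _avg_ flavor differs from the plain sub-pixel variance by one step: after interpolation, the filtered block is averaged against second_pred (the other half of a compound prediction) before the variance against ref_ptr is measured. A sketch of that extra step:

    /* Rounded average of the filtered block with the second predictor,
     * applied in place before the variance kernel runs (sketch). */
    static void avg_second_pred_sketch(uint16_t *filtered,
                                       const uint16_t *second_pred, int n) {
      for (int i = 0; i < n; ++i)
        filtered[i] = (uint16_t)((filtered[i] + second_pred[i] + 1) >> 1);
    }
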
-} # CONFIG_HIGHBITDEPTH
+ } # CONFIG_HIGHBITDEPTH
-if (aom_config("CONFIG_EXT_INTER") eq "yes") {
add_proto qw/void aom_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
add_proto qw/void aom_comp_mask_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_comp_mask_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
add_proto qw/void aom_highbd_comp_mask_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask, int bd";
}
-}
} # CONFIG_AV1_ENCODER
diff --git a/third_party/aom/aom_dsp/arm/intrapred_neon.c b/third_party/aom/aom_dsp/arm/intrapred_neon.c
index 2dc5b2e56..7d5f64004 100644
--- a/third_party/aom/aom_dsp/arm/intrapred_neon.c
+++ b/third_party/aom/aom_dsp/arm/intrapred_neon.c
@@ -529,229 +529,4 @@ void aom_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
}
}
}
-
-void aom_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int i;
- uint16x8_t q1u16, q3u16;
- int16x8_t q1s16;
- uint8x8_t d0u8 = vdup_n_u8(0);
- uint32x2_t d2u32 = vdup_n_u32(0);
-
- d0u8 = vld1_dup_u8(above - 1);
- d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0);
- q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8);
- for (i = 0; i < 4; i++, dst += stride) {
- q1u16 = vdupq_n_u16((uint16_t)left[i]);
- q1s16 =
- vaddq_s16(vreinterpretq_s16_u16(q1u16), vreinterpretq_s16_u16(q3u16));
- d0u8 = vqmovun_s16(q1s16);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
- }
-}
-
-void aom_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int j;
- uint16x8_t q0u16, q3u16, q10u16;
- int16x8_t q0s16;
- uint16x4_t d20u16;
- uint8x8_t d0u8, d2u8, d30u8;
-
- d0u8 = vld1_dup_u8(above - 1);
- d30u8 = vld1_u8(left);
- d2u8 = vld1_u8(above);
- q10u16 = vmovl_u8(d30u8);
- q3u16 = vsubl_u8(d2u8, d0u8);
- d20u16 = vget_low_u16(q10u16);
- for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) {
- q0u16 = vdupq_lane_u16(d20u16, 0);
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q3u16), vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += stride;
- q0u16 = vdupq_lane_u16(d20u16, 1);
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q3u16), vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += stride;
- q0u16 = vdupq_lane_u16(d20u16, 2);
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q3u16), vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += stride;
- q0u16 = vdupq_lane_u16(d20u16, 3);
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q3u16), vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += stride;
- }
-}
-
-void aom_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int j, k;
- uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16;
- uint8x16_t q0u8, q1u8;
- int16x8_t q0s16, q1s16, q8s16, q11s16;
- uint16x4_t d20u16;
- uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8;
-
- q0u8 = vld1q_dup_u8(above - 1);
- q1u8 = vld1q_u8(above);
- q2u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8));
- q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8));
- for (k = 0; k < 2; k++, left += 8) {
- d18u8 = vld1_u8(left);
- q10u16 = vmovl_u8(d18u8);
- d20u16 = vget_low_u16(q10u16);
- for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) {
- q0u16 = vdupq_lane_u16(d20u16, 0);
- q8u16 = vdupq_lane_u16(d20u16, 1);
- q1s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q2u16));
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q3u16));
- q11s16 =
- vaddq_s16(vreinterpretq_s16_u16(q8u16), vreinterpretq_s16_u16(q2u16));
- q8s16 =
- vaddq_s16(vreinterpretq_s16_u16(q8u16), vreinterpretq_s16_u16(q3u16));
- d2u8 = vqmovun_s16(q1s16);
- d3u8 = vqmovun_s16(q0s16);
- d22u8 = vqmovun_s16(q11s16);
- d23u8 = vqmovun_s16(q8s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8));
- dst += stride;
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8));
- dst += stride;
-
- q0u16 = vdupq_lane_u16(d20u16, 2);
- q8u16 = vdupq_lane_u16(d20u16, 3);
- q1s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q2u16));
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q3u16));
- q11s16 =
- vaddq_s16(vreinterpretq_s16_u16(q8u16), vreinterpretq_s16_u16(q2u16));
- q8s16 =
- vaddq_s16(vreinterpretq_s16_u16(q8u16), vreinterpretq_s16_u16(q3u16));
- d2u8 = vqmovun_s16(q1s16);
- d3u8 = vqmovun_s16(q0s16);
- d22u8 = vqmovun_s16(q11s16);
- d23u8 = vqmovun_s16(q8s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8));
- dst += stride;
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8));
- dst += stride;
- }
- }
-}
-
-void aom_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int j, k;
- uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16;
- uint8x16_t q0u8, q1u8, q2u8;
- int16x8_t q12s16, q13s16, q14s16, q15s16;
- uint16x4_t d6u16;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8;
-
- q0u8 = vld1q_dup_u8(above - 1);
- q1u8 = vld1q_u8(above);
- q2u8 = vld1q_u8(above + 16);
- q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8));
- q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8));
- q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8));
- q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8));
- for (k = 0; k < 4; k++, left += 8) {
- d26u8 = vld1_u8(left);
- q3u16 = vmovl_u8(d26u8);
- d6u16 = vget_low_u16(q3u16);
- for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) {
- q0u16 = vdupq_lane_u16(d6u16, 0);
- q12s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q8u16));
- q13s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += stride;
-
- q0u16 = vdupq_lane_u16(d6u16, 1);
- q12s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q8u16));
- q13s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += stride;
-
- q0u16 = vdupq_lane_u16(d6u16, 2);
- q12s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q8u16));
- q13s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += stride;
-
- q0u16 = vdupq_lane_u16(d6u16, 3);
- q12s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q8u16));
- q13s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += stride;
- }
- }
-}
#endif // !HAVE_NEON_ASM
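
For reference, the TrueMotion (TM) predictor removed above computes
clip(left[r] + above[c] - above[-1]) for every pixel; the NEON intrinsics here
and the assembly below are vectorized forms of this scalar loop. A minimal
sketch, with a hypothetical clip helper, assuming 8-bit pixels:

    #include <stddef.h>
    #include <stdint.h>

    static uint8_t clip_u8(int v) {  // clamp to the valid 8-bit pixel range
      return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }

    // Scalar TM prediction for a bw x bh block: each output pixel is the
    // left neighbor plus the above neighbor minus the top-left corner.
    static void tm_predictor_sketch(uint8_t *dst, ptrdiff_t stride, int bw,
                                    int bh, const uint8_t *above,
                                    const uint8_t *left) {
      const int ytop_left = above[-1];
      for (int r = 0; r < bh; r++) {
        for (int c = 0; c < bw; c++)
          dst[c] = clip_u8(left[r] + above[c] - ytop_left);
        dst += stride;
      }
    }
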
diff --git a/third_party/aom/aom_dsp/arm/intrapred_neon_asm.asm b/third_party/aom/aom_dsp/arm/intrapred_neon_asm.asm
index 7d04d3553..fba9c1b5b 100644
--- a/third_party/aom/aom_dsp/arm/intrapred_neon_asm.asm
+++ b/third_party/aom/aom_dsp/arm/intrapred_neon_asm.asm
@@ -19,10 +19,6 @@
EXPORT |aom_h_predictor_8x8_neon|
EXPORT |aom_h_predictor_16x16_neon|
EXPORT |aom_h_predictor_32x32_neon|
- EXPORT |aom_tm_predictor_4x4_neon|
- EXPORT |aom_tm_predictor_8x8_neon|
- EXPORT |aom_tm_predictor_16x16_neon|
- EXPORT |aom_tm_predictor_32x32_neon|
ARM
REQUIRE8
PRESERVE8
@@ -289,345 +285,3 @@ loop_h
bgt loop_h
bx lr
ENDP ; |aom_h_predictor_32x32_neon|
-
-;void aom_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
-; const uint8_t *above,
-; const uint8_t *left)
-; r0 uint8_t *dst
-; r1 ptrdiff_t y_stride
-; r2 const uint8_t *above
-; r3 const uint8_t *left
-
-|aom_tm_predictor_4x4_neon| PROC
- ; Load ytop_left = above[-1];
- sub r12, r2, #1
- vld1.u8 {d0[]}, [r12]
-
- ; Load above 4 pixels
- vld1.32 {d2[0]}, [r2]
-
- ; Compute above - ytop_left
- vsubl.u8 q3, d2, d0
-
- ; Load left row by row and compute left + (above - ytop_left)
- ; 1st row and 2nd row
- vld1.u8 {d2[]}, [r3]!
- vld1.u8 {d4[]}, [r3]!
- vmovl.u8 q1, d2
- vmovl.u8 q2, d4
- vadd.s16 q1, q1, q3
- vadd.s16 q2, q2, q3
- vqmovun.s16 d0, q1
- vqmovun.s16 d1, q2
- vst1.32 {d0[0]}, [r0], r1
- vst1.32 {d1[0]}, [r0], r1
-
- ; 3rd row and 4th row
- vld1.u8 {d2[]}, [r3]!
- vld1.u8 {d4[]}, [r3]
- vmovl.u8 q1, d2
- vmovl.u8 q2, d4
- vadd.s16 q1, q1, q3
- vadd.s16 q2, q2, q3
- vqmovun.s16 d0, q1
- vqmovun.s16 d1, q2
- vst1.32 {d0[0]}, [r0], r1
- vst1.32 {d1[0]}, [r0], r1
- bx lr
- ENDP ; |aom_tm_predictor_4x4_neon|
-
-;void aom_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
-; const uint8_t *above,
-; const uint8_t *left)
-; r0 uint8_t *dst
-; r1 ptrdiff_t y_stride
-; r2 const uint8_t *above
-; r3 const uint8_t *left
-
-|aom_tm_predictor_8x8_neon| PROC
- ; Load ytop_left = above[-1];
- sub r12, r2, #1
- vld1.8 {d0[]}, [r12]
-
- ; preload 8 left
- vld1.8 {d30}, [r3]
-
- ; Load above 8 pixels
- vld1.64 {d2}, [r2]
-
- vmovl.u8 q10, d30
-
- ; Compute above - ytop_left
- vsubl.u8 q3, d2, d0
-
- ; Load left row by row and compute left + (above - ytop_left)
- ; 1st row and 2nd row
- vdup.16 q0, d20[0]
- vdup.16 q1, d20[1]
- vadd.s16 q0, q3, q0
- vadd.s16 q1, q3, q1
-
- ; 3rd row and 4th row
- vdup.16 q8, d20[2]
- vdup.16 q9, d20[3]
- vadd.s16 q8, q3, q8
- vadd.s16 q9, q3, q9
-
- vqmovun.s16 d0, q0
- vqmovun.s16 d1, q1
- vqmovun.s16 d2, q8
- vqmovun.s16 d3, q9
-
- vst1.64 {d0}, [r0], r1
- vst1.64 {d1}, [r0], r1
- vst1.64 {d2}, [r0], r1
- vst1.64 {d3}, [r0], r1
-
- ; 5th row and 6th row
- vdup.16 q0, d21[0]
- vdup.16 q1, d21[1]
- vadd.s16 q0, q3, q0
- vadd.s16 q1, q3, q1
-
- ; 7th row and 8th row
- vdup.16 q8, d21[2]
- vdup.16 q9, d21[3]
- vadd.s16 q8, q3, q8
- vadd.s16 q9, q3, q9
-
- vqmovun.s16 d0, q0
- vqmovun.s16 d1, q1
- vqmovun.s16 d2, q8
- vqmovun.s16 d3, q9
-
- vst1.64 {d0}, [r0], r1
- vst1.64 {d1}, [r0], r1
- vst1.64 {d2}, [r0], r1
- vst1.64 {d3}, [r0], r1
-
- bx lr
- ENDP ; |aom_tm_predictor_8x8_neon|
-
-;void aom_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride,
-; const uint8_t *above,
-; const uint8_t *left)
-; r0 uint8_t *dst
-; r1 ptrdiff_t y_stride
-; r2 const uint8_t *above
-; r3 const uint8_t *left
-
-|aom_tm_predictor_16x16_neon| PROC
- ; Load ytop_left = above[-1];
- sub r12, r2, #1
- vld1.8 {d0[]}, [r12]
-
- ; Load above 8 pixels
- vld1.8 {q1}, [r2]
-
- ; preload 8 left pixels
- vld1.8 {d18}, [r3]!
-
- ; Compute above - ytop_left
- vsubl.u8 q2, d2, d0
- vsubl.u8 q3, d3, d0
-
- vmovl.u8 q10, d18
-
- ; Load left row by row and compute left + (above - ytop_left)
- ; Process 8 rows per loop iteration and loop 2 times to cover all 16 rows.
- mov r2, #2
-
-loop_16x16_neon
- ; Process two rows.
- vdup.16 q0, d20[0]
- vdup.16 q8, d20[1]
- vadd.s16 q1, q0, q2
- vadd.s16 q0, q0, q3
- vadd.s16 q11, q8, q2
- vadd.s16 q8, q8, q3
- vqmovun.s16 d2, q1
- vqmovun.s16 d3, q0
- vqmovun.s16 d22, q11
- vqmovun.s16 d23, q8
- vdup.16 q0, d20[2] ; preload next 2 rows data
- vdup.16 q8, d20[3]
- vst1.64 {d2,d3}, [r0], r1
- vst1.64 {d22,d23}, [r0], r1
-
- ; Process two rows.
- vadd.s16 q1, q0, q2
- vadd.s16 q0, q0, q3
- vadd.s16 q11, q8, q2
- vadd.s16 q8, q8, q3
- vqmovun.s16 d2, q1
- vqmovun.s16 d3, q0
- vqmovun.s16 d22, q11
- vqmovun.s16 d23, q8
- vdup.16 q0, d21[0] ; preload next 2 rows data
- vdup.16 q8, d21[1]
- vst1.64 {d2,d3}, [r0], r1
- vst1.64 {d22,d23}, [r0], r1
-
- vadd.s16 q1, q0, q2
- vadd.s16 q0, q0, q3
- vadd.s16 q11, q8, q2
- vadd.s16 q8, q8, q3
- vqmovun.s16 d2, q1
- vqmovun.s16 d3, q0
- vqmovun.s16 d22, q11
- vqmovun.s16 d23, q8
- vdup.16 q0, d21[2] ; preload next 2 rows data
- vdup.16 q8, d21[3]
- vst1.64 {d2,d3}, [r0], r1
- vst1.64 {d22,d23}, [r0], r1
-
-
- vadd.s16 q1, q0, q2
- vadd.s16 q0, q0, q3
- vadd.s16 q11, q8, q2
- vadd.s16 q8, q8, q3
- vqmovun.s16 d2, q1
- vqmovun.s16 d3, q0
- vqmovun.s16 d22, q11
- vqmovun.s16 d23, q8
- vld1.8 {d18}, [r3]! ; preload 8 left pixels
- vmovl.u8 q10, d18
- vst1.64 {d2,d3}, [r0], r1
- vst1.64 {d22,d23}, [r0], r1
-
- subs r2, r2, #1
- bgt loop_16x16_neon
-
- bx lr
- ENDP ; |aom_tm_predictor_16x16_neon|
-
-;void aom_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
-; const uint8_t *above,
-; const uint8_t *left)
-; r0 uint8_t *dst
-; r1 ptrdiff_t y_stride
-; r2 const uint8_t *above
-; r3 const uint8_t *left
-
-|aom_tm_predictor_32x32_neon| PROC
- ; Load ytop_left = above[-1];
- sub r12, r2, #1
- vld1.8 {d0[]}, [r12]
-
- ; Load above 32 pixels
- vld1.8 {q1}, [r2]!
- vld1.8 {q2}, [r2]
-
- ; preload 8 left pixels
- vld1.8 {d26}, [r3]!
-
- ; Compute above - ytop_left
- vsubl.u8 q8, d2, d0
- vsubl.u8 q9, d3, d0
- vsubl.u8 q10, d4, d0
- vsubl.u8 q11, d5, d0
-
- vmovl.u8 q3, d26
-
- ; Load left row by row and compute left + (above - ytop_left)
- ; Process 8 rows per loop iteration and loop 4 times to cover all 32 rows.
- mov r2, #4
-
-loop_32x32_neon
- ; Process two rows.
- vdup.16 q0, d6[0]
- vdup.16 q2, d6[1]
- vadd.s16 q12, q0, q8
- vadd.s16 q13, q0, q9
- vadd.s16 q14, q0, q10
- vadd.s16 q15, q0, q11
- vqmovun.s16 d0, q12
- vqmovun.s16 d1, q13
- vadd.s16 q12, q2, q8
- vadd.s16 q13, q2, q9
- vqmovun.s16 d2, q14
- vqmovun.s16 d3, q15
- vadd.s16 q14, q2, q10
- vadd.s16 q15, q2, q11
- vst1.64 {d0-d3}, [r0], r1
- vqmovun.s16 d24, q12
- vqmovun.s16 d25, q13
- vqmovun.s16 d26, q14
- vqmovun.s16 d27, q15
- vdup.16 q1, d6[2]
- vdup.16 q2, d6[3]
- vst1.64 {d24-d27}, [r0], r1
-
- ; Process two rows.
- vadd.s16 q12, q1, q8
- vadd.s16 q13, q1, q9
- vadd.s16 q14, q1, q10
- vadd.s16 q15, q1, q11
- vqmovun.s16 d0, q12
- vqmovun.s16 d1, q13
- vadd.s16 q12, q2, q8
- vadd.s16 q13, q2, q9
- vqmovun.s16 d2, q14
- vqmovun.s16 d3, q15
- vadd.s16 q14, q2, q10
- vadd.s16 q15, q2, q11
- vst1.64 {d0-d3}, [r0], r1
- vqmovun.s16 d24, q12
- vqmovun.s16 d25, q13
- vqmovun.s16 d26, q14
- vqmovun.s16 d27, q15
- vdup.16 q0, d7[0]
- vdup.16 q2, d7[1]
- vst1.64 {d24-d27}, [r0], r1
-
- ; Process two rows.
- vadd.s16 q12, q0, q8
- vadd.s16 q13, q0, q9
- vadd.s16 q14, q0, q10
- vadd.s16 q15, q0, q11
- vqmovun.s16 d0, q12
- vqmovun.s16 d1, q13
- vadd.s16 q12, q2, q8
- vadd.s16 q13, q2, q9
- vqmovun.s16 d2, q14
- vqmovun.s16 d3, q15
- vadd.s16 q14, q2, q10
- vadd.s16 q15, q2, q11
- vst1.64 {d0-d3}, [r0], r1
- vqmovun.s16 d24, q12
- vqmovun.s16 d25, q13
- vqmovun.s16 d26, q14
- vqmovun.s16 d27, q15
- vdup.16 q0, d7[2]
- vdup.16 q2, d7[3]
- vst1.64 {d24-d27}, [r0], r1
-
- ; Process two rows.
- vadd.s16 q12, q0, q8
- vadd.s16 q13, q0, q9
- vadd.s16 q14, q0, q10
- vadd.s16 q15, q0, q11
- vqmovun.s16 d0, q12
- vqmovun.s16 d1, q13
- vadd.s16 q12, q2, q8
- vadd.s16 q13, q2, q9
- vqmovun.s16 d2, q14
- vqmovun.s16 d3, q15
- vadd.s16 q14, q2, q10
- vadd.s16 q15, q2, q11
- vst1.64 {d0-d3}, [r0], r1
- vqmovun.s16 d24, q12
- vqmovun.s16 d25, q13
- vld1.8 {d0}, [r3]! ; preload 8 left pixels
- vqmovun.s16 d26, q14
- vqmovun.s16 d27, q15
- vmovl.u8 q3, d0
- vst1.64 {d24-d27}, [r0], r1
-
- subs r2, r2, #1
- bgt loop_32x32_neon
-
- bx lr
- ENDP ; |aom_tm_predictor_32x32_neon|
-
- END
diff --git a/third_party/aom/aom_dsp/binary_codes_reader.c b/third_party/aom/aom_dsp/binary_codes_reader.c
index bf304dada..4f38afbc5 100644
--- a/third_party/aom/aom_dsp/binary_codes_reader.c
+++ b/third_party/aom/aom_dsp/binary_codes_reader.c
@@ -53,6 +53,15 @@ uint16_t aom_read_primitive_quniform_(aom_reader *r,
return v < m ? v : (v << 1) - m + aom_read_bit(r, ACCT_STR_NAME);
}
+static uint16_t aom_rb_read_primitive_quniform(struct aom_read_bit_buffer *rb,
+ uint16_t n) {
+ if (n <= 1) return 0;
+ const int l = get_msb(n - 1) + 1;
+ const int m = (1 << l) - n;
+ const int v = aom_rb_read_literal(rb, l - 1);
+ return v < m ? v : (v << 1) - m + aom_rb_read_bit(rb);
+}
+
uint16_t aom_read_primitive_refbilevel_(aom_reader *r, uint16_t n, uint16_t p,
uint16_t ref ACCT_STR_PARAM) {
if (n <= 1) return 0;
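
The quasi-uniform code read by aom_rb_read_primitive_quniform above spends
l - 1 bits on the first m = 2^l - n values and l bits on the rest, so no
codeword is wasted when n is not a power of two. A standalone sketch of the
codeword lengths (an illustrative re-implementation, not the library API):

    #include <stdio.h>

    // Bits spent by the quasi-uniform code on v in [0, n-1].
    static int quniform_bits(int n, int v) {
      if (n <= 1) return 0;
      int l = 1;
      while ((1 << l) < n) l++;  // l = get_msb(n - 1) + 1
      const int m = (1 << l) - n;
      return v < m ? l - 1 : l;  // short codewords come first
    }

    int main(void) {
      // For n = 5: l = 3, m = 3, so v in 0..2 costs 2 bits and v in 3..4
      // costs 3 bits.
      for (int v = 0; v < 5; v++)
        printf("v=%d -> %d bits\n", v, quniform_bits(5, v));
      return 0;
    }
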
@@ -101,15 +110,42 @@ uint16_t aom_read_primitive_subexpfin_(aom_reader *r, uint16_t n,
return v;
}
-// Decode finite subexponential code that for a symbol v in [0, n-1] with
-// parameter k
-// based on a reference ref also in [0, n-1].
+static uint16_t aom_rb_read_primitive_subexpfin(struct aom_read_bit_buffer *rb,
+ uint16_t n, uint16_t k) {
+ int i = 0;
+ int mk = 0;
+ uint16_t v;
+ while (1) {
+ int b = (i ? k + i - 1 : k);
+ int a = (1 << b);
+ if (n <= mk + 3 * a) {
+ v = aom_rb_read_primitive_quniform(rb, n - mk) + mk;
+ break;
+ } else {
+ if (aom_rb_read_bit(rb)) {
+ i = i + 1;
+ mk += a;
+ } else {
+ v = aom_rb_read_literal(rb, b) + mk;
+ break;
+ }
+ }
+ }
+ return v;
+}
+
uint16_t aom_read_primitive_refsubexpfin_(aom_reader *r, uint16_t n, uint16_t k,
uint16_t ref ACCT_STR_PARAM) {
return inv_recenter_finite_nonneg(
n, ref, aom_read_primitive_subexpfin(r, n, k, ACCT_STR_NAME));
}
+static uint16_t aom_rb_read_primitive_refsubexpfin(
+ struct aom_read_bit_buffer *rb, uint16_t n, uint16_t k, uint16_t ref) {
+ return inv_recenter_finite_nonneg(n, ref,
+ aom_rb_read_primitive_subexpfin(rb, n, k));
+}
+
// Decodes a finite subexponential code for a symbol v in [-(n-1), n-1] with
// parameter k, based on a reference ref also in [-(n-1), n-1].
int16_t aom_read_signed_primitive_refsubexpfin_(aom_reader *r, uint16_t n,
@@ -120,3 +156,10 @@ int16_t aom_read_signed_primitive_refsubexpfin_(aom_reader *r, uint16_t n,
return aom_read_primitive_refsubexpfin(r, scaled_n, k, ref, ACCT_STR_NAME) -
n + 1;
}
+
+int16_t aom_rb_read_signed_primitive_refsubexpfin(
+ struct aom_read_bit_buffer *rb, uint16_t n, uint16_t k, int16_t ref) {
+ ref += n - 1;
+ const uint16_t scaled_n = (n << 1) - 1;
+ return aom_rb_read_primitive_refsubexpfin(rb, scaled_n, k, ref) - n + 1;
+}
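
aom_rb_read_primitive_subexpfin above walks exponentially growing buckets: a
1 bit skips past a bucket of 2^k, 2^k, 2^(k+1), ... values, a 0 bit selects a
literal inside the current bucket, and the tail of the alphabet falls through
to the quasi-uniform code. The signed variant then only shifts
[-(n-1), n-1] into [0, 2n-2] around the reference. A standalone cost sketch
mirroring the reader loop (illustrative only):

    #include <stdio.h>

    static int quniform_bits(int n, int v) {  // as in the earlier sketch
      if (n <= 1) return 0;
      int l = 1;
      while ((1 << l) < n) l++;
      return v < (1 << l) - n ? l - 1 : l;
    }

    // Bit cost of the finite subexponential code for v in [0, n-1].
    static int subexpfin_bits(int n, int k, int v) {
      int i = 0, mk = 0, bits = 0;
      while (1) {
        const int b = i ? k + i - 1 : k;
        const int a = 1 << b;
        if (n <= mk + 3 * a) return bits + quniform_bits(n - mk, v - mk);
        if (v >= mk + a) {  // "continue" bit: move to the next bucket
          bits++;
          i++;
          mk += a;
        } else {
          return bits + 1 + b;  // stop bit plus b literal bits
        }
      }
    }

    int main(void) {
      // n = 16, k = 1: [0,2) costs 2 bits, [2,4) costs 3 bits, and [4,16)
      // falls through to the quasi-uniform tail (5 or 6 bits).
      for (int v = 0; v < 16; v++)
        printf("v=%2d -> %d bits\n", v, subexpfin_bits(16, 1, v));
      return 0;
    }
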
diff --git a/third_party/aom/aom_dsp/binary_codes_reader.h b/third_party/aom/aom_dsp/binary_codes_reader.h
index 1540cf46b..8885142c9 100644
--- a/third_party/aom/aom_dsp/binary_codes_reader.h
+++ b/third_party/aom/aom_dsp/binary_codes_reader.h
@@ -17,9 +17,11 @@ extern "C" {
#endif
#include <assert.h>
+
#include "./aom_config.h"
#include "aom/aom_integer.h"
#include "aom_dsp/bitreader.h"
+#include "aom_dsp/bitreader_buffer.h"
#define aom_read_primitive_symmetric(r, n, ACCT_STR_NAME) \
aom_read_primitive_symmetric_(r, n ACCT_STR_ARG(ACCT_STR_NAME))
@@ -47,6 +49,9 @@ uint16_t aom_read_primitive_refsubexpfin_(aom_reader *r, uint16_t n, uint16_t k,
int16_t aom_read_signed_primitive_refsubexpfin_(aom_reader *r, uint16_t n,
uint16_t k,
int16_t ref ACCT_STR_PARAM);
+
+int16_t aom_rb_read_signed_primitive_refsubexpfin(
+ struct aom_read_bit_buffer *rb, uint16_t n, uint16_t k, int16_t ref);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/aom_dsp/binary_codes_writer.c b/third_party/aom/aom_dsp/binary_codes_writer.c
index 91e807b29..e092b6278 100644
--- a/third_party/aom/aom_dsp/binary_codes_writer.c
+++ b/third_party/aom/aom_dsp/binary_codes_writer.c
@@ -10,6 +10,7 @@
*/
#include "aom_dsp/bitwriter.h"
+#include "aom_dsp/binary_codes_writer.h"
#include "av1/common/common.h"
@@ -68,6 +69,19 @@ void aom_write_primitive_quniform(aom_writer *w, uint16_t n, uint16_t v) {
}
}
+static void aom_wb_write_primitive_quniform(struct aom_write_bit_buffer *wb,
+ uint16_t n, uint16_t v) {
+ if (n <= 1) return;
+ const int l = get_msb(n - 1) + 1;
+ const int m = (1 << l) - n;
+ if (v < m) {
+ aom_wb_write_literal(wb, v, l - 1);
+ } else {
+ aom_wb_write_literal(wb, m + ((v - m) >> 1), l - 1);
+ aom_wb_write_bit(wb, (v - m) & 1);
+ }
+}
+
int aom_count_primitive_quniform(uint16_t n, uint16_t v) {
if (n <= 1) return 0;
const int l = get_msb(n - 1) + 1;
@@ -155,6 +169,31 @@ void aom_write_primitive_subexpfin(aom_writer *w, uint16_t n, uint16_t k,
}
}
+static void aom_wb_write_primitive_subexpfin(struct aom_write_bit_buffer *wb,
+ uint16_t n, uint16_t k,
+ uint16_t v) {
+ int i = 0;
+ int mk = 0;
+ while (1) {
+ int b = (i ? k + i - 1 : k);
+ int a = (1 << b);
+ if (n <= mk + 3 * a) {
+ aom_wb_write_primitive_quniform(wb, n - mk, v - mk);
+ break;
+ } else {
+ int t = (v >= mk + a);
+ aom_wb_write_bit(wb, t);
+ if (t) {
+ i = i + 1;
+ mk += a;
+ } else {
+ aom_wb_write_literal(wb, v - mk, b);
+ break;
+ }
+ }
+ }
+}
+
int aom_count_primitive_subexpfin(uint16_t n, uint16_t k, uint16_t v) {
int count = 0;
int i = 0;
@@ -184,19 +223,34 @@ int aom_count_primitive_subexpfin(uint16_t n, uint16_t k, uint16_t v) {
// based on a reference ref also in [0, n-1].
// Recenters symbol around r first and then uses a finite subexponential code.
void aom_write_primitive_refsubexpfin(aom_writer *w, uint16_t n, uint16_t k,
- int16_t ref, int16_t v) {
+ uint16_t ref, uint16_t v) {
aom_write_primitive_subexpfin(w, n, k, recenter_finite_nonneg(n, ref, v));
}
+static void aom_wb_write_primitive_refsubexpfin(struct aom_write_bit_buffer *wb,
+ uint16_t n, uint16_t k,
+ uint16_t ref, uint16_t v) {
+ aom_wb_write_primitive_subexpfin(wb, n, k, recenter_finite_nonneg(n, ref, v));
+}
+
void aom_write_signed_primitive_refsubexpfin(aom_writer *w, uint16_t n,
- uint16_t k, uint16_t ref,
- uint16_t v) {
+ uint16_t k, int16_t ref,
+ int16_t v) {
ref += n - 1;
v += n - 1;
const uint16_t scaled_n = (n << 1) - 1;
aom_write_primitive_refsubexpfin(w, scaled_n, k, ref, v);
}
+void aom_wb_write_signed_primitive_refsubexpfin(struct aom_write_bit_buffer *wb,
+ uint16_t n, uint16_t k,
+ int16_t ref, int16_t v) {
+ ref += n - 1;
+ v += n - 1;
+ const uint16_t scaled_n = (n << 1) - 1;
+ aom_wb_write_primitive_refsubexpfin(wb, scaled_n, k, ref, v);
+}
+
int aom_count_primitive_refsubexpfin(uint16_t n, uint16_t k, uint16_t ref,
uint16_t v) {
return aom_count_primitive_subexpfin(n, k, recenter_finite_nonneg(n, ref, v));
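
Taken together, the reader and writer sides added in these two files
round-trip: a value coded with aom_wb_write_signed_primitive_refsubexpfin
decodes back with aom_rb_read_signed_primitive_refsubexpfin given the same
n, k and ref, and values near ref get the shortest codewords. A usage sketch;
the bit-buffer initializers are assumptions about the structs in
bitwriter_buffer.h/bitreader_buffer.h, which this diff does not show:

    #include <stdint.h>

    #include "aom_dsp/binary_codes_reader.h"
    #include "aom_dsp/binary_codes_writer.h"

    static void refsubexpfin_roundtrip(void) {
      uint8_t buf[64] = { 0 };
      // Assumed layout: { bit_buffer, bit_offset }.
      struct aom_write_bit_buffer wb = { buf, 0 };
      const uint16_t n = 128, k = 3;
      const int16_t ref = 10, v = 13;  // close to ref -> short codeword
      aom_wb_write_signed_primitive_refsubexpfin(&wb, n, k, ref, v);

      // Assumed layout: { buffer, buffer_end, bit_offset, user data,
      // error handler }.
      struct aom_read_bit_buffer rb = { buf, buf + sizeof(buf), 0, NULL, NULL };
      const int16_t decoded =
          aom_rb_read_signed_primitive_refsubexpfin(&rb, n, k, ref);
      (void)decoded;  // decoded == v
    }
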
diff --git a/third_party/aom/aom_dsp/binary_codes_writer.h b/third_party/aom/aom_dsp/binary_codes_writer.h
index ab5ccbf15..18ad5078f 100644
--- a/third_party/aom/aom_dsp/binary_codes_writer.h
+++ b/third_party/aom/aom_dsp/binary_codes_writer.h
@@ -20,6 +20,7 @@ extern "C" {
#include "./aom_config.h"
#include "aom/aom_integer.h"
#include "aom_dsp/bitwriter.h"
+#include "aom_dsp/bitwriter_buffer.h"
// Codes a symbol v in [-2^mag_bits, 2^mag_bits]
// mag_bits is number of bits for magnitude. The alphabet is of size
@@ -53,6 +54,10 @@ void aom_write_signed_primitive_refsubexpfin(aom_writer *w, uint16_t n,
uint16_t k, int16_t ref,
int16_t v);
+void aom_wb_write_signed_primitive_refsubexpfin(struct aom_write_bit_buffer *wb,
+ uint16_t n, uint16_t k,
+ int16_t ref, int16_t v);
+
// Functions that counts bits for the above primitives
int aom_count_primitive_symmetric(int16_t v, unsigned int mag_bits);
int aom_count_primitive_quniform(uint16_t n, uint16_t v);
diff --git a/third_party/aom/aom_dsp/bitreader.h b/third_party/aom/aom_dsp/bitreader.h
index 88bedccc2..00424fa76 100644
--- a/third_party/aom/aom_dsp/bitreader.h
+++ b/third_party/aom/aom_dsp/bitreader.h
@@ -50,6 +50,11 @@
#define aom_read_symbol(r, cdf, nsymbs, ACCT_STR_NAME) \
aom_read_symbol_(r, cdf, nsymbs ACCT_STR_ARG(ACCT_STR_NAME))
+#if CONFIG_LV_MAP
+#define aom_read_bin(r, cdf, nsymbs, ACCT_STR_NAME) \
+ aom_read_bin_(r, cdf, nsymbs ACCT_STR_ARG(ACCT_STR_NAME))
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -198,6 +203,16 @@ static INLINE int aom_read_symbol_(aom_reader *r, aom_cdf_prob *cdf,
return ret;
}
+#if CONFIG_LV_MAP
+static INLINE int aom_read_bin_(aom_reader *r, aom_cdf_prob *cdf,
+ int nsymbs ACCT_STR_PARAM) {
+ int ret;
+ ret = aom_read_cdf(r, cdf, nsymbs, ACCT_STR_NAME);
+ update_cdf(cdf, ret, nsymbs);
+ return ret;
+}
+#endif
+
static INLINE int aom_read_tree_as_cdf(aom_reader *r,
const aom_tree_index *tree,
const aom_prob *probs) {
diff --git a/third_party/aom/aom_dsp/bitwriter.h b/third_party/aom/aom_dsp/bitwriter.h
index 68bc1c8f8..7d3b34306 100644
--- a/third_party/aom/aom_dsp/bitwriter.h
+++ b/third_party/aom/aom_dsp/bitwriter.h
@@ -62,9 +62,8 @@ static INLINE void init_token_stats(TOKEN_STATS *token_stats) {
static INLINE void aom_start_encode(aom_writer *bc, uint8_t *buffer) {
#if CONFIG_ANS
- (void)bc;
- (void)buffer;
- assert(0 && "buf_ans requires a more complicated startup procedure");
+ aom_buf_ans_alloc(bc, /* error context */ NULL);
+ buf_ans_write_init(bc, buffer);
#else
aom_daala_start_encode(bc, buffer);
#endif
@@ -72,8 +71,8 @@ static INLINE void aom_start_encode(aom_writer *bc, uint8_t *buffer) {
static INLINE void aom_stop_encode(aom_writer *bc) {
#if CONFIG_ANS
- (void)bc;
- assert(0 && "buf_ans requires a more complicated shutdown procedure");
+ aom_buf_ans_flush(bc);
+ bc->pos = buf_ans_write_end(bc);
#else
aom_daala_stop_encode(bc);
#endif
@@ -143,6 +142,14 @@ static INLINE void aom_write_symbol(aom_writer *w, int symb, aom_cdf_prob *cdf,
update_cdf(cdf, symb, nsymbs);
}
+#if CONFIG_LV_MAP
+static INLINE void aom_write_bin(aom_writer *w, int symb, aom_cdf_prob *cdf,
+ int nsymbs) {
+ aom_write_cdf(w, symb, cdf, nsymbs);
+ update_cdf(cdf, symb, nsymbs);
+}
+#endif
+
static INLINE void aom_write_tree_as_cdf(aom_writer *w,
const aom_tree_index *tree,
const aom_prob *probs, int bits,
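
aom_write_bin and aom_read_bin mirror aom_write_symbol/aom_read_symbol: each
call codes against the current CDF and then nudges that CDF toward the
observed symbol, so encoder and decoder adapt in lockstep as long as they see
the same symbol sequence. A toy, self-contained illustration of the
adaptation idea (the real update_cdf in prob.h differs in its rates and in
using the inverse-CDF representation):

    #include <stdint.h>
    #include <stdio.h>

    // Move each cumulative probability a small step toward the outcome s.
    static void toy_update_cdf(uint16_t *cdf, int s, int nsymbs) {
      const int rate = 5;
      for (int i = 0; i < nsymbs - 1; i++) {
        if (i >= s) cdf[i] += (32768 - cdf[i]) >> rate;  // raise P(sym <= i)
        else cdf[i] -= cdf[i] >> rate;                   // lower it
      }
    }

    int main(void) {
      uint16_t cdf[2] = { 16384, 32768 };  // P(0) = 0.5 initially
      for (int t = 0; t < 32; t++) toy_update_cdf(cdf, 0, 2);
      printf("P(0) ~= %.3f\n", cdf[0] / 32768.0);  // drifts toward 1.0
      return 0;
    }
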
diff --git a/third_party/aom/aom_dsp/buf_ans.c b/third_party/aom/aom_dsp/buf_ans.c
index 8fe1ff763..f7703dffc 100644
--- a/third_party/aom/aom_dsp/buf_ans.c
+++ b/third_party/aom/aom_dsp/buf_ans.c
@@ -16,9 +16,8 @@
#include "aom/internal/aom_codec_internal.h"
void aom_buf_ans_alloc(struct BufAnsCoder *c,
- struct aom_internal_error_info *error, int size) {
+ struct aom_internal_error_info *error) {
c->error = error;
- c->size = size;
assert(c->size > 1);
AOM_CHECK_MEM_ERROR(error, c->buf, aom_malloc(c->size * sizeof(*c->buf)));
// Initialize to overfull to trigger the assert in write.
diff --git a/third_party/aom/aom_dsp/buf_ans.h b/third_party/aom/aom_dsp/buf_ans.h
index 0768506b3..f84ff3aed 100644
--- a/third_party/aom/aom_dsp/buf_ans.h
+++ b/third_party/aom/aom_dsp/buf_ans.h
@@ -46,6 +46,7 @@ struct BufAnsCoder {
#if ANS_MAX_SYMBOLS
int window_size;
#endif
+ int pos; // Output buffer position (bytes written) once the coder is closed
};
// Allocate a buffered ANS coder to store size symbols.
@@ -54,7 +55,7 @@ struct BufAnsCoder {
// When ANS_MAX_SYMBOLS is turned off, size is merely an initial hint and the
// buffer will grow on demand
void aom_buf_ans_alloc(struct BufAnsCoder *c,
- struct aom_internal_error_info *error, int hint);
+ struct aom_internal_error_info *error);
void aom_buf_ans_free(struct BufAnsCoder *c);
diff --git a/third_party/aom/aom_dsp/daalaboolreader.c b/third_party/aom/aom_dsp/daalaboolreader.c
index 0fc7b14a5..c6e3ac82d 100644
--- a/third_party/aom/aom_dsp/daalaboolreader.c
+++ b/third_party/aom/aom_dsp/daalaboolreader.c
@@ -17,7 +17,7 @@ int aom_daala_reader_init(daala_reader *r, const uint8_t *buffer, int size) {
}
r->buffer_end = buffer + size;
r->buffer = buffer;
- od_ec_dec_init(&r->ec, buffer, size - 1);
+ od_ec_dec_init(&r->ec, buffer, size);
#if CONFIG_ACCOUNTING
r->accounting = NULL;
#endif
diff --git a/third_party/aom/aom_dsp/daalaboolreader.h b/third_party/aom/aom_dsp/daalaboolreader.h
index 428d74db0..55ff8d3d5 100644
--- a/third_party/aom/aom_dsp/daalaboolreader.h
+++ b/third_party/aom/aom_dsp/daalaboolreader.h
@@ -45,11 +45,7 @@ uint32_t aom_daala_reader_tell_frac(const daala_reader *r);
static INLINE int aom_daala_read(daala_reader *r, int prob) {
int bit;
-#if CONFIG_EC_SMALLMUL
int p = (0x7FFFFF - (prob << 15) + prob) >> 8;
-#else
- int p = ((prob << 15) + 256 - prob) >> 8;
-#endif
#if CONFIG_BITSTREAM_DEBUG
/*{
const int queue_r = bitstream_queue_get_read();
@@ -113,6 +109,7 @@ static INLINE int aom_daala_reader_has_error(daala_reader *r) {
static INLINE int daala_read_symbol(daala_reader *r, const aom_cdf_prob *cdf,
int nsymbs) {
int symb;
+ assert(cdf != NULL);
symb = od_ec_decode_cdf_q15(&r->ec, cdf, nsymbs);
#if CONFIG_BITSTREAM_DEBUG
diff --git a/third_party/aom/aom_dsp/daalaboolwriter.c b/third_party/aom/aom_dsp/daalaboolwriter.c
index 0ba8f6ab8..59af2a243 100644
--- a/third_party/aom/aom_dsp/daalaboolwriter.c
+++ b/third_party/aom/aom_dsp/daalaboolwriter.c
@@ -24,9 +24,5 @@ void aom_daala_stop_encode(daala_writer *br) {
daala_data = od_ec_enc_done(&br->ec, &daala_bytes);
memcpy(br->buffer, daala_data, daala_bytes);
br->pos = daala_bytes;
- /* Prevent ec bitstream from being detected as a superframe marker.
- Must always be added, so that rawbits knows the exact length of the
- bitstream. */
- br->buffer[br->pos++] = 0;
od_ec_enc_clear(&br->ec);
}
diff --git a/third_party/aom/aom_dsp/daalaboolwriter.h b/third_party/aom/aom_dsp/daalaboolwriter.h
index bbaf53c69..6ec0f0b54 100644
--- a/third_party/aom/aom_dsp/daalaboolwriter.h
+++ b/third_party/aom/aom_dsp/daalaboolwriter.h
@@ -36,11 +36,7 @@ void aom_daala_start_encode(daala_writer *w, uint8_t *buffer);
void aom_daala_stop_encode(daala_writer *w);
static INLINE void aom_daala_write(daala_writer *w, int bit, int prob) {
-#if CONFIG_EC_SMALLMUL
int p = (0x7FFFFF - (prob << 15) + prob) >> 8;
-#else
- int p = ((prob << 15) + 256 - prob) >> 8;
-#endif
#if CONFIG_BITSTREAM_DEBUG
aom_cdf_prob cdf[2] = { (aom_cdf_prob)p, 32767 };
/*int queue_r = 0;
diff --git a/third_party/aom/aom_dsp/entcode.h b/third_party/aom/aom_dsp/entcode.h
index 534959e66..981a951e6 100644
--- a/third_party/aom/aom_dsp/entcode.h
+++ b/third_party/aom/aom_dsp/entcode.h
@@ -28,15 +28,11 @@ typedef uint32_t od_ec_window;
3 => 1/8th bits.*/
#define OD_BITRES (3)
-/*With CONFIG_EC_SMALLMUL, the value stored in a CDF is 32768 minus the actual
- Q15 cumulative probability (an "inverse" CDF).
+/*The value stored in an iCDF is 32768 minus the actual Q15 cumulative
+ probability (an "inverse" CDF).
This function converts from one representation to the other (and is its own
inverse).*/
-#if CONFIG_EC_SMALLMUL
#define OD_ICDF(x) (32768U - (x))
-#else
-#define OD_ICDF(x) (x)
-#endif
/*See entcode.c for further documentation.*/
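
Because OD_ICDF(x) is 32768 - x, it is its own inverse, and a conventional
Q15 CDF maps to the stored inverse form element-wise: the CDF
{8192, 24576, 32768} (probabilities 0.25, 0.5, 0.25) becomes {24576, 8192, 0}.
A compile-checkable sketch:

    #include <assert.h>

    #define OD_ICDF(x) (32768U - (x))  // as defined above

    int main(void) {
      assert(OD_ICDF(OD_ICDF(8192U)) == 8192U);  // involution
      assert(OD_ICDF(32768U) == 0U);             // terminator maps to 0
      // cdf {8192, 24576, 32768}  <->  icdf {24576, 8192, 0}
      return 0;
    }
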
diff --git a/third_party/aom/aom_dsp/entdec.c b/third_party/aom/aom_dsp/entdec.c
index 49b176cd8..71dad0df6 100644
--- a/third_party/aom/aom_dsp/entdec.c
+++ b/third_party/aom/aom_dsp/entdec.c
@@ -114,12 +114,8 @@ static int od_ec_dec_normalize(od_ec_dec *dec, od_ec_window dif, unsigned rng,
OD_ASSERT(rng <= 65535U);
d = 16 - OD_ILOG_NZ(rng);
dec->cnt -= d;
-#if CONFIG_EC_SMALLMUL
/*This is equivalent to shifting in 1's instead of 0's.*/
dec->dif = ((dif + 1) << d) - 1;
-#else
- dec->dif = dif << d;
-#endif
dec->rng = rng << d;
if (dec->cnt < 0) od_ec_dec_refill(dec);
return ret;
@@ -137,11 +133,7 @@ void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf,
dec->tell_offs = 10 - (OD_EC_WINDOW_SIZE - 8);
dec->end = buf + storage;
dec->bptr = buf;
-#if CONFIG_EC_SMALLMUL
dec->dif = ((od_ec_window)1 << (OD_EC_WINDOW_SIZE - 1)) - 1;
-#else
- dec->dif = 0;
-#endif
dec->rng = 0x8000;
dec->cnt = -15;
dec->error = 0;
@@ -149,8 +141,7 @@ void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf,
}
/*Decode a single binary value.
- {EC_SMALLMUL} f: The probability that the bit is one, scaled by 32768.
- {else} f: The probability that the bit is zero, scaled by 32768.
+ f: The probability that the bit is one, scaled by 32768.
Return: The value decoded (0 or 1).*/
int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned f) {
od_ec_window dif;
@@ -165,7 +156,6 @@ int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned f) {
r = dec->rng;
OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
OD_ASSERT(32768U <= r);
-#if CONFIG_EC_SMALLMUL
v = (r >> 8) * (uint32_t)f >> 7;
vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
ret = 1;
@@ -175,30 +165,19 @@ int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned f) {
dif -= vw;
ret = 0;
}
-#else
- v = f * (uint32_t)r >> 15;
- vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
- ret = 0;
- r_new = v;
- if (dif >= vw) {
- r_new = r - v;
- dif -= vw;
- ret = 1;
- }
-#endif
return od_ec_dec_normalize(dec, dif, r_new, ret);
}
-/*Decodes a symbol given a cumulative distribution function (CDF) table in Q15.
- cdf: The CDF, such that symbol s falls in the range
- [s > 0 ? cdf[s - 1] : 0, cdf[s]).
- The values must be monotonically non-increasing, and cdf[nsyms - 1]
- must be 32768.
- {EC_SMALLMUL}: The CDF contains 32768 minus those values.
+/*Decodes a symbol given an inverse cumulative distribution function (CDF)
+ table in Q15.
+ icdf: 32768 minus the CDF, such that symbol s falls in the range
+ [s > 0 ? (32768 - icdf[s - 1]) : 0, 32768 - icdf[s]).
+ The values must be monotonically non-increasing, and icdf[nsyms - 1]
+ must be 0.
nsyms: The number of symbols in the alphabet.
This should be at most 16.
Return: The decoded symbol s.*/
-int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
+int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *icdf, int nsyms) {
od_ec_window dif;
unsigned r;
unsigned c;
@@ -209,33 +188,19 @@ int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
dif = dec->dif;
r = dec->rng;
OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
- OD_ASSERT(cdf[nsyms - 1] == OD_ICDF(32768U));
+ OD_ASSERT(icdf[nsyms - 1] == OD_ICDF(32768U));
OD_ASSERT(32768U <= r);
-#if CONFIG_EC_SMALLMUL
c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
v = r;
ret = -1;
do {
u = v;
- v = (r >> 8) * (uint32_t)cdf[++ret] >> 7;
+ v = (r >> 8) * (uint32_t)icdf[++ret] >> 7;
} while (c < v);
OD_ASSERT(v < u);
OD_ASSERT(u <= r);
r = u - v;
dif -= (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
-#else
- c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
- v = 0;
- ret = -1;
- do {
- u = v;
- v = cdf[++ret] * (uint32_t)r >> 15;
- } while (v <= c);
- OD_ASSERT(u < v);
- OD_ASSERT(v <= r);
- r = v - u;
- dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
-#endif
return od_ec_dec_normalize(dec, dif, r, ret);
}
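
In the search loop above, v = (r >> 8) * icdf[s] >> 7 computes approximately
r * icdf[s] / 32768 with only a 16x8-bit multiply, and symbol s owns the slice
[v(s), v(s-1)) of the range. A worked example with the illustrative
icdf {24576, 8192, 0} from the entcode.h note:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const unsigned r = 32768;                     // fully normalized range
      const uint16_t icdf[3] = { 24576, 8192, 0 };  // P = 0.25, 0.5, 0.25
      for (int s = 0; s < 3; s++) {
        const unsigned v = (r >> 8) * (uint32_t)icdf[s] >> 7;
        printf("v(%d) = %u\n", s, v);  // prints 24576, 8192, 0
      }
      // Slices: s=0 -> [24576,32768), s=1 -> [8192,24576), s=2 -> [0,8192),
      // widths 8192, 16384, 8192 -- proportional to the probabilities.
      return 0;
    }
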
diff --git a/third_party/aom/aom_dsp/entdec.h b/third_party/aom/aom_dsp/entdec.h
index e1145e81d..35ac7fe0d 100644
--- a/third_party/aom/aom_dsp/entdec.h
+++ b/third_party/aom/aom_dsp/entdec.h
@@ -47,10 +47,8 @@ struct od_ec_dec {
const unsigned char *end;
/*The read pointer for the entropy-coded bits.*/
const unsigned char *bptr;
- /*The difference between the coded value and the low end of the current
- range.
- {EC_SMALLMUL} The difference between the high end of the current range,
- (low + rng), and the coded value, minus 1.
+ /*The difference between the high end of the current range, (low + rng), and
+ the coded value, minus 1.
This stores up to OD_EC_WINDOW_SIZE bits of that difference, but the
decoder only uses the top 16 bits of the window to decode the next symbol.
As we shift up during renormalization, if we don't have enough bits left in
diff --git a/third_party/aom/aom_dsp/entenc.c b/third_party/aom/aom_dsp/entenc.c
index a350f27f4..b8c4dc047 100644
--- a/third_party/aom/aom_dsp/entenc.c
+++ b/third_party/aom/aom_dsp/entenc.c
@@ -143,11 +143,10 @@ void od_ec_enc_clear(od_ec_enc *enc) {
}
/*Encodes a symbol given its frequency in Q15.
- fl: The cumulative frequency of all symbols that come before the one to be
- encoded.
- fh: The cumulative frequency of all symbols up to and including the one to
- be encoded.
- {EC_SMALLMUL} Both values are 32768 minus that.*/
+ fl: 32768 minus the cumulative frequency of all symbols that come before the
+ one to be encoded.
+ fh: 32768 minus the cumulative frequency of all symbols up to and including
+ the one to be encoded.*/
static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh) {
od_ec_window l;
unsigned r;
@@ -156,7 +155,6 @@ static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh) {
l = enc->low;
r = enc->rng;
OD_ASSERT(32768U <= r);
-#if CONFIG_EC_SMALLMUL
OD_ASSERT(fh < fl);
OD_ASSERT(fl <= 32768U);
if (fl < 32768U) {
@@ -167,14 +165,6 @@ static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh) {
} else {
r -= (r >> 8) * (uint32_t)fh >> 7;
}
-#else
- OD_ASSERT(fl < fh);
- OD_ASSERT(fh <= 32768U);
- u = fl * (uint32_t)r >> 15;
- v = fh * (uint32_t)r >> 15;
- r = v - u;
- l += u;
-#endif
od_ec_enc_normalize(enc, l, r);
#if OD_MEASURE_EC_OVERHEAD
enc->entropy -= OD_LOG2((double)(OD_ICDF(fh) - OD_ICDF(fl)) / 32768.);
@@ -184,8 +174,7 @@ static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh) {
/*Encode a single binary value.
val: The value to encode (0 or 1).
- {EC_SMALLMUL} f: The probability that the val is one, scaled by 32768.
- {else} f: The probability that val is zero, scaled by 32768.*/
+ f: The probability that the val is one, scaled by 32768.*/
void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned f) {
od_ec_window l;
unsigned r;
@@ -195,15 +184,9 @@ void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned f) {
l = enc->low;
r = enc->rng;
OD_ASSERT(32768U <= r);
-#if CONFIG_EC_SMALLMUL
v = (r >> 8) * (uint32_t)f >> 7;
if (val) l += r - v;
r = val ? v : r - v;
-#else
- v = f * (uint32_t)r >> 15;
- if (val) l += v;
- r = val ? r - v : v;
-#endif
od_ec_enc_normalize(enc, l, r);
#if OD_MEASURE_EC_OVERHEAD
enc->entropy -=
@@ -214,19 +197,19 @@ void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned f) {
/*Encodes a symbol given a cumulative distribution function (CDF) table in Q15.
s: The index of the symbol to encode.
- cdf: The CDF, such that symbol s falls in the range
- [s > 0 ? cdf[s - 1] : 0, cdf[s]).
- The values must be monotonically non-decreasing, and the last value
- must be exactly 32768.
+ icdf: 32768 minus the CDF, such that symbol s falls in the range
+ [s > 0 ? (32768 - icdf[s - 1]) : 0, 32768 - icdf[s]).
+ The values must be monotonically decreasing, and icdf[nsyms - 1] must
+ be 0.
nsyms: The number of symbols in the alphabet.
This should be at most 16.*/
-void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *cdf,
+void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *icdf,
int nsyms) {
(void)nsyms;
OD_ASSERT(s >= 0);
OD_ASSERT(s < nsyms);
- OD_ASSERT(cdf[nsyms - 1] == OD_ICDF(32768U));
- od_ec_encode_q15(enc, s > 0 ? cdf[s - 1] : OD_ICDF(0), cdf[s]);
+ OD_ASSERT(icdf[nsyms - 1] == OD_ICDF(32768U));
+ od_ec_encode_q15(enc, s > 0 ? icdf[s - 1] : OD_ICDF(0), icdf[s]);
}
#if CONFIG_RAWBITS
diff --git a/third_party/aom/aom_dsp/intrapred.c b/third_party/aom/aom_dsp/intrapred.c
index b4d47ae89..6d2ac37d9 100644
--- a/third_party/aom/aom_dsp/intrapred.c
+++ b/third_party/aom/aom_dsp/intrapred.c
@@ -16,6 +16,7 @@
#include "./aom_dsp_rtcd.h"
#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/intrapred_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/bitops.h"
@@ -179,7 +180,6 @@ static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
}
}
-#if CONFIG_ALT_INTRA
static INLINE int abs_diff(int a, int b) { return (a > b) ? a - b : b - a; }
static INLINE uint16_t paeth_predictor_single(uint16_t left, uint16_t top,
@@ -208,40 +208,6 @@ static INLINE void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
}
}
-// Weights are quadratic from '1' to '1 / block_size', scaled by
-// 2^sm_weight_log2_scale.
-static const int sm_weight_log2_scale = 8;
-
-#if CONFIG_TX64X64
-// max(block_size_wide[BLOCK_LARGEST], block_size_high[BLOCK_LARGEST])
-#define MAX_BLOCK_DIM 64
-#else
-#define MAX_BLOCK_DIM 32
-#endif // CONFIG_TX64X64
-
-static const uint8_t sm_weight_arrays[2 * MAX_BLOCK_DIM] = {
- // Unused, because we always offset by bs, which is at least 2.
- 0, 0,
- // bs = 2
- 255, 128,
- // bs = 4
- 255, 149, 85, 64,
- // bs = 8
- 255, 197, 146, 105, 73, 50, 37, 32,
- // bs = 16
- 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
- // bs = 32
- 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
- 66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
-#if CONFIG_TX64X64
- // bs = 64
- 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
- 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
- 65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
- 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
-#endif // CONFIG_TX64X64
-};
-
// Some basic checks on weights for smooth predictor.
#define sm_weights_sanity_checks(weights_w, weights_h, weights_scale, \
pred_scale) \
@@ -344,21 +310,6 @@ static INLINE void smooth_h_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
}
#endif // CONFIG_SMOOTH_HV
-#else
-
-static INLINE void tm_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
- const uint8_t *above, const uint8_t *left) {
- int r, c;
- int ytop_left = above[-1];
-
- for (r = 0; r < bh; r++) {
- for (c = 0; c < bw; c++)
- dst[c] = clip_pixel(left[r] + above[c] - ytop_left);
- dst += stride;
- }
-}
-#endif // CONFIG_ALT_INTRA
-
static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
int bh, const uint8_t *above,
const uint8_t *left) {
@@ -794,7 +745,6 @@ void aom_highbd_d153_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride,
DST(1, 1) = AVG3(J, I, X);
}
-#if CONFIG_ALT_INTRA
static INLINE void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh, const uint16_t *above,
const uint16_t *left, int bd) {
@@ -901,23 +851,7 @@ static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
dst += stride;
}
}
-#endif
-
-#else
-static INLINE void highbd_tm_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint16_t *above,
- const uint16_t *left, int bd) {
- int r, c;
- int ytop_left = above[-1];
- (void)bd;
-
- for (r = 0; r < bh; r++) {
- for (c = 0; c < bw; c++)
- dst[c] = clip_pixel_highbd(left[r] + above[c] - ytop_left, bd);
- dst += stride;
- }
-}
-#endif // CONFIG_ALT_INTRA
+#endif // CONFIG_SMOOTH_HV
static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
@@ -1017,12 +951,16 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
intra_pred_sized(type, 16, 8) \
intra_pred_sized(type, 16, 32) \
intra_pred_sized(type, 32, 16) \
+ intra_pred_sized(type, 32, 64) \
+ intra_pred_sized(type, 64, 32) \
intra_pred_highbd_sized(type, 4, 8) \
intra_pred_highbd_sized(type, 8, 4) \
intra_pred_highbd_sized(type, 8, 16) \
intra_pred_highbd_sized(type, 16, 8) \
intra_pred_highbd_sized(type, 16, 32) \
- intra_pred_highbd_sized(type, 32, 16)
+ intra_pred_highbd_sized(type, 32, 16) \
+ intra_pred_highbd_sized(type, 32, 64) \
+ intra_pred_highbd_sized(type, 64, 32)
#define intra_pred_above_4x4(type) \
intra_pred_sized(type, 8, 8) \
intra_pred_sized(type, 16, 16) \
@@ -1078,7 +1016,9 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
intra_pred_sized(type, 8, 16) \
intra_pred_sized(type, 16, 8) \
intra_pred_sized(type, 16, 32) \
- intra_pred_sized(type, 32, 16)
+ intra_pred_sized(type, 32, 16) \
+ intra_pred_sized(type, 32, 64) \
+ intra_pred_sized(type, 64, 32)
#define intra_pred_above_4x4(type) \
intra_pred_sized(type, 8, 8) \
intra_pred_sized(type, 16, 16) \
@@ -1118,16 +1058,12 @@ intra_pred_above_4x4(d135)
intra_pred_above_4x4(d153)
intra_pred_allsizes(v)
intra_pred_allsizes(h)
-#if CONFIG_ALT_INTRA
intra_pred_allsizes(smooth)
#if CONFIG_SMOOTH_HV
intra_pred_allsizes(smooth_v)
intra_pred_allsizes(smooth_h)
#endif // CONFIG_SMOOTH_HV
intra_pred_allsizes(paeth)
-#else
-intra_pred_allsizes(tm)
-#endif // CONFIG_ALT_INTRA
intra_pred_allsizes(dc_128)
intra_pred_allsizes(dc_left)
intra_pred_allsizes(dc_top)
diff --git a/third_party/aom/aom_dsp/intrapred_common.h b/third_party/aom/aom_dsp/intrapred_common.h
new file mode 100644
index 000000000..96da49b03
--- /dev/null
+++ b/third_party/aom/aom_dsp/intrapred_common.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef _AOM_DSP_INTRAPRED_COMMON_H
+#define _AOM_DSP_INTRAPRED_COMMON_H
+
+#include "./aom_config.h"
+
+// Weights are quadratic from '1' to '1 / block_size', scaled by
+// 2^sm_weight_log2_scale.
+static const int sm_weight_log2_scale = 8;
+
+#if CONFIG_TX64X64
+// max(block_size_wide[BLOCK_LARGEST], block_size_high[BLOCK_LARGEST])
+#define MAX_BLOCK_DIM 64
+#else
+#define MAX_BLOCK_DIM 32
+#endif // CONFIG_TX64X64
+
+static const uint8_t sm_weight_arrays[2 * MAX_BLOCK_DIM] = {
+ // Unused, because we always offset by bs, which is at least 2.
+ 0, 0,
+ // bs = 2
+ 255, 128,
+ // bs = 4
+ 255, 149, 85, 64,
+ // bs = 8
+ 255, 197, 146, 105, 73, 50, 37, 32,
+ // bs = 16
+ 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
+ // bs = 32
+ 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
+ 66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
+#if CONFIG_TX64X64
+ // bs = 64
+ 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
+ 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
+ 65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
+ 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
+#endif // CONFIG_TX64X64
+};
+
+#endif // _AOM_DSP_INTRAPRED_COMMON_H
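
Each weight row decays quadratically from 255 toward 256/bs (the bs = 4 row
ends at 64 = 256/4). A sketch of the vertical smooth blend these weights
drive, assuming the rounded Q8 form used by the smooth predictors (the full
smooth predictor in intrapred.c also blends horizontally; the helper name
here is hypothetical):

    #include <stdint.h>

    // Vertical-only smooth blend for a 4x4 block: each row mixes `above`
    // with the bottom-left sample, the weight on `above` decaying down the
    // block.
    static void smooth_v_sketch_4x4(uint8_t *dst, int stride,
                                    const uint8_t *above,
                                    const uint8_t *left) {
      static const uint8_t w[4] = { 255, 149, 85, 64 };  // bs = 4 row above
      const int below = left[3];
      for (int r = 0; r < 4; r++) {
        for (int c = 0; c < 4; c++)
          dst[c] =
              (uint8_t)((w[r] * above[c] + (256 - w[r]) * below + 128) >> 8);
        dst += stride;
      }
    }
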
diff --git a/third_party/aom/aom_dsp/inv_txfm.c b/third_party/aom/aom_dsp/inv_txfm.c
index 398eb0a12..6b7c1c2ab 100644
--- a/third_party/aom/aom_dsp/inv_txfm.c
+++ b/third_party/aom/aom_dsp/inv_txfm.c
@@ -14,7 +14,8 @@
#include "./aom_dsp_rtcd.h"
#include "aom_dsp/inv_txfm.h"
-#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8
+#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
+ CONFIG_DAALA_DCT32 || CONFIG_DAALA_DCT64
#include "av1/common/daala_tx.h"
#endif
@@ -96,18 +97,6 @@ void aom_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
}
}
-#if CONFIG_DAALA_DCT4
-void aom_idct4_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[4];
- od_coeff y[4];
- for (i = 0; i < 4; i++) y[i] = input[i];
- od_bin_idct4(x, 1, y);
- for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i];
-}
-
-#else
-
void aom_idct4_c(const tran_low_t *input, tran_low_t *output) {
tran_low_t step[4];
tran_high_t temp1, temp2;
@@ -127,7 +116,6 @@ void aom_idct4_c(const tran_low_t *input, tran_low_t *output) {
output[2] = WRAPLOW(step[1] - step[2]);
output[3] = WRAPLOW(step[0] - step[3]);
}
-#endif
void aom_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out[4 * 4];
@@ -172,18 +160,6 @@ void aom_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
}
}
-#if CONFIG_DAALA_DCT8
-void aom_idct8_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[8];
- od_coeff y[8];
- for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i];
- od_bin_idct8(x, 1, y);
- for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i];
-}
-
-#else
-
void aom_idct8_c(const tran_low_t *input, tran_low_t *output) {
tran_low_t step1[8], step2[8];
tran_high_t temp1, temp2;
@@ -237,7 +213,6 @@ void aom_idct8_c(const tran_low_t *input, tran_low_t *output) {
output[6] = WRAPLOW(step1[1] - step1[6]);
output[7] = WRAPLOW(step1[0] - step1[7]);
}
-#endif
void aom_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out[8 * 8];
@@ -313,18 +288,6 @@ void aom_iadst4_c(const tran_low_t *input, tran_low_t *output) {
output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3));
}
-#if CONFIG_DAALA_DCT8
-void aom_iadst8_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[8];
- od_coeff y[8];
- for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i];
- od_bin_idst8(x, 1, y);
- for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i];
-}
-
-#else
-
void aom_iadst8_c(const tran_low_t *input, tran_low_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
@@ -402,8 +365,6 @@ void aom_iadst8_c(const tran_low_t *input, tran_low_t *output) {
output[7] = WRAPLOW(-x1);
}
-#endif
-
void aom_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
tran_low_t out[8 * 8] = { 0 };
tran_low_t *outptr = out;
@@ -1224,7 +1185,7 @@ void aom_idct32_c(const tran_low_t *input, tran_low_t *output) {
#if CONFIG_MRC_TX
void aom_imrc32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
- int stride, int *mask) {
+ int stride, uint8_t *mask) {
tran_low_t out[32 * 32];
tran_low_t *outptr = out;
int i, j;
@@ -1265,7 +1226,7 @@ void aom_imrc32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
}
void aom_imrc32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int *mask) {
+ uint8_t *mask) {
tran_low_t out[32 * 32] = { 0 };
tran_low_t *outptr = out;
int i, j;
@@ -1295,7 +1256,7 @@ void aom_imrc32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
void aom_imrc32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int *mask) {
+ uint8_t *mask) {
tran_low_t out[32 * 32] = { 0 };
tran_low_t *outptr = out;
int i, j;
diff --git a/third_party/aom/aom_dsp/inv_txfm.h b/third_party/aom/aom_dsp/inv_txfm.h
index a9c485e74..644a6599f 100644
--- a/third_party/aom/aom_dsp/inv_txfm.h
+++ b/third_party/aom/aom_dsp/inv_txfm.h
@@ -55,19 +55,22 @@ static INLINE tran_high_t check_range(tran_high_t input, int bd) {
#if CONFIG_MRC_TX
// These each perform dct but add coefficients based on a mask
void aom_imrc32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
- int stride, int *mask);
+ int stride, uint8_t *mask);
void aom_imrc32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int *mask);
+ uint8_t *mask);
void aom_imrc32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int *mask);
+ uint8_t *mask);
#endif // CONFIG_MRC_TX
void aom_idct4_c(const tran_low_t *input, tran_low_t *output);
void aom_idct8_c(const tran_low_t *input, tran_low_t *output);
void aom_idct16_c(const tran_low_t *input, tran_low_t *output);
void aom_idct32_c(const tran_low_t *input, tran_low_t *output);
+#if CONFIG_TX64X64 && CONFIG_DAALA_DCT64
+void aom_idct64_c(const tran_low_t *input, tran_low_t *output);
+#endif
void aom_iadst4_c(const tran_low_t *input, tran_low_t *output);
void aom_iadst8_c(const tran_low_t *input, tran_low_t *output);
void aom_iadst16_c(const tran_low_t *input, tran_low_t *output);
diff --git a/third_party/aom/aom_dsp/loopfilter.c b/third_party/aom/aom_dsp/loopfilter.c
index 7ea1e6b89..69f131378 100644
--- a/third_party/aom/aom_dsp/loopfilter.c
+++ b/third_party/aom/aom_dsp/loopfilter.c
@@ -23,6 +23,14 @@ static INLINE int8_t signed_char_clamp(int t) {
#define PARALLEL_DEBLOCKING_11_TAP 0
#define PARALLEL_DEBLOCKING_9_TAP 0
+#if CONFIG_DEBLOCK_13TAP
+#define PARALLEL_DEBLOCKING_13_TAP 1
+#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 1
+#else
+#define PARALLEL_DEBLOCKING_13_TAP 0
+#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 0
+#endif
+
#if CONFIG_HIGHBITDEPTH
static INLINE int16_t signed_char_clamp_high(int t, int bd) {
switch (bd) {
@@ -58,6 +66,19 @@ static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, uint8_t p3,
return ~mask;
}
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+static INLINE int8_t flat_mask3_chroma(uint8_t thresh, uint8_t p2, uint8_t p1,
+ uint8_t p0, uint8_t q0, uint8_t q1,
+ uint8_t q2) {
+ int8_t mask = 0;
+ mask |= (abs(p1 - p0) > thresh) * -1;
+ mask |= (abs(q1 - q0) > thresh) * -1;
+ mask |= (abs(p2 - p0) > thresh) * -1;
+ mask |= (abs(q2 - q0) > thresh) * -1;
+ return ~mask;
+}
+#endif
+
static INLINE int8_t flat_mask4(uint8_t thresh, uint8_t p3, uint8_t p2,
uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1,
uint8_t q2, uint8_t q3) {
@@ -216,6 +237,25 @@ void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
aom_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
}
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+static INLINE void filter6(int8_t mask, uint8_t thresh, int8_t flat,
+ uint8_t *op2, uint8_t *op1, uint8_t *op0,
+ uint8_t *oq0, uint8_t *oq1, uint8_t *oq2) {
+ if (flat && mask) {
+ const uint8_t p2 = *op2, p1 = *op1, p0 = *op0;
+ const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2;
+
+ // 5-tap filter [1, 2, 2, 2, 1]
+ *op1 = ROUND_POWER_OF_TWO(p2 * 3 + p1 * 2 + p0 * 2 + q0, 3);
+ *op0 = ROUND_POWER_OF_TWO(p2 + p1 * 2 + p0 * 2 + q0 * 2 + q1, 3);
+ *oq0 = ROUND_POWER_OF_TWO(p1 + p0 * 2 + q0 * 2 + q1 * 2 + q2, 3);
+ *oq1 = ROUND_POWER_OF_TWO(p0 + q0 * 2 + q1 * 2 + q2 * 3, 3);
+ } else {
+ filter4(mask, thresh, op1, op0, oq0, oq1);
+ }
+}
+#endif
+
static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat,
uint8_t *op3, uint8_t *op2, uint8_t *op1,
uint8_t *op0, uint8_t *oq0, uint8_t *oq1,
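
The taps of filter6 above sum to 8 in every output, matching the >> 3 inside
ROUND_POWER_OF_TWO; at the block boundary the tap that would fall outside the
window folds onto p2/q2, which is why those samples carry weight 3. A small
verification:

    #include <assert.h>

    int main(void) {
      // filter6 tap weights, in sample order p2 p1 p0 q0 q1 q2.
      const int taps[4][6] = {
        { 3, 2, 2, 1, 0, 0 },  // *op1 (out-of-range tap folded onto p2)
        { 1, 2, 2, 2, 1, 0 },  // *op0
        { 0, 1, 2, 2, 2, 1 },  // *oq0
        { 0, 0, 1, 2, 2, 3 },  // *oq1 (out-of-range tap folded onto q2)
      };
      for (int i = 0; i < 4; i++) {
        int sum = 0;
        for (int j = 0; j < 6; j++) sum += taps[i][j];
        assert(sum == 8);  // matches ROUND_POWER_OF_TWO(..., 3)
      }
      return 0;
    }
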
@@ -236,6 +276,32 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat,
}
}
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
+ int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
+
+ // loop filter designed to work using chars so that we can make maximum use
+ // of 8 bit simd instructions.
+ for (i = 0; i < count; ++i) {
+ const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
+ const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
+
+ const int8_t mask =
+ filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
+ const int8_t flat = flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2);
+ filter6(mask, *thresh, flat, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
+ s + 2 * p);
+ ++s;
+ }
+}
+#endif
+
void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
int i;
@@ -268,6 +334,28 @@ void aom_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
aom_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1);
}
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
+ int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
+
+ for (i = 0; i < count; ++i) {
+ const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
+ const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
+ const int8_t mask =
+ filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
+ const int8_t flat = flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2);
+ filter6(mask, *thresh, flat, s - 3, s - 2, s - 1, s, s + 1, s + 2);
+ s += pitch;
+ }
+}
+#endif
+
void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
int i;
@@ -297,6 +385,56 @@ void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
aom_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
}
+#if PARALLEL_DEBLOCKING_13_TAP
+static INLINE void filter14(int8_t mask, uint8_t thresh, int8_t flat,
+ int8_t flat2, uint8_t *op6, uint8_t *op5,
+ uint8_t *op4, uint8_t *op3, uint8_t *op2,
+ uint8_t *op1, uint8_t *op0, uint8_t *oq0,
+ uint8_t *oq1, uint8_t *oq2, uint8_t *oq3,
+ uint8_t *oq4, uint8_t *oq5, uint8_t *oq6) {
+ if (flat2 && flat && mask) {
+ const uint8_t p6 = *op6, p5 = *op5, p4 = *op4, p3 = *op3, p2 = *op2,
+ p1 = *op1, p0 = *op0;
+ const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3, q4 = *oq4,
+ q5 = *oq5, q6 = *oq6;
+
+ // 13-tap filter [1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1]
+ *op5 = ROUND_POWER_OF_TWO(p6 * 7 + p5 * 2 + p4 * 2 + p3 + p2 + p1 + p0 + q0,
+ 4);
+ *op4 = ROUND_POWER_OF_TWO(
+ p6 * 5 + p5 * 2 + p4 * 2 + p3 * 2 + p2 + p1 + p0 + q0 + q1, 4);
+ *op3 = ROUND_POWER_OF_TWO(
+ p6 * 4 + p5 + p4 * 2 + p3 * 2 + p2 * 2 + p1 + p0 + q0 + q1 + q2, 4);
+ *op2 = ROUND_POWER_OF_TWO(
+ p6 * 3 + p5 + p4 + p3 * 2 + p2 * 2 + p1 * 2 + p0 + q0 + q1 + q2 + q3,
+ 4);
+ *op1 = ROUND_POWER_OF_TWO(p6 * 2 + p5 + p4 + p3 + p2 * 2 + p1 * 2 + p0 * 2 +
+ q0 + q1 + q2 + q3 + q4,
+ 4);
+ *op0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 * 2 +
+ q0 * 2 + q1 + q2 + q3 + q4 + q5,
+ 4);
+ *oq0 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 * 2 +
+ q1 * 2 + q2 + q3 + q4 + q5 + q6,
+ 4);
+ *oq1 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 * 2 +
+ q2 * 2 + q3 + q4 + q5 + q6 * 2,
+ 4);
+ *oq2 = ROUND_POWER_OF_TWO(
+ p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 * 2 + q3 * 2 + q4 + q5 + q6 * 3,
+ 4);
+ *oq3 = ROUND_POWER_OF_TWO(
+ p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 * 2 + q4 * 2 + q5 + q6 * 4, 4);
+ *oq4 = ROUND_POWER_OF_TWO(
+ p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 * 2 + q5 * 2 + q6 * 5, 4);
+ *oq5 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 * 2 + q6 * 7,
+ 4);
+ } else {
+ filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3);
+ }
+}
+#endif
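
[Editor's note: one quick way to sanity-check the 13-tap weights above is DC preservation. Every output row's weights sum to 16 (for instance *op5 uses 7+2+2+1+1+1+1+1), so a constant input must come back unchanged after the shift by 4. A minimal check:]

    #include <assert.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

    int main(void) {
      const int v = 123;  // constant signal
      const int op5 =
          ROUND_POWER_OF_TWO(v * 7 + v * 2 + v * 2 + v + v + v + v + v, 4);
      assert(op5 == v);  // (16 * v + 8) >> 4 == v, so DC is preserved
      return 0;
    }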
+
#if PARALLEL_DEBLOCKING_11_TAP
static INLINE void filter12(int8_t mask, uint8_t thresh, int8_t flat,
int8_t flat2, uint8_t *op5, uint8_t *op4,
@@ -428,7 +566,16 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
-#if PARALLEL_DEBLOCKING_11_TAP
+#if PARALLEL_DEBLOCKING_13_TAP
+ (void)p7;
+ (void)q7;
+ const int8_t flat2 = flat_mask4(1, p6, p5, p4, p0, q0, q4, q5, q6);
+
+ filter14(mask, *thresh, flat, flat2, s - 7 * p, s - 6 * p, s - 5 * p,
+ s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
+ s + 2 * p, s + 3 * p, s + 4 * p, s + 5 * p, s + 6 * p);
+
+#elif PARALLEL_DEBLOCKING_11_TAP
const int8_t flat2 = flat_mask3(1, p5, p4, p0, q0, q4, q5);
filter12(mask, *thresh, flat, flat2, s - 6 * p, s - 5 * p, s - 4 * p,
@@ -482,7 +629,14 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
-#if PARALLEL_DEBLOCKING_11_TAP
+#if PARALLEL_DEBLOCKING_13_TAP
+ (void)p7;
+ (void)q7;
+ const int8_t flat2 = flat_mask4(1, p6, p5, p4, p0, q0, q4, q5, q6);
+
+ filter14(mask, *thresh, flat, flat2, s - 7, s - 6, s - 5, s - 4, s - 3,
+ s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6);
+#elif PARALLEL_DEBLOCKING_11_TAP
const int8_t flat2 = flat_mask3(1, p5, p4, p0, q0, q4, q5);
filter12(mask, *thresh, flat, flat2, s - 6, s - 5, s - 4, s - 3, s - 2,
@@ -553,6 +707,21 @@ static INLINE int8_t highbd_filter_mask(uint8_t limit, uint8_t blimit,
return ~mask;
}
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+static INLINE int8_t highbd_flat_mask3_chroma(uint8_t thresh, uint16_t p2,
+ uint16_t p1, uint16_t p0,
+ uint16_t q0, uint16_t q1,
+ uint16_t q2, int bd) {
+ int8_t mask = 0;
+ int16_t thresh16 = (uint16_t)thresh << (bd - 8);
+ mask |= (abs(p1 - p0) > thresh16) * -1;
+ mask |= (abs(q1 - q0) > thresh16) * -1;
+ mask |= (abs(p2 - p0) > thresh16) * -1;
+ mask |= (abs(q2 - q0) > thresh16) * -1;
+ return ~mask;
+}
+#endif
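
[Editor's note: the (cond) * -1 idiom in highbd_flat_mask3_chroma accumulates each failed flatness test as an all-ones value, and the final ~mask inverts it, so the result is -1 ("flat") only when every neighbor is within thresh16 of the edge pixels. A tiny illustration:]

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      int8_t mask = 0;
      mask |= (5 > 3) * -1;  // failed test: mask becomes -1 (all ones)
      assert(~mask == 0);    // ~(-1) == 0, i.e. "not flat"
      mask = 0;
      mask |= (2 > 3) * -1;  // passed test: mask stays 0
      assert(~mask == -1);   // ~0 == -1, i.e. "flat"
      return 0;
    }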
+
static INLINE int8_t highbd_flat_mask4(uint8_t thresh, uint16_t p3, uint16_t p2,
uint16_t p1, uint16_t p0, uint16_t q0,
uint16_t q1, uint16_t q2, uint16_t q3,
@@ -708,6 +877,26 @@ void aom_highbd_lpf_vertical_4_dual_c(
bd);
}
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+static INLINE void highbd_filter6(int8_t mask, uint8_t thresh, int8_t flat,
+ uint16_t *op2, uint16_t *op1, uint16_t *op0,
+ uint16_t *oq0, uint16_t *oq1, uint16_t *oq2,
+ int bd) {
+ if (flat && mask) {
+ const uint16_t p2 = *op2, p1 = *op1, p0 = *op0;
+ const uint16_t q0 = *oq0, q1 = *oq1, q2 = *oq2;
+
+ // 5-tap filter [1, 2, 2, 2, 1]
+ *op1 = ROUND_POWER_OF_TWO(p2 * 3 + p1 * 2 + p0 * 2 + q0, 3);
+ *op0 = ROUND_POWER_OF_TWO(p2 + p1 * 2 + p0 * 2 + q0 * 2 + q1, 3);
+ *oq0 = ROUND_POWER_OF_TWO(p1 + p0 * 2 + q0 * 2 + q1 * 2 + q2, 3);
+ *oq1 = ROUND_POWER_OF_TWO(p0 + q0 * 2 + q1 * 2 + q2 * 3, 3);
+ } else {
+ highbd_filter4(mask, thresh, op1, op0, oq0, oq1, bd);
+ }
+}
+#endif
+
static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, int8_t flat,
uint16_t *op3, uint16_t *op2, uint16_t *op1,
uint16_t *op0, uint16_t *oq0, uint16_t *oq1,
@@ -754,6 +943,33 @@ void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
}
}
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int bd) {
+ int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
+
+  // High bit-depth counterpart of aom_lpf_horizontal_6_c; operates on
+  // uint16_t samples rather than 8-bit chars.
+ for (i = 0; i < count; ++i) {
+ const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
+ const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
+
+ const int8_t mask =
+ highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
+ const int8_t flat = highbd_flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2, bd);
+ highbd_filter6(mask, *thresh, flat, s - 3 * p, s - 2 * p, s - 1 * p, s,
+ s + 1 * p, s + 2 * p, bd);
+ ++s;
+ }
+}
+#endif
+
void aom_highbd_lpf_horizontal_8_dual_c(
uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
@@ -762,6 +978,30 @@ void aom_highbd_lpf_horizontal_8_dual_c(
aom_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd);
}
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int bd) {
+ int i;
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ int count = 4;
+#else
+ int count = 8;
+#endif
+
+ for (i = 0; i < count; ++i) {
+ const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
+ const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
+ const int8_t mask =
+ highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
+ const int8_t flat = highbd_flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2, bd);
+ highbd_filter6(mask, *thresh, flat, s - 3, s - 2, s - 1, s, s + 1, s + 2,
+ bd);
+ s += pitch;
+ }
+}
+#endif
+
void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
@@ -794,6 +1034,68 @@ void aom_highbd_lpf_vertical_8_dual_c(
bd);
}
+#if PARALLEL_DEBLOCKING_13_TAP
+static INLINE void highbd_filter14(int8_t mask, uint8_t thresh, int8_t flat,
+ int8_t flat2, uint16_t *op6, uint16_t *op5,
+ uint16_t *op4, uint16_t *op3, uint16_t *op2,
+ uint16_t *op1, uint16_t *op0, uint16_t *oq0,
+ uint16_t *oq1, uint16_t *oq2, uint16_t *oq3,
+ uint16_t *oq4, uint16_t *oq5, uint16_t *oq6,
+ int bd) {
+ if (flat2 && flat && mask) {
+ const uint16_t p6 = *op6;
+ const uint16_t p5 = *op5;
+ const uint16_t p4 = *op4;
+ const uint16_t p3 = *op3;
+ const uint16_t p2 = *op2;
+ const uint16_t p1 = *op1;
+ const uint16_t p0 = *op0;
+ const uint16_t q0 = *oq0;
+ const uint16_t q1 = *oq1;
+ const uint16_t q2 = *oq2;
+ const uint16_t q3 = *oq3;
+ const uint16_t q4 = *oq4;
+ const uint16_t q5 = *oq5;
+ const uint16_t q6 = *oq6;
+
+ // 13-tap filter [1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1]
+ *op5 = ROUND_POWER_OF_TWO(p6 * 7 + p5 * 2 + p4 * 2 + p3 + p2 + p1 + p0 + q0,
+ 4);
+ *op4 = ROUND_POWER_OF_TWO(
+ p6 * 5 + p5 * 2 + p4 * 2 + p3 * 2 + p2 + p1 + p0 + q0 + q1, 4);
+ *op3 = ROUND_POWER_OF_TWO(
+ p6 * 4 + p5 + p4 * 2 + p3 * 2 + p2 * 2 + p1 + p0 + q0 + q1 + q2, 4);
+ *op2 = ROUND_POWER_OF_TWO(
+ p6 * 3 + p5 + p4 + p3 * 2 + p2 * 2 + p1 * 2 + p0 + q0 + q1 + q2 + q3,
+ 4);
+ *op1 = ROUND_POWER_OF_TWO(p6 * 2 + p5 + p4 + p3 + p2 * 2 + p1 * 2 + p0 * 2 +
+ q0 + q1 + q2 + q3 + q4,
+ 4);
+ *op0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 * 2 +
+ q0 * 2 + q1 + q2 + q3 + q4 + q5,
+ 4);
+ *oq0 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 * 2 +
+ q1 * 2 + q2 + q3 + q4 + q5 + q6,
+ 4);
+ *oq1 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 * 2 +
+ q2 * 2 + q3 + q4 + q5 + q6 * 2,
+ 4);
+ *oq2 = ROUND_POWER_OF_TWO(
+ p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 * 2 + q3 * 2 + q4 + q5 + q6 * 3,
+ 4);
+ *oq3 = ROUND_POWER_OF_TWO(
+ p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 * 2 + q4 * 2 + q5 + q6 * 4, 4);
+ *oq4 = ROUND_POWER_OF_TWO(
+ p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 * 2 + q5 * 2 + q6 * 5, 4);
+ *oq5 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 * 2 + q6 * 7,
+ 4);
+ } else {
+ highbd_filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3,
+ bd);
+ }
+}
+#endif
+
static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, int8_t flat,
int8_t flat2, uint16_t *op7, uint16_t *op6,
uint16_t *op5, uint16_t *op4, uint16_t *op3,
@@ -887,6 +1189,16 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
const int8_t flat =
highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);
+
+#if PARALLEL_DEBLOCKING_13_TAP
+ const int8_t flat2 =
+ highbd_flat_mask4(1, s[-7 * p], s[-6 * p], s[-5 * p], p0, q0, s[4 * p],
+ s[5 * p], s[6 * p], bd);
+
+ highbd_filter14(mask, *thresh, flat, flat2, s - 7 * p, s - 6 * p, s - 5 * p,
+ s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
+ s + 2 * p, s + 3 * p, s + 4 * p, s + 5 * p, s + 6 * p, bd);
+#else
const int8_t flat2 =
highbd_flat_mask5(1, s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0, q0,
s[4 * p], s[5 * p], s[6 * p], s[7 * p], bd);
@@ -895,6 +1207,7 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
s - 5 * p, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s,
s + 1 * p, s + 2 * p, s + 3 * p, s + 4 * p, s + 5 * p,
s + 6 * p, s + 7 * p, bd);
+#endif
++s;
}
}
@@ -937,12 +1250,21 @@ static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
const int8_t flat =
highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);
+#if PARALLEL_DEBLOCKING_13_TAP
+ const int8_t flat2 =
+ highbd_flat_mask4(1, s[-7], s[-6], s[-5], p0, q0, s[4], s[5], s[6], bd);
+
+ highbd_filter14(mask, *thresh, flat, flat2, s - 7, s - 6, s - 5, s - 4,
+ s - 3, s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4, s + 5,
+ s + 6, bd);
+#else
const int8_t flat2 = highbd_flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0,
q0, s[4], s[5], s[6], s[7], bd);
highbd_filter16(mask, *thresh, flat, flat2, s - 8, s - 7, s - 6, s - 5,
s - 4, s - 3, s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4,
s + 5, s + 6, s + 7, bd);
+#endif
s += p;
}
}
diff --git a/third_party/aom/aom_dsp/mips/convolve8_avg_dspr2.c b/third_party/aom/aom_dsp/mips/convolve8_avg_dspr2.c
index 298065adb..3574da19f 100644
--- a/third_party/aom/aom_dsp/mips/convolve8_avg_dspr2.c
+++ b/third_party/aom/aom_dsp/mips/convolve8_avg_dspr2.c
@@ -407,6 +407,11 @@ void aom_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint32_t tp1, tp2, tn1;
uint32_t tp3, tp4, tn2;
+ (void)filter_x;
+ (void)filter_x_stride;
+ (void)filter_y;
+ (void)filter_y_stride;
+
/* prefetch data to cache memory */
prefetch_load(src);
prefetch_load(src + 32);
diff --git a/third_party/aom/aom_dsp/mips/convolve8_dspr2.c b/third_party/aom/aom_dsp/mips/convolve8_dspr2.c
index c871702f4..dd4bc821a 100644
--- a/third_party/aom/aom_dsp/mips/convolve8_dspr2.c
+++ b/third_party/aom/aom_dsp/mips/convolve8_dspr2.c
@@ -1304,6 +1304,8 @@ void aom_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7;
uint32_t pos = 38;
+ (void)x_step_q4;
+
assert(x_step_q4 == 16);
assert(y_step_q4 == 16);
assert(((const int32_t *)filter_x)[1] != 0x800000);
@@ -1400,6 +1402,11 @@ void aom_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
int w, int h) {
int x, y;
+ (void)filter_x;
+ (void)filter_x_stride;
+ (void)filter_y;
+ (void)filter_y_stride;
+
/* prefetch data to cache memory */
prefetch_load(src);
prefetch_load(src + 32);
diff --git a/third_party/aom/aom_dsp/mips/intrapred16_dspr2.c b/third_party/aom/aom_dsp/mips/intrapred16_dspr2.c
index dc8f20208..7c221ae89 100644
--- a/third_party/aom/aom_dsp/mips/intrapred16_dspr2.c
+++ b/third_party/aom/aom_dsp/mips/intrapred16_dspr2.c
@@ -17,6 +17,8 @@ void aom_h_predictor_16x16_dspr2(uint8_t *dst, ptrdiff_t stride,
int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
int32_t tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+ (void)above;
+
__asm__ __volatile__(
"lb %[tmp1], (%[left]) \n\t"
"lb %[tmp2], 1(%[left]) \n\t"
diff --git a/third_party/aom/aom_dsp/mips/intrapred4_dspr2.c b/third_party/aom/aom_dsp/mips/intrapred4_dspr2.c
index ea7c02810..0a21979c7 100644
--- a/third_party/aom/aom_dsp/mips/intrapred4_dspr2.c
+++ b/third_party/aom/aom_dsp/mips/intrapred4_dspr2.c
@@ -15,6 +15,7 @@
void aom_h_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
int32_t tmp1, tmp2, tmp3, tmp4;
+ (void)above;
__asm__ __volatile__(
"lb %[tmp1], (%[left]) \n\t"
@@ -78,148 +79,4 @@ void aom_dc_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride,
: [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
[stride] "r"(stride));
}
-
-void aom_tm_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t abovel, abover;
- int32_t left0, left1, left2, left3;
- int32_t res0, res1;
- int32_t resl;
- int32_t resr;
- int32_t top_left;
- uint8_t *cm = aom_ff_cropTbl;
-
- __asm__ __volatile__(
- "ulw %[resl], (%[above]) \n\t"
-
- "lbu %[left0], (%[left]) \n\t"
- "lbu %[left1], 1(%[left]) \n\t"
- "lbu %[left2], 2(%[left]) \n\t"
- "lbu %[left3], 3(%[left]) \n\t"
-
- "lbu %[top_left], -1(%[above]) \n\t"
-
- "preceu.ph.qbl %[abovel], %[resl] \n\t"
- "preceu.ph.qbr %[abover], %[resl] \n\t"
-
- "replv.ph %[left0], %[left0] \n\t"
- "replv.ph %[left1], %[left1] \n\t"
- "replv.ph %[left2], %[left2] \n\t"
- "replv.ph %[left3], %[left3] \n\t"
-
- "replv.ph %[top_left], %[top_left] \n\t"
-
- "addu.ph %[resl], %[abovel], %[left0] \n\t"
- "subu.ph %[resl], %[resl], %[top_left] \n\t"
-
- "addu.ph %[resr], %[abover], %[left0] \n\t"
- "subu.ph %[resr], %[resr], %[top_left] \n\t"
-
- "sll %[res0], %[resr], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "lbux %[res0], %[res0](%[cm]) \n\t"
-
- "sra %[res1], %[resr], 16 \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "sb %[res0], (%[dst]) \n\t"
-
- "sll %[res0], %[resl], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "sb %[res1], 1(%[dst]) \n\t"
-
- "sra %[res1], %[resl], 16 \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
-
- "addu.ph %[resl], %[abovel], %[left1] \n\t"
- "subu.ph %[resl], %[resl], %[top_left] \n\t"
-
- "addu.ph %[resr], %[abover], %[left1] \n\t"
- "subu.ph %[resr], %[resr], %[top_left] \n\t"
-
- "sb %[res0], 2(%[dst]) \n\t"
- "sb %[res1], 3(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
-
- "sll %[res0], %[resr], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "lbux %[res0], %[res0](%[cm]) \n\t"
-
- "sra %[res1], %[resr], 16 \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "sb %[res0], (%[dst]) \n\t"
-
- "sll %[res0], %[resl], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "lbux %[res0], %[res0](%[cm]) \n\t"
-
- "sb %[res1], 1(%[dst]) \n\t"
- "sra %[res1], %[resl], 16 \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
-
- "addu.ph %[resl], %[abovel], %[left2] \n\t"
- "subu.ph %[resl], %[resl], %[top_left] \n\t"
-
- "addu.ph %[resr], %[abover], %[left2] \n\t"
- "subu.ph %[resr], %[resr], %[top_left] \n\t"
-
- "sb %[res0], 2(%[dst]) \n\t"
- "sb %[res1], 3(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
-
- "sll %[res0], %[resr], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "lbux %[res0], %[res0](%[cm]) \n\t"
-
- "sra %[res1], %[resr], 16 \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "sb %[res0], (%[dst]) \n\t"
-
- "sll %[res0], %[resl], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "lbux %[res0], %[res0](%[cm]) \n\t"
-
- "sb %[res1], 1(%[dst]) \n\t"
- "sra %[res1], %[resl], 16 \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
-
- "addu.ph %[resl], %[abovel], %[left3] \n\t"
- "subu.ph %[resl], %[resl], %[top_left] \n\t"
-
- "addu.ph %[resr], %[abover], %[left3] \n\t"
- "subu.ph %[resr], %[resr], %[top_left] \n\t"
-
- "sb %[res0], 2(%[dst]) \n\t"
- "sb %[res1], 3(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
-
- "sll %[res0], %[resr], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "lbux %[res0], %[res0](%[cm]) \n\t"
-
- "sra %[res1], %[resr], 16 \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "sb %[res0], (%[dst]) \n\t"
-
- "sll %[res0], %[resl], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "sb %[res1], 1(%[dst]) \n\t"
-
- "sra %[res1], %[resl], 16 \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
-
- "sb %[res0], 2(%[dst]) \n\t"
- "sb %[res1], 3(%[dst]) \n\t"
-
- : [abovel] "=&r"(abovel), [abover] "=&r"(abover), [left0] "=&r"(left0),
- [left1] "=&r"(left1), [left2] "=&r"(left2), [res0] "=&r"(res0),
- [res1] "=&r"(res1), [left3] "=&r"(left3), [resl] "=&r"(resl),
- [resr] "=&r"(resr), [top_left] "=&r"(top_left)
- : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
- [stride] "r"(stride), [cm] "r"(cm));
-}
#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/aom_dsp/mips/intrapred8_dspr2.c b/third_party/aom/aom_dsp/mips/intrapred8_dspr2.c
index 1114fbc00..d42a77c80 100644
--- a/third_party/aom/aom_dsp/mips/intrapred8_dspr2.c
+++ b/third_party/aom/aom_dsp/mips/intrapred8_dspr2.c
@@ -15,6 +15,7 @@
void aom_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+ (void)above;
__asm__ __volatile__(
"lb %[tmp1], (%[left]) \n\t"
@@ -146,458 +147,4 @@ void aom_dc_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
: [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
[stride] "r"(stride));
}
-
-void aom_tm_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t abovel, abover;
- int32_t abovel_1, abover_1;
- int32_t left0;
- int32_t res0, res1, res2, res3;
- int32_t reshw;
- int32_t top_left;
- uint8_t *cm = aom_ff_cropTbl;
-
- __asm__ __volatile__(
- "ulw %[reshw], (%[above]) \n\t"
- "ulw %[top_left], 4(%[above]) \n\t"
-
- "lbu %[left0], (%[left]) \n\t"
-
- "preceu.ph.qbl %[abovel], %[reshw] \n\t"
- "preceu.ph.qbr %[abover], %[reshw] \n\t"
- "preceu.ph.qbl %[abovel_1], %[top_left] \n\t"
- "preceu.ph.qbr %[abover_1], %[top_left] \n\t"
-
- "lbu %[top_left], -1(%[above]) \n\t"
- "replv.ph %[left0], %[left0] \n\t"
-
- "replv.ph %[top_left], %[top_left] \n\t"
-
- "addu.ph %[reshw], %[abovel], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], (%[dst]) \n\t"
- "sb %[res1], 1(%[dst]) \n\t"
- "sb %[res2], 2(%[dst]) \n\t"
- "sb %[res3], 3(%[dst]) \n\t"
-
- "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbu %[left0], 1(%[left]) \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], 4(%[dst]) \n\t"
- "sb %[res1], 5(%[dst]) \n\t"
- "sb %[res2], 6(%[dst]) \n\t"
- "sb %[res3], 7(%[dst]) \n\t"
-
- "replv.ph %[left0], %[left0] \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
-
- "addu.ph %[reshw], %[abovel], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], (%[dst]) \n\t"
- "sb %[res1], 1(%[dst]) \n\t"
- "sb %[res2], 2(%[dst]) \n\t"
- "sb %[res3], 3(%[dst]) \n\t"
-
- "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbu %[left0], 2(%[left]) \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], 4(%[dst]) \n\t"
- "sb %[res1], 5(%[dst]) \n\t"
- "sb %[res2], 6(%[dst]) \n\t"
- "sb %[res3], 7(%[dst]) \n\t"
-
- "replv.ph %[left0], %[left0] \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
-
- "addu.ph %[reshw], %[abovel], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], (%[dst]) \n\t"
- "sb %[res1], 1(%[dst]) \n\t"
- "sb %[res2], 2(%[dst]) \n\t"
- "sb %[res3], 3(%[dst]) \n\t"
-
- "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbu %[left0], 3(%[left]) \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], 4(%[dst]) \n\t"
- "sb %[res1], 5(%[dst]) \n\t"
- "sb %[res2], 6(%[dst]) \n\t"
- "sb %[res3], 7(%[dst]) \n\t"
-
- "replv.ph %[left0], %[left0] \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
-
- "addu.ph %[reshw], %[abovel], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], (%[dst]) \n\t"
- "sb %[res1], 1(%[dst]) \n\t"
- "sb %[res2], 2(%[dst]) \n\t"
- "sb %[res3], 3(%[dst]) \n\t"
-
- "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbu %[left0], 4(%[left]) \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], 4(%[dst]) \n\t"
- "sb %[res1], 5(%[dst]) \n\t"
- "sb %[res2], 6(%[dst]) \n\t"
- "sb %[res3], 7(%[dst]) \n\t"
-
- "replv.ph %[left0], %[left0] \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
-
- "addu.ph %[reshw], %[abovel], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], (%[dst]) \n\t"
- "sb %[res1], 1(%[dst]) \n\t"
- "sb %[res2], 2(%[dst]) \n\t"
- "sb %[res3], 3(%[dst]) \n\t"
-
- "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbu %[left0], 5(%[left]) \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], 4(%[dst]) \n\t"
- "sb %[res1], 5(%[dst]) \n\t"
- "sb %[res2], 6(%[dst]) \n\t"
- "sb %[res3], 7(%[dst]) \n\t"
-
- "replv.ph %[left0], %[left0] \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
-
- "addu.ph %[reshw], %[abovel], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], (%[dst]) \n\t"
- "sb %[res1], 1(%[dst]) \n\t"
- "sb %[res2], 2(%[dst]) \n\t"
- "sb %[res3], 3(%[dst]) \n\t"
-
- "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbu %[left0], 6(%[left]) \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], 4(%[dst]) \n\t"
- "sb %[res1], 5(%[dst]) \n\t"
- "sb %[res2], 6(%[dst]) \n\t"
- "sb %[res3], 7(%[dst]) \n\t"
-
- "replv.ph %[left0], %[left0] \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
-
- "addu.ph %[reshw], %[abovel], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], (%[dst]) \n\t"
- "sb %[res1], 1(%[dst]) \n\t"
- "sb %[res2], 2(%[dst]) \n\t"
- "sb %[res3], 3(%[dst]) \n\t"
-
- "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbu %[left0], 7(%[left]) \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], 4(%[dst]) \n\t"
- "sb %[res1], 5(%[dst]) \n\t"
- "sb %[res2], 6(%[dst]) \n\t"
- "sb %[res3], 7(%[dst]) \n\t"
-
- "replv.ph %[left0], %[left0] \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
-
- "addu.ph %[reshw], %[abovel], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], (%[dst]) \n\t"
- "sb %[res1], 1(%[dst]) \n\t"
- "sb %[res2], 2(%[dst]) \n\t"
- "sb %[res3], 3(%[dst]) \n\t"
-
- "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res2], %[reshw], 16 \n\t"
- "sra %[res2], %[res2], 16 \n\t"
- "sra %[res3], %[reshw], 16 \n\t"
-
- "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
- "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
-
- "sll %[res0], %[reshw], 16 \n\t"
- "sra %[res0], %[res0], 16 \n\t"
- "sra %[res1], %[reshw], 16 \n\t"
-
- "lbux %[res0], %[res0](%[cm]) \n\t"
- "lbux %[res1], %[res1](%[cm]) \n\t"
- "lbux %[res2], %[res2](%[cm]) \n\t"
- "lbux %[res3], %[res3](%[cm]) \n\t"
-
- "sb %[res0], 4(%[dst]) \n\t"
- "sb %[res1], 5(%[dst]) \n\t"
- "sb %[res2], 6(%[dst]) \n\t"
- "sb %[res3], 7(%[dst]) \n\t"
-
- : [abovel] "=&r"(abovel), [abover] "=&r"(abover),
- [abovel_1] "=&r"(abovel_1), [abover_1] "=&r"(abover_1),
- [left0] "=&r"(left0), [res2] "=&r"(res2), [res3] "=&r"(res3),
- [res0] "=&r"(res0), [res1] "=&r"(res1), [reshw] "=&r"(reshw),
- [top_left] "=&r"(top_left)
- : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
- [stride] "r"(stride), [cm] "r"(cm));
-}
#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/aom_dsp/mips/intrapred_msa.c b/third_party/aom/aom_dsp/mips/intrapred_msa.c
index e8eaec7a9..bcb9c9df9 100644
--- a/third_party/aom/aom_dsp/mips/intrapred_msa.c
+++ b/third_party/aom/aom_dsp/mips/intrapred_msa.c
@@ -382,176 +382,6 @@ static void intra_predict_128dc_32x32_msa(uint8_t *dst, int32_t dst_stride) {
}
}
-static void intra_predict_tm_4x4_msa(const uint8_t *src_top_ptr,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t val;
- uint8_t top_left = src_top_ptr[-1];
- v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
- v16u8 src0, src1, src2, src3;
- v8u16 src_top_left, vec0, vec1, vec2, vec3;
-
- src_top_left = (v8u16)__msa_fill_h(top_left);
- val = LW(src_top_ptr);
- src_top = (v16i8)__msa_insert_w((v4i32)src_top, 0, val);
-
- src_left0 = __msa_fill_b(src_left[0]);
- src_left1 = __msa_fill_b(src_left[1]);
- src_left2 = __msa_fill_b(src_left[2]);
- src_left3 = __msa_fill_b(src_left[3]);
-
- ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
- src_left3, src_top, src0, src1, src2, src3);
- HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
- SAT_UH4_UH(vec0, vec1, vec2, vec3, 7);
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
- ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride);
-}
-
-static void intra_predict_tm_8x8_msa(const uint8_t *src_top_ptr,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- uint64_t val;
- uint8_t top_left = src_top_ptr[-1];
- uint32_t loop_cnt;
- v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
- v8u16 src_top_left, vec0, vec1, vec2, vec3;
- v16u8 src0, src1, src2, src3;
-
- val = LD(src_top_ptr);
- src_top = (v16i8)__msa_insert_d((v2i64)src_top, 0, val);
- src_top_left = (v8u16)__msa_fill_h(top_left);
-
- for (loop_cnt = 2; loop_cnt--;) {
- src_left0 = __msa_fill_b(src_left[0]);
- src_left1 = __msa_fill_b(src_left[1]);
- src_left2 = __msa_fill_b(src_left[2]);
- src_left3 = __msa_fill_b(src_left[3]);
- src_left += 4;
-
- ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
- src_left3, src_top, src0, src1, src2, src3);
- HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
- SAT_UH4_UH(vec0, vec1, vec2, vec3, 7);
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
- ST8x4_UB(tmp0, tmp1, dst, dst_stride);
- dst += (4 * dst_stride);
- }
-}
-
-static void intra_predict_tm_16x16_msa(const uint8_t *src_top_ptr,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- uint8_t top_left = src_top_ptr[-1];
- uint32_t loop_cnt;
- v16i8 src_top, src_left0, src_left1, src_left2, src_left3;
- v8u16 src_top_left, res_r, res_l;
-
- src_top = LD_SB(src_top_ptr);
- src_top_left = (v8u16)__msa_fill_h(top_left);
-
- for (loop_cnt = 4; loop_cnt--;) {
- src_left0 = __msa_fill_b(src_left[0]);
- src_left1 = __msa_fill_b(src_left[1]);
- src_left2 = __msa_fill_b(src_left[2]);
- src_left3 = __msa_fill_b(src_left[3]);
- src_left += 4;
-
- ILVRL_B2_UH(src_left0, src_top, res_r, res_l);
- HADD_UB2_UH(res_r, res_l, res_r, res_l);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
-
- SAT_UH2_UH(res_r, res_l, 7);
- PCKEV_ST_SB(res_r, res_l, dst);
- dst += dst_stride;
-
- ILVRL_B2_UH(src_left1, src_top, res_r, res_l);
- HADD_UB2_UH(res_r, res_l, res_r, res_l);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
- SAT_UH2_UH(res_r, res_l, 7);
- PCKEV_ST_SB(res_r, res_l, dst);
- dst += dst_stride;
-
- ILVRL_B2_UH(src_left2, src_top, res_r, res_l);
- HADD_UB2_UH(res_r, res_l, res_r, res_l);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
- SAT_UH2_UH(res_r, res_l, 7);
- PCKEV_ST_SB(res_r, res_l, dst);
- dst += dst_stride;
-
- ILVRL_B2_UH(src_left3, src_top, res_r, res_l);
- HADD_UB2_UH(res_r, res_l, res_r, res_l);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
- SAT_UH2_UH(res_r, res_l, 7);
- PCKEV_ST_SB(res_r, res_l, dst);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_tm_32x32_msa(const uint8_t *src_top,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- uint8_t top_left = src_top[-1];
- uint32_t loop_cnt;
- v16i8 src_top0, src_top1, src_left0, src_left1, src_left2, src_left3;
- v8u16 src_top_left, res_r0, res_r1, res_l0, res_l1;
-
- LD_SB2(src_top, 16, src_top0, src_top1);
- src_top_left = (v8u16)__msa_fill_h(top_left);
-
- for (loop_cnt = 8; loop_cnt--;) {
- src_left0 = __msa_fill_b(src_left[0]);
- src_left1 = __msa_fill_b(src_left[1]);
- src_left2 = __msa_fill_b(src_left[2]);
- src_left3 = __msa_fill_b(src_left[3]);
- src_left += 4;
-
- ILVR_B2_UH(src_left0, src_top0, src_left0, src_top1, res_r0, res_r1);
- ILVL_B2_UH(src_left0, src_top0, src_left0, src_top1, res_l0, res_l1);
- HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
- SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
- PCKEV_ST_SB(res_r0, res_l0, dst);
- PCKEV_ST_SB(res_r1, res_l1, dst + 16);
- dst += dst_stride;
-
- ILVR_B2_UH(src_left1, src_top0, src_left1, src_top1, res_r0, res_r1);
- ILVL_B2_UH(src_left1, src_top0, src_left1, src_top1, res_l0, res_l1);
- HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
- SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
- PCKEV_ST_SB(res_r0, res_l0, dst);
- PCKEV_ST_SB(res_r1, res_l1, dst + 16);
- dst += dst_stride;
-
- ILVR_B2_UH(src_left2, src_top0, src_left2, src_top1, res_r0, res_r1);
- ILVL_B2_UH(src_left2, src_top0, src_left2, src_top1, res_l0, res_l1);
- HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
- SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
- PCKEV_ST_SB(res_r0, res_l0, dst);
- PCKEV_ST_SB(res_r1, res_l1, dst + 16);
- dst += dst_stride;
-
- ILVR_B2_UH(src_left3, src_top0, src_left3, src_top1, res_r0, res_r1);
- ILVL_B2_UH(src_left3, src_top0, src_left3, src_top1, res_l0, res_l1);
- HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
- IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
- SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
- PCKEV_ST_SB(res_r0, res_l0, dst);
- PCKEV_ST_SB(res_r1, res_l1, dst + 16);
- dst += dst_stride;
- }
-}
-
void aom_v_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
@@ -717,23 +547,3 @@ void aom_dc_128_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
intra_predict_128dc_32x32_msa(dst, y_stride);
}
-
-void aom_tm_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_tm_4x4_msa(above, left, dst, y_stride);
-}
-
-void aom_tm_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_tm_8x8_msa(above, left, dst, y_stride);
-}
-
-void aom_tm_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_tm_16x16_msa(above, left, dst, y_stride);
-}
-
-void aom_tm_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_tm_32x32_msa(above, left, dst, y_stride);
-}
diff --git a/third_party/aom/aom_dsp/mips/inv_txfm_dspr2.h b/third_party/aom/aom_dsp/mips/inv_txfm_dspr2.h
index 8a85e26f3..c69835173 100644
--- a/third_party/aom/aom_dsp/mips/inv_txfm_dspr2.h
+++ b/third_party/aom/aom_dsp/mips/inv_txfm_dspr2.h
@@ -24,10 +24,12 @@ extern "C" {
#endif
#if HAVE_DSPR2
+/* Note: this macro expects a local int32_t named out to exist, and will write
+ * to that variable. */
#define DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input) \
({ \
\
- int32_t tmp, out; \
+ int32_t tmp; \
int dct_cost_rounding = DCT_CONST_ROUNDING; \
int in = input; \
\
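
[Editor's note: a hedged caller sketch for the contract the new comment documents; the truncated hunk does not show the macro's tail, so this assumes the statement expression leaves its result in out. Because the macro now declares only tmp, the enclosing function must declare out itself:]

    static int32_t example_caller(int32_t input_value) {
      int32_t out;  // required by the macro's revised contract
      DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input_value);
      return out;  // the macro has written the rounded result here
    }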
diff --git a/third_party/aom/aom_dsp/prob.h b/third_party/aom/aom_dsp/prob.h
index 35db134e5..a517e810a 100644
--- a/third_party/aom/aom_dsp/prob.h
+++ b/third_party/aom/aom_dsp/prob.h
@@ -46,6 +46,14 @@ typedef uint16_t aom_cdf_prob;
#define MAX_PROB 255
+#define LV_MAP_PROB 1
+
+#define BR_NODE 1
+
+#if CONFIG_ADAPT_SCAN
+#define CACHE_SCAN_PROB 1
+#endif
+
#define aom_prob_half ((aom_prob)128)
typedef int8_t aom_tree_index;
@@ -149,7 +157,11 @@ static INLINE void av1_tree_to_cdf(const aom_tree_index *tree,
void av1_indices_from_tree(int *ind, int *inv, const aom_tree_index *tree);
static INLINE void update_cdf(aom_cdf_prob *cdf, int val, int nsymbs) {
- const int rate = 4 + (cdf[nsymbs] > 31) + get_msb(nsymbs);
+ int rate = 4 + (cdf[nsymbs] > 31) + get_msb(nsymbs);
+#if CONFIG_LV_MAP
+ if (nsymbs == 2)
+ rate = 4 + (cdf[nsymbs] > 7) + (cdf[nsymbs] > 15) + get_msb(nsymbs);
+#endif
const int rate2 = 5;
int i, tmp;
int diff;
@@ -158,7 +170,7 @@ static INLINE void update_cdf(aom_cdf_prob *cdf, int val, int nsymbs) {
tmp = AOM_ICDF(tmp0);
diff = ((CDF_PROB_TOP - (nsymbs << rate2)) >> rate) << rate;
// Single loop (faster)
-#if !CONFIG_ANS && CONFIG_EC_SMALLMUL
+#if !CONFIG_ANS
for (i = 0; i < nsymbs - 1; ++i, tmp -= tmp0) {
tmp -= (i == val ? diff : 0);
cdf[i] += ((tmp - cdf[i]) >> rate);
@@ -183,6 +195,12 @@ static INLINE void update_cdf(aom_cdf_prob *cdf, int val, int nsymbs) {
cdf[nsymbs] += (cdf[nsymbs] < 32);
}
+#if CONFIG_LV_MAP
+static INLINE void update_bin(aom_cdf_prob *cdf, int val, int nsymbs) {
+ update_cdf(cdf, val, nsymbs);
+}
+#endif
+
#ifdef __cplusplus
} // extern "C"
#endif
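
[Editor's note: a worked example of the adaptation rate this hunk touches, assuming get_msb(n) is floor(log2(n)) as in aom_dsp; the local reimplementation below is only for illustration.]

    #include <stdio.h>

    static int get_msb_local(unsigned int n) {  // floor(log2(n)), n > 0
      int msb = 0;
      while (n >>= 1) ++msb;
      return msb;
    }

    int main(void) {
      // For a 4-symbol CDF whose update counter cdf[nsymbs] has reached 40:
      const int nsymbs = 4, count = 40;
      const int rate = 4 + (count > 31) + get_msb_local(nsymbs);
      printf("rate = %d\n", rate);  // 4 + 1 + 2 == 7: each step moves cdf[i]
                                    // toward its target by (target - cdf[i]) >> 7
      return 0;
    }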
diff --git a/third_party/aom/aom_dsp/psnr.c b/third_party/aom/aom_dsp/psnr.c
index 461c13729..d543f12d1 100644
--- a/third_party/aom/aom_dsp/psnr.c
+++ b/third_party/aom/aom_dsp/psnr.c
@@ -289,6 +289,27 @@ int64_t aom_highbd_get_v_sse(const YV12_BUFFER_CONFIG *a,
}
#endif // CONFIG_HIGHBITDEPTH
+int64_t aom_get_sse_plane(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b, int plane, int highbd) {
+#if CONFIG_HIGHBITDEPTH
+ if (highbd) {
+ switch (plane) {
+ case 0: return aom_highbd_get_y_sse(a, b);
+ case 1: return aom_highbd_get_u_sse(a, b);
+ case 2: return aom_highbd_get_v_sse(a, b);
+ default: assert(plane >= 0 && plane <= 2); return 0;
+ }
+ }
+#endif
+ (void)highbd;
+ switch (plane) {
+ case 0: return aom_get_y_sse(a, b);
+ case 1: return aom_get_u_sse(a, b);
+ case 2: return aom_get_v_sse(a, b);
+ default: assert(plane >= 0 && plane <= 2); return 0;
+ }
+}
+
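
[Editor's note: a hedged usage sketch for the new helper; src and recon stand for any two YV12_BUFFER_CONFIG frames of matching geometry.]

    // Sum the SSE over all three planes of an 8-bit frame pair (sketch).
    int64_t total_sse(const YV12_BUFFER_CONFIG *src,
                      const YV12_BUFFER_CONFIG *recon) {
      int64_t total = 0;
      for (int plane = 0; plane < 3; ++plane)
        total += aom_get_sse_plane(src, recon, plane, /*highbd=*/0);
      return total;
    }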
#if CONFIG_HIGHBITDEPTH
void aom_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b, PSNR_STATS *psnr,
@@ -296,9 +317,7 @@ void aom_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
const int widths[3] = { a->y_crop_width, a->uv_crop_width, a->uv_crop_width };
const int heights[3] = { a->y_crop_height, a->uv_crop_height,
a->uv_crop_height };
- const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer };
const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride };
- const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer };
const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride };
int i;
uint64_t total_sse = 0;
@@ -313,14 +332,15 @@ void aom_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
uint64_t sse;
if (a->flags & YV12_FLAG_HIGHBITDEPTH) {
if (input_shift) {
- sse = highbd_get_sse_shift(a_planes[i], a_strides[i], b_planes[i],
+ sse = highbd_get_sse_shift(a->buffers[i], a_strides[i], b->buffers[i],
b_strides[i], w, h, input_shift);
} else {
- sse = highbd_get_sse(a_planes[i], a_strides[i], b_planes[i],
+ sse = highbd_get_sse(a->buffers[i], a_strides[i], b->buffers[i],
b_strides[i], w, h);
}
} else {
- sse = get_sse(a_planes[i], a_strides[i], b_planes[i], b_strides[i], w, h);
+ sse = get_sse(a->buffers[i], a_strides[i], b->buffers[i], b_strides[i], w,
+ h);
}
psnr->sse[1 + i] = sse;
psnr->samples[1 + i] = samples;
@@ -344,9 +364,7 @@ void aom_calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
const int widths[3] = { a->y_crop_width, a->uv_crop_width, a->uv_crop_width };
const int heights[3] = { a->y_crop_height, a->uv_crop_height,
a->uv_crop_height };
- const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer };
const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride };
- const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer };
const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride };
int i;
uint64_t total_sse = 0;
@@ -357,7 +375,7 @@ void aom_calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
const int h = heights[i];
const uint32_t samples = w * h;
const uint64_t sse =
- get_sse(a_planes[i], a_strides[i], b_planes[i], b_strides[i], w, h);
+ get_sse(a->buffers[i], a_strides[i], b->buffers[i], b_strides[i], w, h);
psnr->sse[1 + i] = sse;
psnr->samples[1 + i] = samples;
psnr->psnr[1 + i] = aom_sse_to_psnr(samples, peak, (double)sse);
diff --git a/third_party/aom/aom_dsp/psnr.h b/third_party/aom/aom_dsp/psnr.h
index 480140e6f..df5f8f9f2 100644
--- a/third_party/aom/aom_dsp/psnr.h
+++ b/third_party/aom/aom_dsp/psnr.h
@@ -47,6 +47,8 @@ int64_t aom_get_v_sse_part(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b, int hstart, int width,
int vstart, int height);
int64_t aom_get_v_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
+int64_t aom_get_sse_plane(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b, int plane, int highbd);
#if CONFIG_HIGHBITDEPTH
int64_t aom_highbd_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b, int hstart,
diff --git a/third_party/aom/aom_dsp/quantize.c b/third_party/aom/aom_dsp/quantize.c
index fe98b6028..21bcc486a 100644
--- a/third_party/aom/aom_dsp/quantize.c
+++ b/third_party/aom/aom_dsp/quantize.c
@@ -12,18 +12,14 @@
#include "aom_dsp/quantize.h"
#include "aom_mem/aom_mem.h"
-static void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan,
-#if CONFIG_AOM_QM
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
- const int log_scale) {
+void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale) {
const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
@@ -37,20 +33,12 @@ static void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
// Pre-scan pass
for (i = (int)n_coeffs - 1; i >= 0; i--) {
const int rc = scan[i];
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
const int coeff = coeff_ptr[rc] * wt;
-#else
- const int coeff = coeff_ptr[rc];
-#endif // CONFIG_AOM_QM
-#if CONFIG_AOM_QM
- if (coeff < (zbins[rc != 0] << AOM_QM_BITS) &&
- coeff > (nzbins[rc != 0] << AOM_QM_BITS))
+ if (coeff < (zbins[rc != 0] * (1 << AOM_QM_BITS)) &&
+ coeff > (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
non_zero_count--;
-#else
- if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0]) non_zero_count--;
-#endif // CONFIG_AOM_QM
else
break;
}
@@ -64,35 +52,21 @@ static void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp32;
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
-#else
- if (abs_coeff >= zbins[rc != 0]) {
-#endif // CONFIG_AOM_QM
int64_t tmp =
clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
INT16_MIN, INT16_MAX);
-#if CONFIG_AOM_QM
tmp *= wt;
tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
quant_shift_ptr[rc != 0]) >>
(16 - log_scale + AOM_QM_BITS)); // quantization
-#else
- tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
- quant_shift_ptr[rc != 0]) >>
- (16 - log_scale)); // quantization
-#endif // CONFIG_AOM_QM
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
-#if CONFIG_AOM_QM
+ const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
const int dequant =
- (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale);
-#else
- dqcoeff_ptr[rc] =
- qcoeff_ptr[rc] * dequant_ptr[rc != 0] / (1 << log_scale);
-#endif // CONFIG_AOM_QM
if (tmp32) eob = i;
}
@@ -101,324 +75,25 @@ static void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
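
[Editor's note: the NULL-matrix default introduced above makes the weighted comparisons collapse to the unweighted ones. With wt equal to 1 << AOM_QM_BITS, abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS) reduces to abs_coeff >= zbins[rc != 0]; AOM_QM_BITS is 5 in libaom at the time of this patch (stated as an assumption here). A minimal check of the equivalence:]

    #include <assert.h>

    #define AOM_QM_BITS 5  // assumed value, matching libaom's definition

    int main(void) {
      const int abs_coeff = 17, zbin = 17;
      const int wt = 1 << AOM_QM_BITS;  // identity weight used when qm_ptr == NULL
      assert((abs_coeff * wt >= (zbin << AOM_QM_BITS)) == (abs_coeff >= zbin));
      return 0;
    }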
-void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
- quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
- quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 0);
-}
-
-void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
- quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
- quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 1);
-}
-
-#if CONFIG_TX64X64
-void aom_quantize_b_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
- quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
- quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 2);
-}
-#endif // CONFIG_TX64X64
-
-#if CONFIG_AOM_QM
-void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int64_t tmp, eob = -1;
- int32_t tmp32;
- int dequant =
- (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (16 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
- if (tmp32) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
- const int n_coeffs = 1024;
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int64_t tmp, eob = -1;
- int32_t tmp32;
- int dequant;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
- INT16_MIN, INT16_MAX);
- tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (15 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dequant =
- (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
- dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
- if (tmp32) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
- const int n_coeffs = 1024;
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int64_t tmp, eob = -1;
- int32_t tmp32;
- int dequant;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2),
- INT16_MIN, INT16_MAX);
- tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (14 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dequant =
- (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
- dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4;
- if (tmp32) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-#endif // CONFIG_TX64X64
-
-void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
- int skip_block, const int16_t *round_ptr,
- const int16_t quant, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
- uint16_t *eob_ptr, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr) {
- int eob = -1;
- int dequant =
- (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + round_ptr[0];
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp * qm_ptr[0] * quant) >> (16 + AOM_QM_BITS));
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr) {
- const int n_coeffs = 1024;
- int eob = -1;
- int dequant;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp * qm_ptr[0] * quant) >> (15 + AOM_QM_BITS));
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
- dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 2;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr) {
- const int n_coeffs = 1024;
- int eob = -1;
- int dequant;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2);
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp * qm_ptr[0] * quant) >> (14 + AOM_QM_BITS));
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
- dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 4;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-#endif // CONFIG_TX64X64
-
-void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
- int i, non_zero_count = (int)n_coeffs, eob = -1;
- const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
- int dequant;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = (int)n_coeffs - 1; i >= 0; i--) {
- const int rc = scan[i];
- const qm_val_t wt = qm_ptr[rc];
- const int coeff = coeff_ptr[rc] * wt;
-
- if (coeff < (zbins[rc != 0] << AOM_QM_BITS) &&
- coeff > (nzbins[rc != 0] << AOM_QM_BITS))
- non_zero_count--;
- else
- break;
- }
-
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < non_zero_count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const qm_val_t wt = qm_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
- const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
- const int64_t tmpw = tmp1 * wt;
- const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (16 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
- if (abs_qcoeff) eob = i;
- }
- }
- }
- *eob_ptr = eob + 1;
-}
-
-void aom_highbd_quantize_b_32x32_c(
+void highbd_quantize_b_helper_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr) {
- const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
- int idx = 0;
- int idx_arr[1024];
+ const qm_val_t *iqm_ptr, const int log_scale) {
int i, eob = -1;
+ const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
+ ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
+ const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
int dequant;
+#if CONFIG_TX64X64
+ int idx_arr[4096];
+#else
+ int idx_arr[1024];
+#endif
(void)iscan;
+ int idx = 0;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
@@ -427,13 +102,13 @@ void aom_highbd_quantize_b_32x32_c(
// Pre-scan pass
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
- const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
const int coeff = coeff_ptr[rc] * wt;
// If the coefficient is out of the base ZBIN range, keep it for
// quantization.
- if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) ||
- coeff <= (nzbins[rc != 0] << AOM_QM_BITS))
+ if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
+ coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
idx_arr[idx++] = i;
}
@@ -443,134 +118,112 @@ void aom_highbd_quantize_b_32x32_c(
const int rc = scan[idx_arr[i]];
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
- const qm_val_t wt = qm_ptr[rc];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
const int64_t tmpw = tmp1 * wt;
const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (15 + AOM_QM_BITS));
+ const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >>
+ (16 - log_scale + AOM_QM_BITS));
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
- dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
+ dequant = (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+ dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale);
if (abs_qcoeff) eob = idx_arr[i];
}
}
*eob_ptr = eob + 1;
}
-#if CONFIG_TX64X64
-void aom_highbd_quantize_b_64x64_c(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr) {
- const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 2) };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
- int idx = 0;
- int idx_arr[4096];
- int i, eob = -1;
+void quantize_dc_helper(const tran_low_t *coeff_ptr, int n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t quant, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
+ uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale) {
+ const int rc = 0;
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int64_t tmp;
+ int eob = -1;
+ int32_t tmp32;
int dequant;
- (void)iscan;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const qm_val_t wt = qm_ptr[rc];
- const int coeff = coeff_ptr[rc] * wt;
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) ||
- coeff <= (nzbins[rc != 0] << AOM_QM_BITS))
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const qm_val_t wt = qm_ptr[rc];
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2);
- const int64_t tmpw = tmp1 * wt;
- const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (14 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dequant =
- (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
- dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4;
- if (abs_qcoeff) eob = idx_arr[i];
- }
+ const int wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+ tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
+ INT16_MIN, INT16_MAX);
+ tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS));
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+ dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+ dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale);
+ if (tmp32) eob = 0;
}
*eob_ptr = eob + 1;
}
-#endif // CONFIG_TX64X64
-#else // CONFIG_AOM_QM
+/* These functions should only be called when quantization matrices
+   are not used. */
+void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
+ quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0);
+}
+
+void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1);
+}
+
+#if CONFIG_TX64X64
+void aom_quantize_b_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2);
+}
+#endif // CONFIG_TX64X64
void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp, eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp = (tmp * quant) >> 16;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
- if (tmp) eob = 0;
- }
- *eob_ptr = eob + 1;
+ quantize_dc_helper(coeff_ptr, n_coeffs, skip_block, round_ptr, quant,
+ qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL,
+ 0);
}
void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
- const int n_coeffs = 1024;
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp, eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
- INT16_MIN, INT16_MAX);
- tmp = (tmp * quant) >> 15;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
- if (tmp) eob = 0;
- }
- *eob_ptr = eob + 1;
+ quantize_dc_helper(coeff_ptr, 1024, skip_block, round_ptr, quant, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL, 1);
}
#if CONFIG_TX64X64
@@ -578,100 +231,8 @@ void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr) {
- const int n_coeffs = 4096;
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp, eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2),
- INT16_MIN, INT16_MAX);
- tmp = (tmp * quant) >> 14;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 4;
- if (tmp) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-#endif // CONFIG_TX64X64
-
-void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
- int skip_block, const int16_t *round_ptr,
- const int16_t quant, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
- uint16_t *eob_ptr) {
- int eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + round_ptr[0];
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16);
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr,
- uint16_t *eob_ptr) {
- const int n_coeffs = 1024;
- int eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15);
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_TX64X64
-void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr,
- uint16_t *eob_ptr) {
- const int n_coeffs = 4096;
- int eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2);
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 14);
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 4;
- if (abs_qcoeff) eob = 0;
- }
- *eob_ptr = eob + 1;
+ quantize_dc_helper(coeff_ptr, 4096, skip_block, round_ptr, quant, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, NULL, NULL, 2);
}
#endif // CONFIG_TX64X64
@@ -682,45 +243,10 @@ void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- int i, non_zero_count = (int)n_coeffs, eob = -1;
- const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = (int)n_coeffs - 1; i >= 0; i--) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
- non_zero_count--;
- else
- break;
- }
-
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < non_zero_count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- if (abs_coeff >= zbins[rc != 0]) {
- const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 16);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
- if (abs_qcoeff) eob = i;
- }
- }
- }
- *eob_ptr = eob + 1;
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
+ round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, 0);
}
void aom_highbd_quantize_b_32x32_c(
@@ -729,47 +255,10 @@ void aom_highbd_quantize_b_32x32_c(
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
- int idx = 0;
- int idx_arr[1024];
- int i, eob = -1;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
- if (abs_qcoeff) eob = idx_arr[i];
- }
- }
- *eob_ptr = eob + 1;
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
+ round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, 1);
}
#if CONFIG_TX64X64
@@ -779,47 +268,9 @@ void aom_highbd_quantize_b_64x64_c(
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 2) };
- const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
-
- int idx = 0;
- int idx_arr[4096];
- int i, eob = -1;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 =
- abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2);
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 14);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 4;
- if (abs_qcoeff) eob = idx_arr[i];
- }
- }
- *eob_ptr = eob + 1;
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr,
+ round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, 2);
}
#endif // CONFIG_TX64X64
-#endif // CONFIG_AOM_QM
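Note: the quantize.c rewrite above folds the per-size variants into helpers parameterized by log_scale (0 for ordinary sizes, 1 for 32x32, 2 for 64x64) and by quant-matrix pointers that fall back to the identity weight (1 << AOM_QM_BITS) when NULL. Below is a minimal, self-contained sketch of the per-coefficient DC arithmetic with hypothetical names; it is an illustration of the scaling identity, not the library code.

#include <stdint.h>
#include <stdio.h>

#define AOM_QM_BITS 5
/* Same shape as the codebase's macro; safe for n == 0. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (((1 << (n)) >> 1))) >> (n))

/* Hypothetical name; mirrors quantize_dc_helper's math for one coefficient
 * with qm_ptr/iqm_ptr == NULL, so both weights are the identity and the
 * dequant reweighting is a no-op. */
static void dc_quant_sketch(int coeff, int round, int quant, int dequant,
                            int log_scale, int *qcoeff, int *dqcoeff) {
  const int sign = coeff >> 31;                /* 0 or -1 */
  const int abs_coeff = (coeff ^ sign) - sign; /* |coeff| */
  const int wt = 1 << AOM_QM_BITS;             /* NULL qm_ptr => identity */
  int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round, log_scale);
  /* The helper clamps to [INT16_MIN, INT16_MAX]; only the upper bound can
   * trigger here since the inputs are nonnegative. */
  if (tmp > INT16_MAX) tmp = INT16_MAX;
  const int32_t q =
      (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS));
  *qcoeff = (q ^ sign) - sign;
  *dqcoeff = (*qcoeff * dequant) / (1 << log_scale);
}

int main(void) {
  int q, dq;
  /* log_scale == 1 reduces to the retired 32x32 path:
   * (tmp * quant) >> 15 and dqcoeff = qcoeff * dequant / 2. */
  dc_quant_sketch(1000, 64, 1 << 14, 32, 1, &q, &dq);
  printf("qcoeff=%d dqcoeff=%d\n", q, dq); /* qcoeff=516 dqcoeff=8256 */
  return 0;
}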
diff --git a/third_party/aom/aom_dsp/quantize.h b/third_party/aom/aom_dsp/quantize.h
index fe49b830f..03609e8b4 100644
--- a/third_party/aom/aom_dsp/quantize.h
+++ b/third_party/aom/aom_dsp/quantize.h
@@ -19,32 +19,57 @@
extern "C" {
#endif
-#if CONFIG_AOM_QM
+void quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale);
+
+void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan);
+
+void highbd_quantize_b_helper_c(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale);
+
+#if CONFIG_HIGHBITDEPTH
+void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan);
+#endif
+
void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
#if CONFIG_TX64X64
void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
#endif // CONFIG_TX64X64
-void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan, const qm_val_t *qm_ptr,
- const qm_val_t *iqm_ptr);
+
+#if CONFIG_AOM_QM
#if CONFIG_HIGHBITDEPTH
void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
@@ -64,32 +89,10 @@ void aom_highbd_quantize_dc_64x64(
const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr);
#endif // CONFIG_TX64X64
-void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
#endif // CONFIG_HIGHBITDEPTH
#else // CONFIG_AOM_QM
-void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
-void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
-#if CONFIG_TX64X64
-void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
-#endif // CONFIG_TX64X64
#if CONFIG_HIGHBITDEPTH
void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
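Note: with the prototypes above, the size-specific entry points reduce to choosing the log_scale argument; the wrappers hard-code the constant per transform size. A hedged sketch of that mapping with a hypothetical helper, for square sizes only:

#include <stdio.h>

/* Hypothetical convenience: map a square transform dimension to the
 * log_scale value the quantize_b/quantize_dc helpers expect. */
static int log_scale_for_tx_dim(int dim) {
  if (dim >= 64) return 2; /* aom_quantize_b_64x64_c path */
  if (dim >= 32) return 1; /* aom_quantize_b_32x32_c path */
  return 0;                /* aom_quantize_b_c path */
}

int main(void) {
  for (int dim = 4; dim <= 64; dim *= 2)
    printf("%dx%d -> log_scale %d\n", dim, dim, log_scale_for_tx_dim(dim));
  return 0;
}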
diff --git a/third_party/aom/aom_dsp/sad.c b/third_party/aom/aom_dsp/sad.c
index b9c789ce5..6b8ca669b 100644
--- a/third_party/aom/aom_dsp/sad.c
+++ b/third_party/aom/aom_dsp/sad.c
@@ -163,11 +163,19 @@ sadMxN(8, 32)
sadMxNx4D(8, 32)
sadMxN(32, 8)
sadMxNx4D(32, 8)
+sadMxN(16, 64)
+sadMxNx4D(16, 64)
+sadMxN(64, 16)
+sadMxNx4D(64, 16)
+sadMxN(32, 128)
+sadMxNx4D(32, 128)
+sadMxN(128, 32)
+sadMxNx4D(128, 32)
#endif
/* clang-format on */
#if CONFIG_HIGHBITDEPTH
- static INLINE
+ static INLINE
unsigned int highbd_sad(const uint8_t *a8, int a_stride, const uint8_t *b8,
int b_stride, int width, int height) {
int y, x;
@@ -328,12 +336,20 @@ highbd_sadMxN(8, 32)
highbd_sadMxNx4D(8, 32)
highbd_sadMxN(32, 8)
highbd_sadMxNx4D(32, 8)
+highbd_sadMxN(16, 64)
+highbd_sadMxNx4D(16, 64)
+highbd_sadMxN(64, 16)
+highbd_sadMxNx4D(64, 16)
+highbd_sadMxN(32, 128)
+highbd_sadMxNx4D(32, 128)
+highbd_sadMxN(128, 32)
+highbd_sadMxNx4D(128, 32)
#endif
/* clang-format on */
#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_AV1 && CONFIG_EXT_INTER
- static INLINE
+#if CONFIG_AV1
+ static INLINE
unsigned int masked_sad(const uint8_t *src, int src_stride,
const uint8_t *a, int a_stride, const uint8_t *b,
int b_stride, const uint8_t *m, int m_stride,
@@ -395,11 +411,15 @@ MASKSADMxN(4, 16)
MASKSADMxN(16, 4)
MASKSADMxN(8, 32)
MASKSADMxN(32, 8)
+MASKSADMxN(16, 64)
+MASKSADMxN(64, 16)
+MASKSADMxN(32, 128)
+MASKSADMxN(128, 32)
#endif
/* clang-format on */
#if CONFIG_HIGHBITDEPTH
- static INLINE
+ static INLINE
unsigned int highbd_masked_sad(const uint8_t *src8, int src_stride,
const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
@@ -464,9 +484,13 @@ HIGHBD_MASKSADMXN(4, 16)
HIGHBD_MASKSADMXN(16, 4)
HIGHBD_MASKSADMXN(8, 32)
HIGHBD_MASKSADMXN(32, 8)
+HIGHBD_MASKSADMXN(16, 64)
+HIGHBD_MASKSADMXN(64, 16)
+HIGHBD_MASKSADMXN(32, 128)
+HIGHBD_MASKSADMXN(128, 32)
#endif
#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_AV1 && CONFIG_EXT_INTER
+#endif // CONFIG_AV1
#if CONFIG_AV1 && CONFIG_MOTION_VAR
// pre: predictor being evaluated
@@ -522,11 +546,15 @@ OBMCSADMxN(4, 16)
OBMCSADMxN(16, 4)
OBMCSADMxN(8, 32)
OBMCSADMxN(32, 8)
+OBMCSADMxN(16, 64)
+OBMCSADMxN(64, 16)
+OBMCSADMxN(32, 128)
+OBMCSADMxN(128, 32)
#endif
/* clang-format on */
#if CONFIG_HIGHBITDEPTH
- static INLINE
+ static INLINE
unsigned int highbd_obmc_sad(const uint8_t *pre8, int pre_stride,
const int32_t *wsrc, const int32_t *mask,
int width, int height) {
@@ -578,6 +606,10 @@ HIGHBD_OBMCSADMXN(4, 16)
HIGHBD_OBMCSADMXN(16, 4)
HIGHBD_OBMCSADMXN(8, 32)
HIGHBD_OBMCSADMXN(32, 8)
+HIGHBD_OBMCSADMXN(16, 64)
+HIGHBD_OBMCSADMXN(64, 16)
+HIGHBD_OBMCSADMXN(32, 128)
+HIGHBD_OBMCSADMXN(128, 32)
#endif
/* clang-format on */
#endif // CONFIG_HIGHBITDEPTH
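Note: the new 16x64/64x16/32x128/128x32 entries are generated by the sadMxN family of macros. The sketch below shows roughly what one instantiation expands to, following the sad() pattern already in this file; the real macro also emits the avg and x4d variants, and the function name here is suffixed to mark it as illustrative.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Plain sum of absolute differences over a width x height block. */
static unsigned int sad(const uint8_t *a, int a_stride, const uint8_t *b,
                        int b_stride, int width, int height) {
  unsigned int s = 0;
  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) s += abs(a[x] - b[x]);
    a += a_stride;
    b += b_stride;
  }
  return s;
}

/* Roughly what sadMxN(16, 64) produces for the plain SAD entry point. */
unsigned int aom_sad16x64_sketch(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride) {
  return sad(src, src_stride, ref, ref_stride, 16, 64);
}

int main(void) {
  uint8_t src[16 * 64], ref[16 * 64];
  for (int i = 0; i < 16 * 64; i++) {
    src[i] = (uint8_t)(i & 0x7f);
    ref[i] = (uint8_t)((i & 0x7f) + 1); /* every sample differs by 1 */
  }
  printf("%u\n", aom_sad16x64_sketch(src, 16, ref, 16)); /* prints 1024 */
  return 0;
}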
diff --git a/third_party/aom/aom_dsp/ssim.c b/third_party/aom/aom_dsp/ssim.c
index 141bf01c7..6ae378ff2 100644
--- a/third_party/aom/aom_dsp/ssim.c
+++ b/third_party/aom/aom_dsp/ssim.c
@@ -168,23 +168,16 @@ static double aom_highbd_ssim2(const uint8_t *img1, const uint8_t *img2,
double aom_calc_ssim(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest, double *weight) {
- double a, b, c;
- double ssimv;
-
- a = aom_ssim2(source->y_buffer, dest->y_buffer, source->y_stride,
- dest->y_stride, source->y_crop_width, source->y_crop_height);
-
- b = aom_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride,
- dest->uv_stride, source->uv_crop_width, source->uv_crop_height);
-
- c = aom_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride,
- dest->uv_stride, source->uv_crop_width, source->uv_crop_height);
-
- ssimv = a * .8 + .1 * (b + c);
+ double abc[3];
+ for (int i = 0; i < 3; ++i) {
+ const int is_uv = i > 0;
+ abc[i] = aom_ssim2(source->buffers[i], dest->buffers[i],
+ source->strides[is_uv], dest->strides[is_uv],
+ source->crop_widths[is_uv], source->crop_heights[is_uv]);
+ }
*weight = 1;
-
- return ssimv;
+ return abc[0] * .8 + .1 * (abc[1] + abc[2]);
}
// traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity
@@ -433,30 +426,19 @@ double aom_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
double aom_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest, double *weight,
uint32_t bd, uint32_t in_bd) {
- double a, b, c;
- double ssimv;
- uint32_t shift = 0;
-
assert(bd >= in_bd);
- shift = bd - in_bd;
-
- a = aom_highbd_ssim2(source->y_buffer, dest->y_buffer, source->y_stride,
- dest->y_stride, source->y_crop_width,
- source->y_crop_height, in_bd, shift);
-
- b = aom_highbd_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride,
- dest->uv_stride, source->uv_crop_width,
- source->uv_crop_height, in_bd, shift);
-
- c = aom_highbd_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride,
- dest->uv_stride, source->uv_crop_width,
- source->uv_crop_height, in_bd, shift);
-
- ssimv = a * .8 + .1 * (b + c);
+ const uint32_t shift = bd - in_bd;
+
+ double abc[3];
+ for (int i = 0; i < 3; ++i) {
+ const int is_uv = i > 0;
+ abc[i] = aom_highbd_ssim2(source->buffers[i], dest->buffers[i],
+ source->strides[is_uv], dest->strides[is_uv],
+ source->crop_widths[is_uv],
+ source->crop_heights[is_uv], in_bd, shift);
+ }
*weight = 1;
-
- return ssimv;
+ return abc[0] * .8 + .1 * (abc[1] + abc[2]);
}
-
#endif // CONFIG_HIGHBITDEPTH
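Note: the ssim.c rewrite relies on YV12_BUFFER_CONFIG exposing its planes as small arrays (buffers[3], with U and V sharing the [1] stride and crop entries), so the three aom_ssim2 calls collapse into a loop while the score stays 0.8*Y + 0.1*(U+V). A stub sketch of that combination, with a placeholder standing in for the per-plane metric:

#include <stdio.h>

/* Placeholder per-plane score standing in for aom_ssim2(); the real call
 * takes the plane buffer, strides, and cropped dimensions. */
static double plane_score_stub(int plane) { return plane == 0 ? 0.95 : 0.90; }

static double weighted_ssim_sketch(void) {
  double abc[3];
  for (int i = 0; i < 3; ++i) {
    /* i > 0 would select the shared [1] stride/size entry for U and V */
    abc[i] = plane_score_stub(i);
  }
  return abc[0] * .8 + .1 * (abc[1] + abc[2]); /* 0.8*Y + 0.1*(U+V) */
}

int main(void) {
  printf("%f\n", weighted_ssim_sketch()); /* 0.95*.8 + .1*(0.9+0.9) = 0.94 */
  return 0;
}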
diff --git a/third_party/aom/aom_dsp/txfm_common.h b/third_party/aom/aom_dsp/txfm_common.h
index 01732ae64..ef9e9bc98 100644
--- a/third_party/aom/aom_dsp/txfm_common.h
+++ b/third_party/aom/aom_dsp/txfm_common.h
@@ -13,6 +13,7 @@
#define AOM_DSP_TXFM_COMMON_H_
#include "aom_dsp/aom_dsp_common.h"
+#include "av1/common/enums.h"
// Constants and Macros used by all idct/dct functions
#define DCT_CONST_BITS 14
@@ -23,18 +24,25 @@
typedef struct txfm_param {
// for both forward and inverse transforms
- int tx_type;
- int tx_size;
+ TX_TYPE tx_type;
+ TX_SIZE tx_size;
int lossless;
int bd;
#if CONFIG_MRC_TX || CONFIG_LGT
+ int is_inter;
+#endif // CONFIG_MRC_TX || CONFIG_LGT
+#if CONFIG_MRC_TX || CONFIG_LGT_FROM_PRED
int stride;
uint8_t *dst;
-#endif // CONFIG_MRC_TX || CONFIG_LGT
-#if CONFIG_LGT
- int is_inter;
+#if CONFIG_MRC_TX
+ int *valid_mask;
+ uint8_t *mask;
+#endif // CONFIG_MRC_TX
+#if CONFIG_LGT_FROM_PRED
int mode;
-#endif
+ int use_lgt;
+#endif // CONFIG_LGT_FROM_PRED
+#endif // CONFIG_MRC_TX || CONFIG_LGT_FROM_PRED
// for inverse transforms only
#if CONFIG_ADAPT_SCAN
const int16_t *eob_threshold;
@@ -87,27 +95,608 @@ static const tran_high_t sinpi_4_9 = 15212;
// 16384 * sqrt(2)
static const tran_high_t Sqrt2 = 23170;
+static const tran_high_t InvSqrt2 = 11585;
static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
return rv;
}
-#if CONFIG_LGT
-// The Line Graph Transforms (LGTs) matrices are written as follows.
-// Each 2D array is 16384 times an LGT matrix, which is the matrix of
-// eigenvectors of the graph Laplacian matrices for the line graph.
+#if CONFIG_LGT_FROM_PRED
+// Use negative numbers so they do not coincide with lgt*[0][0], which are
+// always nonnegative.
+typedef enum {
+ DCT4 = -1,
+ ADST4 = -2,
+ DCT8 = -3,
+ ADST8 = -4,
+ DCT16 = -5,
+ ADST16 = -6,
+ DCT32 = -7,
+ ADST32 = -8,
+} ButterflyLgt;
-// LGT4 name: lgt4_140
-// Self loops: 1.400, 0.000, 0.000, 0.000
+/* These are some LGTs already implemented in the codec. When any of them
+ * is chosen, the flgt or ilgt function will call the existing fast
+ * transform instead of the matrix product implementation. Thus, we
+ * do not need the actual basis functions here. */
+static const tran_high_t lgt4_000[1][1] = { { (tran_high_t)DCT4 } };
+static const tran_high_t lgt4_100[1][1] = { { (tran_high_t)ADST4 } };
+static const tran_high_t lgt8_000[1][1] = { { (tran_high_t)DCT8 } };
+static const tran_high_t lgt8_200[1][1] = { { (tran_high_t)ADST8 } };
+static const tran_high_t lgt16_000[1][1] = { { (tran_high_t)DCT16 } };
+static const tran_high_t lgt16_200[1][1] = { { (tran_high_t)ADST16 } };
+static const tran_high_t lgt32_000[1][1] = { { (tran_high_t)DCT32 } };
+static const tran_high_t lgt32_200[1][1] = { { (tran_high_t)ADST32 } };
+
+/* The Line Graph Transform (LGT) matrices are written as follows.
+   Each 2D array is sqrt(2)*16384 times an LGT matrix, which is the
+   matrix of eigenvectors of the graph Laplacian matrix of the associated
+   line graph. Some of these transforms have fast algorithms, but those
+   are not yet implemented. */
+
+// LGT4 name: lgt4_150_000w3
+// Self loops: 1.500, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 0.000
+static const tran_high_t lgt4_150_000w3[4][4] = {
+ { 0, 0, 0, 23170 },
+ { 5991, 13537, 17825, 0 },
+ { 15515, 10788, -13408, 0 },
+ { 16133, -15403, 6275, 0 },
+};
+
+// LGT4 name: lgt4_100_000w3
+// Self loops: 1.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 0.000
+static const tran_high_t lgt4_100_000w3[4][4] = {
+ { 0, 0, 0, 23170 },
+ { 7600, 13694, 17076, 0 },
+ { 17076, 7600, -13694, 0 },
+ { 13694, -17076, 7600, 0 },
+};
+
+// LGT4 name: lgt4_060_000w3
+// Self loops: 0.600, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 0.000
+static const tran_high_t lgt4_060_000w3[4][4] = {
+ { 0, 0, 0, 23170 },
+ { 9449, 13755, 16075, 0 },
+ { 17547, 4740, -14370, 0 },
+ { 11819, -18034, 8483, 0 },
+};
+
+// LGT4 name: lgt4_000w3
+// Self loops: 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 0.000
+static const tran_high_t lgt4_000w3[4][4] = {
+ { 0, 0, 0, 23170 },
+ { 13377, 13377, 13377, 0 },
+ { 16384, 0, -16384, 0 },
+ { 9459, -18919, 9459, 0 },
+};
+
+// LGT4 name: lgt4_150_000w2
+// Self loops: 1.500, 0.000, 0.000, 0.000
+// Edges: 1.000, 0.000, 1.000
+static const tran_high_t lgt4_150_000w2[4][4] = {
+ { 10362, 20724, 0, 0 },
+ { 20724, -10362, 0, 0 },
+ { 0, 0, 16384, 16384 },
+ { 0, 0, 16384, -16384 },
+};
+
+// LGT4 name: lgt4_100_000w2
+// Self loops: 1.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 0.000, 1.000
+static const tran_high_t lgt4_100_000w2[4][4] = {
+ { 12181, 19710, 0, 0 },
+ { 19710, -12181, 0, 0 },
+ { 0, 0, 16384, 16384 },
+ { 0, 0, 16384, -16384 },
+};
+
+// LGT4 name: lgt4_060_000w2
+// Self loops: 0.600, 0.000, 0.000, 0.000
+// Edges: 1.000, 0.000, 1.000
+static const tran_high_t lgt4_060_000w2[4][4] = {
+ { 13831, 18590, 0, 0 },
+ { 18590, -13831, 0, 0 },
+ { 0, 0, 16384, 16384 },
+ { 0, 0, 16384, -16384 },
+};
+
+// LGT4 name: lgt4_000w2
+// Self loops: 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 0.000, 1.000
+static const tran_high_t lgt4_000w2[4][4] = {
+ { 16384, 16384, 0, 0 },
+ { 16384, -16384, 0, 0 },
+ { 0, 0, 16384, 16384 },
+ { 0, 0, 16384, -16384 },
+};
+
+// LGT4 name: lgt4_150_000w1
+// Self loops: 1.500, 0.000, 0.000, 0.000
+// Edges: 0.000, 1.000, 1.000
+static const tran_high_t lgt4_150_000w1[4][4] = {
+ { 23170, 0, 0, 0 },
+ { 0, 13377, 13377, 13377 },
+ { 0, 16384, 0, -16384 },
+ { 0, 9459, -18919, 9459 },
+};
+
+// LGT4 name: lgt4_100_000w1
+// Self loops: 1.000, 0.000, 0.000, 0.000
+// Edges: 0.000, 1.000, 1.000
+static const tran_high_t lgt4_100_000w1[4][4] = {
+ { 23170, 0, 0, 0 },
+ { 0, 13377, 13377, 13377 },
+ { 0, 16384, 0, -16384 },
+ { 0, 9459, -18919, 9459 },
+};
+
+// LGT4 name: lgt4_060_000w1
+// Self loops: 0.600, 0.000, 0.000, 0.000
+// Edges: 0.000, 1.000, 1.000
+static const tran_high_t lgt4_060_000w1[4][4] = {
+ { 23170, 0, 0, 0 },
+ { 0, 13377, 13377, 13377 },
+ { 0, 16384, 0, -16384 },
+ { 0, 9459, -18919, 9459 },
+};
+
+// LGT4 name: lgt4_000w1
+// Self loops: 0.000, 0.000, 0.000, 0.000
+// Edges: 0.000, 1.000, 1.000
+static const tran_high_t lgt4_000w1[4][4] = {
+ { 23170, 0, 0, 0 },
+ { 0, 13377, 13377, 13377 },
+ { 0, 16384, 0, -16384 },
+ { 0, 9459, -18919, 9459 },
+};
+
+// LGT4 name: lgt4_060
+// Self loops: 0.600, 0.000, 0.000, 0.000
// Edges: 1.000, 1.000, 1.000
-static const tran_high_t lgt4_140[4][4] = {
- { 4206, 9518, 13524, 15674 },
- { 11552, 14833, 1560, -13453 },
- { 15391, -1906, -14393, 9445 },
- { 12201, -14921, 12016, -4581 },
+static const tran_high_t lgt4_060[4][4] = {
+ { 6971, 10504, 13060, 14400 },
+ { 14939, 11211, -2040, -13559 },
+ { 14096, -8258, -12561, 10593 },
+ { 8150, -15253, 14295, -5784 },
+};
+
+// LGT4 name: lgt4_150
+// Self loops: 1.500, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000
+static const tran_high_t lgt4_150[4][4] = {
+ { 3998, 9435, 13547, 15759 },
+ { 11106, 15105, 1886, -13483 },
+ { 15260, -1032, -14674, 9361 },
+ { 12833, -14786, 11596, -4372 },
+};
+
+// LGT8 name: lgt8_150_000w7
+// Self loops: 1.500, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 0.000
+static const tran_high_t lgt8_150_000w7[8][8] = {
+ { 0, 0, 0, 0, 0, 0, 0, 32768 },
+ { 2522, 6185, 9551, 12461, 14775, 16381, 17204, 0 },
+ { 7390, 15399, 16995, 11515, 1240, -9551, -16365, 0 },
+ { 11716, 16625, 3560, -13353, -15831, -1194, 14733, 0 },
+ { 15073, 8866, -14291, -10126, 13398, 11308, -12401, 0 },
+ { 16848, -4177, -13724, 14441, 2923, -16628, 9513, 0 },
+ { 15942, -14888, 5405, 7137, -15640, 15288, -6281, 0 },
+ { 10501, -14293, 16099, -15670, 13063, -8642, 3021, 0 },
+};
+
+// LGT8 name: lgt8_100_000w7
+// Self loops: 1.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 0.000
+static const tran_high_t lgt8_100_000w7[8][8] = {
+ { 0, 0, 0, 0, 0, 0, 0, 32768 },
+ { 3518, 6883, 9946, 12575, 14654, 16093, 16829, 0 },
+ { 9946, 16093, 16093, 9946, 0, -9946, -16093, 0 },
+ { 14654, 14654, 0, -14654, -14654, 0, 14654, 0 },
+ { 16829, 3518, -16093, -6883, 14654, 9946, -12575, 0 },
+ { 16093, -9946, -9946, 16093, 0, -16093, 9946, 0 },
+ { 12575, -16829, 9946, 3518, -14654, 16093, -6883, 0 },
+ { 6883, -12575, 16093, -16829, 14654, -9946, 3518, 0 },
+};
+
+// LGT8 name: lgt8_060_000w7
+// Self loops: 0.600, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 0.000
+static const tran_high_t lgt8_060_000w7[8][8] = {
+ { 0, 0, 0, 0, 0, 0, 0, 32768 },
+ { 5087, 7951, 10521, 12701, 14411, 15587, 16186, 0 },
+ { 13015, 16486, 14464, 7621, -1762, -10557, -15834, 0 },
+ { 16581, 11475, -4050, -15898, -13311, 1362, 14798, 0 },
+ { 16536, -1414, -16981, -3927, 15746, 8879, -12953, 0 },
+ { 14104, -13151, -7102, 16932, -1912, -15914, 10385, 0 },
+ { 10156, -17168, 11996, 1688, -14174, 16602, -7249, 0 },
+ { 5295, -11721, 15961, -17224, 15274, -10476, 3723, 0 },
+};
+
+// LGT8 name: lgt8_000w7
+// Self loops: 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 0.000
+static const tran_high_t lgt8_000w7[8][8] = {
+ { 0, 0, 0, 0, 0, 0, 0, 32768 },
+ { 12385, 12385, 12385, 12385, 12385, 12385, 12385, 0 },
+ { 17076, 13694, 7600, 0, -7600, -13694, -17076, 0 },
+ { 15781, 3898, -10921, -17515, -10921, 3898, 15781, 0 },
+ { 13694, -7600, -17076, 0, 17076, 7600, -13694, 0 },
+ { 10921, -15781, -3898, 17515, -3898, -15781, 10921, 0 },
+ { 7600, -17076, 13694, 0, -13694, 17076, -7600, 0 },
+ { 3898, -10921, 15781, -17515, 15781, -10921, 3898, 0 },
+};
+
+// LGT8 name: lgt8_150_000w6
+// Self loops: 1.500, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 0.000, 1.000
+static const tran_high_t lgt8_150_000w6[8][8] = {
+ { 0, 0, 0, 0, 0, 0, 23170, 23170 },
+ { 0, 0, 0, 0, 0, 0, 23170, -23170 },
+ { 3157, 7688, 11723, 15002, 17312, 18506, 0, 0 },
+ { 9167, 17832, 16604, 6164, -7696, -17286, 0, 0 },
+ { 14236, 15584, -4969, -18539, -6055, 14938, 0, 0 },
+ { 17558, 1891, -18300, 5288, 16225, -11653, 0, 0 },
+ { 17776, -13562, -647, 14380, -17514, 7739, 0, 0 },
+ { 12362, -16318, 17339, -15240, 10399, -3688, 0, 0 },
+};
+
+// LGT8 name: lgt8_100_000w6
+// Self loops: 1.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 0.000, 1.000
+static const tran_high_t lgt8_100_000w6[8][8] = {
+ { 0, 0, 0, 0, 0, 0, 23170, 23170 },
+ { 0, 0, 0, 0, 0, 0, 23170, -23170 },
+ { 4350, 8447, 12053, 14959, 16995, 18044, 0, 0 },
+ { 12053, 18044, 14959, 4350, -8447, -16995, 0, 0 },
+ { 16995, 12053, -8447, -18044, -4350, 14959, 0, 0 },
+ { 18044, -4350, -16995, 8447, 14959, -12053, 0, 0 },
+ { 14959, -16995, 4350, 12053, -18044, 8447, 0, 0 },
+ { 8447, -14959, 18044, -16995, 12053, -4350, 0, 0 },
+};
+
+// LGT8 name: lgt8_060_000w6
+// Self loops: 0.600, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 0.000, 1.000
+static const tran_high_t lgt8_060_000w6[8][8] = {
+ { 0, 0, 0, 0, 0, 0, 23170, 23170 },
+ { 0, 0, 0, 0, 0, 0, 23170, -23170 },
+ { 6154, 9551, 12487, 14823, 16446, 17277, 0, 0 },
+ { 15149, 17660, 12503, 1917, -9502, -16795, 0, 0 },
+ { 18166, 7740, -11772, -17465, -2656, 15271, 0, 0 },
+ { 16682, -8797, -15561, 10779, 14189, -12586, 0, 0 },
+ { 12436, -18234, 7007, 10763, -18483, 8945, 0, 0 },
+ { 6591, -14172, 18211, -17700, 12766, -4642, 0, 0 },
+};
+
+// LGT8 name: lgt8_000w6
+// Self loops: 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 0.000, 1.000
+static const tran_high_t lgt8_000w6[8][8] = {
+ { 0, 0, 0, 0, 0, 0, 23170, 23170 },
+ { 0, 0, 0, 0, 0, 0, 23170, -23170 },
+ { 13377, 13377, 13377, 13377, 13377, 13377, 0, 0 },
+ { 18274, 13377, 4896, -4896, -13377, -18274, 0, 0 },
+ { 16384, 0, -16384, -16384, 0, 16384, 0, 0 },
+ { 13377, -13377, -13377, 13377, 13377, -13377, 0, 0 },
+ { 9459, -18919, 9459, 9459, -18919, 9459, 0, 0 },
+ { 4896, -13377, 18274, -18274, 13377, -4896, 0, 0 },
+};
+
+// LGT8 name: lgt8_150_000w5
+// Self loops: 1.500, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 0.000, 1.000, 1.000
+static const tran_high_t lgt8_150_000w5[8][8] = {
+ { 0, 0, 0, 0, 0, 18919, 18919, 18919 },
+ { 0, 0, 0, 0, 0, 23170, 0, -23170 },
+ { 0, 0, 0, 0, 0, 13377, -26755, 13377 },
+ { 4109, 9895, 14774, 18299, 20146, 0, 0, 0 },
+ { 11753, 20300, 13161, -4148, -18252, 0, 0, 0 },
+ { 17573, 10921, -16246, -12895, 14679, 0, 0, 0 },
+ { 19760, -9880, -9880, 19760, -9880, 0, 0, 0 },
+ { 14815, -18624, 17909, -12844, 4658, 0, 0, 0 },
+};
+
+// LGT8 name: lgt8_100_000w5
+// Self loops: 1.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 0.000, 1.000, 1.000
+static const tran_high_t lgt8_100_000w5[8][8] = {
+ { 0, 0, 0, 0, 0, 18919, 18919, 18919 },
+ { 0, 0, 0, 0, 0, 23170, 0, -23170 },
+ { 0, 0, 0, 0, 0, 13377, -26755, 13377 },
+ { 5567, 10683, 14933, 17974, 19559, 0, 0, 0 },
+ { 14933, 19559, 10683, -5567, -17974, 0, 0, 0 },
+ { 19559, 5567, -17974, -10683, 14933, 0, 0, 0 },
+ { 17974, -14933, -5567, 19559, -10683, 0, 0, 0 },
+ { 10683, -17974, 19559, -14933, 5567, 0, 0, 0 },
+};
+
+// LGT8 name: lgt8_060_000w5
+// Self loops: 0.600, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 0.000, 1.000, 1.000
+static const tran_high_t lgt8_060_000w5[8][8] = {
+ { 0, 0, 0, 0, 0, 18919, 18919, 18919 },
+ { 0, 0, 0, 0, 0, 23170, 0, -23170 },
+ { 0, 0, 0, 0, 0, 13377, -26755, 13377 },
+ { 7650, 11741, 15069, 17415, 18628, 0, 0, 0 },
+ { 17824, 18002, 7558, -7345, -17914, 0, 0, 0 },
+ { 19547, 569, -19303, -8852, 15505, 0, 0, 0 },
+ { 15592, -17548, -2862, 19625, -11374, 0, 0, 0 },
+ { 8505, -17423, 20218, -15907, 6006, 0, 0, 0 },
+};
+
+// LGT8 name: lgt8_000w5
+// Self loops: 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 0.000, 1.000, 1.000
+static const tran_high_t lgt8_000w5[8][8] = {
+ { 0, 0, 0, 0, 0, 18919, 18919, 18919 },
+ { 0, 0, 0, 0, 0, 23170, 0, -23170 },
+ { 0, 0, 0, 0, 0, 13377, -26755, 13377 },
+ { 14654, 14654, 14654, 14654, 14654, 0, 0, 0 },
+ { 19710, 12181, 0, -12181, -19710, 0, 0, 0 },
+ { 16766, -6404, -20724, -6404, 16766, 0, 0, 0 },
+ { 12181, -19710, 0, 19710, -12181, 0, 0, 0 },
+ { 6404, -16766, 20724, -16766, 6404, 0, 0, 0 },
+};
+
+// LGT8 name: lgt8_150_000w4
+// Self loops: 1.500, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 0.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_150_000w4[8][8] = {
+ { 5655, 13343, 19159, 22286, 0, 0, 0, 0 },
+ { 15706, 21362, 2667, -19068, 0, 0, 0, 0 },
+ { 21580, -1459, -20752, 13238, 0, 0, 0, 0 },
+ { 18148, -20910, 16399, -6183, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 16384, 16384, 16384, 16384 },
+ { 0, 0, 0, 0, 21407, 8867, -8867, -21407 },
+ { 0, 0, 0, 0, 16384, -16384, -16384, 16384 },
+ { 0, 0, 0, 0, 8867, -21407, 21407, -8867 },
+};
+
+// LGT8 name: lgt8_100_000w4
+// Self loops: 1.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 0.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_100_000w4[8][8] = {
+ { 7472, 14042, 18919, 21513, 0, 0, 0, 0 },
+ { 18919, 18919, 0, -18919, 0, 0, 0, 0 },
+ { 21513, -7472, -18919, 14042, 0, 0, 0, 0 },
+ { 14042, -21513, 18919, -7472, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 16384, 16384, 16384, 16384 },
+ { 0, 0, 0, 0, 21407, 8867, -8867, -21407 },
+ { 0, 0, 0, 0, 16384, -16384, -16384, 16384 },
+ { 0, 0, 0, 0, 8867, -21407, 21407, -8867 },
+};
+
+// LGT8 name: lgt8_060_000w4
+// Self loops: 0.600, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 0.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_060_000w4[8][8] = {
+ { 9858, 14855, 18470, 20365, 0, 0, 0, 0 },
+ { 21127, 15855, -2886, -19175, 0, 0, 0, 0 },
+ { 19935, -11679, -17764, 14980, 0, 0, 0, 0 },
+ { 11525, -21570, 20217, -8180, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 16384, 16384, 16384, 16384 },
+ { 0, 0, 0, 0, 21407, 8867, -8867, -21407 },
+ { 0, 0, 0, 0, 16384, -16384, -16384, 16384 },
+ { 0, 0, 0, 0, 8867, -21407, 21407, -8867 },
+};
+
+// LGT8 name: lgt8_000w4
+// Self loops: 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 0.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_000w4[8][8] = {
+ { 16384, 16384, 16384, 16384, 0, 0, 0, 0 },
+ { 21407, 8867, -8867, -21407, 0, 0, 0, 0 },
+ { 16384, -16384, -16384, 16384, 0, 0, 0, 0 },
+ { 8867, -21407, 21407, -8867, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 16384, 16384, 16384, 16384 },
+ { 0, 0, 0, 0, 21407, 8867, -8867, -21407 },
+ { 0, 0, 0, 0, 16384, -16384, -16384, 16384 },
+ { 0, 0, 0, 0, 8867, -21407, 21407, -8867 },
+};
+
+// LGT8 name: lgt8_150_000w3
+// Self loops: 1.500, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 0.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_150_000w3[8][8] = {
+ { 8473, 19144, 25209, 0, 0, 0, 0, 0 },
+ { 21942, 15257, -18961, 0, 0, 0, 0, 0 },
+ { 22815, -21783, 8874, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 14654, 14654, 14654, 14654, 14654 },
+ { 0, 0, 0, 19710, 12181, 0, -12181, -19710 },
+ { 0, 0, 0, 16766, -6404, -20724, -6404, 16766 },
+ { 0, 0, 0, 12181, -19710, 0, 19710, -12181 },
+ { 0, 0, 0, 6404, -16766, 20724, -16766, 6404 },
+};
+
+// LGT8 name: lgt8_100_000w3
+// Self loops: 1.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 0.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_100_000w3[8][8] = {
+ { 10747, 19366, 24149, 0, 0, 0, 0, 0 },
+ { 24149, 10747, -19366, 0, 0, 0, 0, 0 },
+ { 19366, -24149, 10747, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 14654, 14654, 14654, 14654, 14654 },
+ { 0, 0, 0, 19710, 12181, 0, -12181, -19710 },
+ { 0, 0, 0, 16766, -6404, -20724, -6404, 16766 },
+ { 0, 0, 0, 12181, -19710, 0, 19710, -12181 },
+ { 0, 0, 0, 6404, -16766, 20724, -16766, 6404 },
+};
+
+// LGT8 name: lgt8_060_000w3
+// Self loops: 0.600, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 0.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_060_000w3[8][8] = {
+ { 13363, 19452, 22733, 0, 0, 0, 0, 0 },
+ { 24815, 6704, -20323, 0, 0, 0, 0, 0 },
+ { 16715, -25503, 11997, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 14654, 14654, 14654, 14654, 14654 },
+ { 0, 0, 0, 19710, 12181, 0, -12181, -19710 },
+ { 0, 0, 0, 16766, -6404, -20724, -6404, 16766 },
+ { 0, 0, 0, 12181, -19710, 0, 19710, -12181 },
+ { 0, 0, 0, 6404, -16766, 20724, -16766, 6404 },
+};
+
+// LGT8 name: lgt8_000w3
+// Self loops: 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 0.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_000w3[8][8] = {
+ { 18919, 18919, 18919, 0, 0, 0, 0, 0 },
+ { 23170, 0, -23170, 0, 0, 0, 0, 0 },
+ { 13377, -26755, 13377, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 14654, 14654, 14654, 14654, 14654 },
+ { 0, 0, 0, 19710, 12181, 0, -12181, -19710 },
+ { 0, 0, 0, 16766, -6404, -20724, -6404, 16766 },
+ { 0, 0, 0, 12181, -19710, 0, 19710, -12181 },
+ { 0, 0, 0, 6404, -16766, 20724, -16766, 6404 },
+};
+
+// LGT8 name: lgt8_150_000w2
+// Self loops: 1.500, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 0.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_150_000w2[8][8] = {
+ { 14654, 29309, 0, 0, 0, 0, 0, 0 },
+ { 29309, -14654, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 13377, 13377, 13377, 13377, 13377, 13377 },
+ { 0, 0, 18274, 13377, 4896, -4896, -13377, -18274 },
+ { 0, 0, 16384, 0, -16384, -16384, 0, 16384 },
+ { 0, 0, 13377, -13377, -13377, 13377, 13377, -13377 },
+ { 0, 0, 9459, -18919, 9459, 9459, -18919, 9459 },
+ { 0, 0, 4896, -13377, 18274, -18274, 13377, -4896 },
+};
+
+// LGT8 name: lgt8_100_000w2
+// Self loops: 1.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 0.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_100_000w2[8][8] = {
+ { 17227, 27874, 0, 0, 0, 0, 0, 0 },
+ { 27874, -17227, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 13377, 13377, 13377, 13377, 13377, 13377 },
+ { 0, 0, 18274, 13377, 4896, -4896, -13377, -18274 },
+ { 0, 0, 16384, 0, -16384, -16384, 0, 16384 },
+ { 0, 0, 13377, -13377, -13377, 13377, 13377, -13377 },
+ { 0, 0, 9459, -18919, 9459, 9459, -18919, 9459 },
+ { 0, 0, 4896, -13377, 18274, -18274, 13377, -4896 },
+};
+
+// LGT8 name: lgt8_060_000w2
+// Self loops: 0.600, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 0.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_060_000w2[8][8] = {
+ { 19560, 26290, 0, 0, 0, 0, 0, 0 },
+ { 26290, -19560, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 13377, 13377, 13377, 13377, 13377, 13377 },
+ { 0, 0, 18274, 13377, 4896, -4896, -13377, -18274 },
+ { 0, 0, 16384, 0, -16384, -16384, 0, 16384 },
+ { 0, 0, 13377, -13377, -13377, 13377, 13377, -13377 },
+ { 0, 0, 9459, -18919, 9459, 9459, -18919, 9459 },
+ { 0, 0, 4896, -13377, 18274, -18274, 13377, -4896 },
+};
+
+// LGT8 name: lgt8_000w2
+// Self loops: 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 0.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_000w2[8][8] = {
+ { 23170, 23170, 0, 0, 0, 0, 0, 0 },
+ { 23170, -23170, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 13377, 13377, 13377, 13377, 13377, 13377 },
+ { 0, 0, 18274, 13377, 4896, -4896, -13377, -18274 },
+ { 0, 0, 16384, 0, -16384, -16384, 0, 16384 },
+ { 0, 0, 13377, -13377, -13377, 13377, 13377, -13377 },
+ { 0, 0, 9459, -18919, 9459, 9459, -18919, 9459 },
+ { 0, 0, 4896, -13377, 18274, -18274, 13377, -4896 },
+};
+
+// LGT8 name: lgt8_150_000w1
+// Self loops: 1.500, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 0.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_150_000w1[8][8] = {
+ { 32768, 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 12385, 12385, 12385, 12385, 12385, 12385, 12385 },
+ { 0, 17076, 13694, 7600, 0, -7600, -13694, -17076 },
+ { 0, 15781, 3898, -10921, -17515, -10921, 3898, 15781 },
+ { 0, 13694, -7600, -17076, 0, 17076, 7600, -13694 },
+ { 0, 10921, -15781, -3898, 17515, -3898, -15781, 10921 },
+ { 0, 7600, -17076, 13694, 0, -13694, 17076, -7600 },
+ { 0, 3898, -10921, 15781, -17515, 15781, -10921, 3898 },
+};
+
+// LGT8 name: lgt8_100_000w1
+// Self loops: 1.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 0.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_100_000w1[8][8] = {
+ { 32768, 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 12385, 12385, 12385, 12385, 12385, 12385, 12385 },
+ { 0, 17076, 13694, 7600, 0, -7600, -13694, -17076 },
+ { 0, 15781, 3898, -10921, -17515, -10921, 3898, 15781 },
+ { 0, 13694, -7600, -17076, 0, 17076, 7600, -13694 },
+ { 0, 10921, -15781, -3898, 17515, -3898, -15781, 10921 },
+ { 0, 7600, -17076, 13694, 0, -13694, 17076, -7600 },
+ { 0, 3898, -10921, 15781, -17515, 15781, -10921, 3898 },
+};
+
+// LGT8 name: lgt8_060_000w1
+// Self loops: 0.600, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 0.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_060_000w1[8][8] = {
+ { 32768, 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 12385, 12385, 12385, 12385, 12385, 12385, 12385 },
+ { 0, 17076, 13694, 7600, 0, -7600, -13694, -17076 },
+ { 0, 15781, 3898, -10921, -17515, -10921, 3898, 15781 },
+ { 0, 13694, -7600, -17076, 0, 17076, 7600, -13694 },
+ { 0, 10921, -15781, -3898, 17515, -3898, -15781, 10921 },
+ { 0, 7600, -17076, 13694, 0, -13694, 17076, -7600 },
+ { 0, 3898, -10921, 15781, -17515, 15781, -10921, 3898 },
+};
+
+// LGT8 name: lgt8_000w1
+// Self loops: 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 0.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_000w1[8][8] = {
+ { 32768, 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 12385, 12385, 12385, 12385, 12385, 12385, 12385 },
+ { 0, 17076, 13694, 7600, 0, -7600, -13694, -17076 },
+ { 0, 15781, 3898, -10921, -17515, -10921, 3898, 15781 },
+ { 0, 13694, -7600, -17076, 0, 17076, 7600, -13694 },
+ { 0, 10921, -15781, -3898, 17515, -3898, -15781, 10921 },
+ { 0, 7600, -17076, 13694, 0, -13694, 17076, -7600 },
+ { 0, 3898, -10921, 15781, -17515, 15781, -10921, 3898 },
+};
+
+// LGT8 name: lgt8_060
+// Self loops: 0.600, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_060[8][8] = {
+ { 4295, 6746, 8999, 10987, 12653, 13947, 14832, 15280 },
+ { 11303, 15101, 14912, 10786, 3812, -4168, -11047, -15010 },
+ { 15051, 13208, 1823, -10879, -15721, -9207, 3959, 14265 },
+ { 15871, 3800, -13441, -12395, 5516, 15922, 4665, -12939 },
+ { 14630, -7269, -13926, 8618, 13091, -9886, -12133, 11062 },
+ { 12008, -14735, 180, 14586, -12245, -4458, 15932, -8720 },
+ { 8472, -15623, 14088, -4721, -7272, 15221, -14708, 6018 },
+ { 4372, -9862, 13927, -15981, 15727, -13202, 8770, -3071 },
+};
+
+// LGT8 name: lgt8_100
+// Self loops: 1.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_100[8][8] = {
+ { 2921, 5742, 8368, 10708, 12684, 14228, 15288, 15827 },
+ { 8368, 14228, 15827, 12684, 5742, -2921, -10708, -15288 },
+ { 12684, 15288, 5742, -8368, -15827, -10708, 2921, 14228 },
+ { 15288, 8368, -10708, -14228, 2921, 15827, 5742, -12684 },
+ { 15827, -2921, -15288, 5742, 14228, -8368, -12684, 10708 },
+ { 14228, -12684, -2921, 15288, -10708, -5742, 15827, -8368 },
+ { 10708, -15827, 12684, -2921, -8368, 15288, -14228, 5742 },
+ { 5742, -10708, 14228, -15827, 15288, -12684, 8368, -2921 },
};
+#endif // CONFIG_LGT_FROM_PRED
+#if CONFIG_LGT || CONFIG_LGT_FROM_PRED
// LGT4 name: lgt4_170
// Self loops: 1.700, 0.000, 0.000, 0.000
// Edges: 1.000, 1.000, 1.000
@@ -118,18 +707,14 @@ static const tran_high_t lgt4_170[4][4] = {
{ 14138, -14420, 10663, -3920 },
};
-// LGT8 name: lgt8_150
-// Self loops: 1.500, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
-// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000
-static const tran_high_t lgt8_150[8][8] = {
- { 2075, 5110, 7958, 10511, 12677, 14376, 15544, 16140 },
- { 6114, 13307, 16196, 13845, 7015, -2084, -10509, -15534 },
- { 9816, 16163, 8717, -6168, -15790, -11936, 2104, 14348 },
- { 12928, 12326, -7340, -15653, 242, 15763, 6905, -12632 },
- { 15124, 3038, -16033, 1758, 15507, -6397, -13593, 10463 },
- { 15895, -7947, -7947, 15895, -7947, -7947, 15895, -7947 },
- { 14325, -15057, 9030, 1050, -10659, 15483, -13358, 5236 },
- { 9054, -12580, 14714, -15220, 14043, -11312, 7330, -2537 },
+// LGT4 name: lgt4_140
+// Self loops: 1.400, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000
+static const tran_high_t lgt4_140[4][4] = {
+ { 4206, 9518, 13524, 15674 },
+ { 11552, 14833, 1560, -13453 },
+ { 15391, -1906, -14393, 9445 },
+ { 12201, -14921, 12016, -4581 },
};
// LGT8 name: lgt8_170
@@ -145,5 +730,19 @@ static const tran_high_t lgt8_170[8][8] = {
{ 15533, -13869, 6559, 3421, -12009, 15707, -13011, 5018 },
{ 11357, -13726, 14841, -14600, 13025, -10259, 6556, -2254 },
};
-#endif // CONFIG_LGT
+
+// LGT8 name: lgt8_150
+// Self loops: 1.500, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000
+// Edges: 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000
+static const tran_high_t lgt8_150[8][8] = {
+ { 2075, 5110, 7958, 10511, 12677, 14376, 15544, 16140 },
+ { 6114, 13307, 16196, 13845, 7015, -2084, -10509, -15534 },
+ { 9816, 16163, 8717, -6168, -15790, -11936, 2104, 14348 },
+ { 12928, 12326, -7340, -15653, 242, 15763, 6905, -12632 },
+ { 15124, 3038, -16033, 1758, 15507, -6397, -13593, 10463 },
+ { 15895, -7947, -7947, 15895, -7947, -7947, 15895, -7947 },
+ { 14325, -15057, 9030, 1050, -10659, 15483, -13358, 5236 },
+ { 9054, -12580, 14714, -15220, 14043, -11312, 7330, -2537 },
+};
+#endif // CONFIG_LGT || CONFIG_LGT_FROM_PRED
#endif // AOM_DSP_TXFM_COMMON_H_
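
The coefficient tables above follow a Q15 fixed-point convention: each basis row is a unit-norm vector scaled by 32768, e.g. 12385 ≈ 32768/√7 in the rows that span seven samples. A minimal standalone check of that convention (an observation about the tables, not something the patch states; the row is copied from lgt8_000w1):

    #include <math.h>
    #include <stdio.h>

    /* Row 1 of lgt8_000w1, copied from the table above. */
    static const int row[8] = { 0, 12385, 12385, 12385, 12385, 12385, 12385, 12385 };

    int main(void) {
      double ss = 0; /* sum of squares */
      for (int i = 0; i < 8; ++i) ss += (double)row[i] * row[i];
      /* A unit-norm row in Q15 should have sqrt(ss) close to 2^15 = 32768. */
      printf("row norm = %.1f (%.5f in Q15 units)\n", sqrt(ss), sqrt(ss) / 32768.0);
      return 0;
    }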
diff --git a/third_party/aom/aom_dsp/variance.c b/third_party/aom/aom_dsp/variance.c
index a4c3616e7..3c99aa155 100644
--- a/third_party/aom/aom_dsp/variance.c
+++ b/third_party/aom/aom_dsp/variance.c
@@ -256,7 +256,13 @@ VARIANCES(4, 16)
VARIANCES(16, 4)
VARIANCES(8, 32)
VARIANCES(32, 8)
-#endif
+VARIANCES(16, 64)
+VARIANCES(64, 16)
+#if CONFIG_EXT_PARTITION
+VARIANCES(32, 128)
+VARIANCES(128, 32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
GET_VAR(16, 16)
GET_VAR(8, 8)
@@ -661,7 +667,13 @@ HIGHBD_VARIANCES(4, 16)
HIGHBD_VARIANCES(16, 4)
HIGHBD_VARIANCES(8, 32)
HIGHBD_VARIANCES(32, 8)
-#endif
+HIGHBD_VARIANCES(16, 64)
+HIGHBD_VARIANCES(64, 16)
+#if CONFIG_EXT_PARTITION
+HIGHBD_VARIANCES(32, 128)
+HIGHBD_VARIANCES(128, 32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)
@@ -761,7 +773,7 @@ void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
}
#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_AV1 && CONFIG_EXT_INTER
+#if CONFIG_AV1
void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
int height, const uint8_t *ref, int ref_stride,
const uint8_t *mask, int mask_stride,
@@ -848,7 +860,13 @@ MASK_SUBPIX_VAR(4, 16)
MASK_SUBPIX_VAR(16, 4)
MASK_SUBPIX_VAR(8, 32)
MASK_SUBPIX_VAR(32, 8)
-#endif
+MASK_SUBPIX_VAR(16, 64)
+MASK_SUBPIX_VAR(64, 16)
+#if CONFIG_EXT_PARTITION
+MASK_SUBPIX_VAR(32, 128)
+MASK_SUBPIX_VAR(128, 32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_HIGHBITDEPTH
void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
@@ -985,9 +1003,15 @@ HIGHBD_MASK_SUBPIX_VAR(4, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 8)
-#endif
+HIGHBD_MASK_SUBPIX_VAR(16, 64)
+HIGHBD_MASK_SUBPIX_VAR(64, 16)
+#if CONFIG_EXT_PARTITION
+HIGHBD_MASK_SUBPIX_VAR(32, 128)
+HIGHBD_MASK_SUBPIX_VAR(128, 32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_AV1 && CONFIG_EXT_INTER
+#endif // CONFIG_AV1
#if CONFIG_AV1 && CONFIG_MOTION_VAR
static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
@@ -1094,7 +1118,17 @@ OBMC_VAR(8, 32)
OBMC_SUBPIX_VAR(8, 32)
OBMC_VAR(32, 8)
OBMC_SUBPIX_VAR(32, 8)
-#endif
+OBMC_VAR(16, 64)
+OBMC_SUBPIX_VAR(16, 64)
+OBMC_VAR(64, 16)
+OBMC_SUBPIX_VAR(64, 16)
+#if CONFIG_EXT_PARTITION
+OBMC_VAR(32, 128)
+OBMC_SUBPIX_VAR(32, 128)
+OBMC_VAR(128, 32)
+OBMC_SUBPIX_VAR(128, 32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_HIGHBITDEPTH
static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
@@ -1287,6 +1321,16 @@ HIGHBD_OBMC_VAR(8, 32)
HIGHBD_OBMC_SUBPIX_VAR(8, 32)
HIGHBD_OBMC_VAR(32, 8)
HIGHBD_OBMC_SUBPIX_VAR(32, 8)
-#endif
+HIGHBD_OBMC_VAR(16, 64)
+HIGHBD_OBMC_SUBPIX_VAR(16, 64)
+HIGHBD_OBMC_VAR(64, 16)
+HIGHBD_OBMC_SUBPIX_VAR(64, 16)
+#if CONFIG_EXT_PARTITION
+HIGHBD_OBMC_VAR(32, 128)
+HIGHBD_OBMC_SUBPIX_VAR(32, 128)
+HIGHBD_OBMC_VAR(128, 32)
+HIGHBD_OBMC_SUBPIX_VAR(128, 32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_AV1 && CONFIG_MOTION_VAR
diff --git a/third_party/aom/aom_dsp/variance.h b/third_party/aom/aom_dsp/variance.h
index 20f0895cb..a193df467 100644
--- a/third_party/aom/aom_dsp/variance.h
+++ b/third_party/aom/aom_dsp/variance.h
@@ -54,7 +54,7 @@ typedef unsigned int (*aom_subp_avg_variance_fn_t)(
const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
int b_stride, unsigned int *sse, const uint8_t *second_pred);
-#if CONFIG_AV1 && CONFIG_EXT_INTER
+#if CONFIG_AV1
typedef unsigned int (*aom_masked_sad_fn_t)(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
const uint8_t *second_pred,
@@ -64,7 +64,7 @@ typedef unsigned int (*aom_masked_subpixvariance_fn_t)(
const uint8_t *src, int src_stride, int xoffset, int yoffset,
const uint8_t *ref, int ref_stride, const uint8_t *second_pred,
const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse);
-#endif // CONFIG_AV1 && CONFIG_EXT_INTER
+#endif // CONFIG_AV1
#if CONFIG_AV1 && CONFIG_MOTION_VAR
typedef unsigned int (*aom_obmc_sad_fn_t)(const uint8_t *pred, int pred_stride,
@@ -90,10 +90,8 @@ typedef struct aom_variance_vtable {
aom_sad_multi_fn_t sdx3f;
aom_sad_multi_fn_t sdx8f;
aom_sad_multi_d_fn_t sdx4df;
-#if CONFIG_EXT_INTER
aom_masked_sad_fn_t msdf;
aom_masked_subpixvariance_fn_t msvf;
-#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
aom_obmc_sad_fn_t osdf;
aom_obmc_variance_fn_t ovf;
diff --git a/third_party/aom/aom_dsp/x86/aom_subpixel_8t_ssse3.asm b/third_party/aom/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
index 357f37401..8688fb544 100644
--- a/third_party/aom/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
+++ b/third_party/aom/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
@@ -346,9 +346,15 @@ cglobal filter_block1d16_%1, 6, 6, 14, LOCAL_VARS_SIZE, \
psraw m0, 7
psraw m4, 7
%ifidn %1, h8_add_src
+%if ARCH_X86=1 && CONFIG_PIC=1
+ pcmpeqb m2, m2 ;all ones
+ psrlw m2, 8 ;even_byte_mask
+%else
+ mova m2, [GLOBAL(even_byte_mask)]
+%endif
movu m5, [srcq]
mova m7, m5
- pand m5, [even_byte_mask]
+ pand m5, m2
psrlw m7, 8
paddsw m0, m5
paddsw m4, m7
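
The hunk above replaces a `[GLOBAL(even_byte_mask)]` memory reference with a two-instruction in-register construction, because a GLOBAL access in 32-bit PIC builds needs a base register this routine does not have free. A sketch of the same idiom in SSE2 intrinsics (the helper name is hypothetical):

    #include <emmintrin.h>

    /* Synthesize the 0x00ff-per-word even-byte mask without a memory
       constant: compare-equal sets every bit, then a 16-bit logical
       right shift by 8 clears the high byte of each word. */
    static __m128i even_byte_mask_sse2(void) {
      const __m128i zero = _mm_setzero_si128();
      const __m128i ones = _mm_cmpeq_epi8(zero, zero); /* all bits set */
      return _mm_srli_epi16(ones, 8);                  /* 0x00ff per word */
    }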
diff --git a/third_party/aom/aom_dsp/x86/common_avx2.h b/third_party/aom/aom_dsp/x86/common_avx2.h
new file mode 100644
index 000000000..5f9596a74
--- /dev/null
+++ b/third_party/aom/aom_dsp/x86/common_avx2.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_DSP_X86_COMMON_AVX2_H
+#define AOM_DSP_X86_COMMON_AVX2_H
+
+#include <immintrin.h>
+
+#include "./aom_config.h"
+
+// Note: in and out may point to the same buffer; all loads from in complete
+// before any store to out.
+static INLINE void mm256_transpose_16x16(const __m256i *in, __m256i *out) {
+ __m256i tr0_0 = _mm256_unpacklo_epi16(in[0], in[1]);
+ __m256i tr0_1 = _mm256_unpackhi_epi16(in[0], in[1]);
+ __m256i tr0_2 = _mm256_unpacklo_epi16(in[2], in[3]);
+ __m256i tr0_3 = _mm256_unpackhi_epi16(in[2], in[3]);
+ __m256i tr0_4 = _mm256_unpacklo_epi16(in[4], in[5]);
+ __m256i tr0_5 = _mm256_unpackhi_epi16(in[4], in[5]);
+ __m256i tr0_6 = _mm256_unpacklo_epi16(in[6], in[7]);
+ __m256i tr0_7 = _mm256_unpackhi_epi16(in[6], in[7]);
+
+ __m256i tr0_8 = _mm256_unpacklo_epi16(in[8], in[9]);
+ __m256i tr0_9 = _mm256_unpackhi_epi16(in[8], in[9]);
+ __m256i tr0_a = _mm256_unpacklo_epi16(in[10], in[11]);
+ __m256i tr0_b = _mm256_unpackhi_epi16(in[10], in[11]);
+ __m256i tr0_c = _mm256_unpacklo_epi16(in[12], in[13]);
+ __m256i tr0_d = _mm256_unpackhi_epi16(in[12], in[13]);
+ __m256i tr0_e = _mm256_unpacklo_epi16(in[14], in[15]);
+ __m256i tr0_f = _mm256_unpackhi_epi16(in[14], in[15]);
+
+ // 00 10 01 11 02 12 03 13 08 18 09 19 0a 1a 0b 1b
+ // 04 14 05 15 06 16 07 17 0c 1c 0d 1d 0e 1e 0f 1f
+ // 20 30 21 31 22 32 23 33 28 38 29 39 2a 3a 2b 3b
+ // 24 34 25 35 26 36 27 37 2c 3c 2d 3d 2e 3e 2f 3f
+ // 40 50 41 51 42 52 43 53 48 58 49 59 4a 5a 4b 5b
+ // 44 54 45 55 46 56 47 57 4c 5c 4d 5d 4e 5e 4f 5f
+ // 60 70 61 71 62 72 63 73 68 78 69 79 6a 7a 6b 7b
+ // 64 74 65 75 66 76 67 77 6c 7c 6d 7d 6e 7e 6f 7f
+
+ // 80 90 81 91 82 92 83 93 88 98 89 99 8a 9a 8b 9b
+ // 84 94 85 95 86 96 87 97 8c 9c 8d 9d 8e 9e 8f 9f
+ // a0 b0 a1 b1 a2 b2 a3 b3 a8 b8 a9 b9 aa ba ab bb
+ // a4 b4 a5 b5 a6 b6 a7 b7 ac bc ad bd ae be af bf
+ // c0 d0 c1 d1 c2 d2 c3 d3 c8 d8 c9 d9 ca da cb db
+ // c4 d4 c5 d5 c6 d6 c7 d7 cc dc cd dd ce de cf df
+ // e0 f0 e1 f1 e2 f2 e3 f3 e8 f8 e9 f9 ea fa eb fb
+ // e4 f4 e5 f5 e6 f6 e7 f7 ec fc ed fd ee fe ef ff
+
+ __m256i tr1_0 = _mm256_unpacklo_epi32(tr0_0, tr0_2);
+ __m256i tr1_1 = _mm256_unpackhi_epi32(tr0_0, tr0_2);
+ __m256i tr1_2 = _mm256_unpacklo_epi32(tr0_1, tr0_3);
+ __m256i tr1_3 = _mm256_unpackhi_epi32(tr0_1, tr0_3);
+ __m256i tr1_4 = _mm256_unpacklo_epi32(tr0_4, tr0_6);
+ __m256i tr1_5 = _mm256_unpackhi_epi32(tr0_4, tr0_6);
+ __m256i tr1_6 = _mm256_unpacklo_epi32(tr0_5, tr0_7);
+ __m256i tr1_7 = _mm256_unpackhi_epi32(tr0_5, tr0_7);
+
+ __m256i tr1_8 = _mm256_unpacklo_epi32(tr0_8, tr0_a);
+ __m256i tr1_9 = _mm256_unpackhi_epi32(tr0_8, tr0_a);
+ __m256i tr1_a = _mm256_unpacklo_epi32(tr0_9, tr0_b);
+ __m256i tr1_b = _mm256_unpackhi_epi32(tr0_9, tr0_b);
+ __m256i tr1_c = _mm256_unpacklo_epi32(tr0_c, tr0_e);
+ __m256i tr1_d = _mm256_unpackhi_epi32(tr0_c, tr0_e);
+ __m256i tr1_e = _mm256_unpacklo_epi32(tr0_d, tr0_f);
+ __m256i tr1_f = _mm256_unpackhi_epi32(tr0_d, tr0_f);
+
+ // 00 10 20 30 01 11 21 31 08 18 28 38 09 19 29 39
+ // 02 12 22 32 03 13 23 33 0a 1a 2a 3a 0b 1b 2b 3b
+ // 04 14 24 34 05 15 25 35 0c 1c 2c 3c 0d 1d 2d 3d
+ // 06 16 26 36 07 17 27 37 0e 1e 2e 3e 0f 1f 2f 3f
+ // 40 50 60 70 41 51 61 71 48 58 68 78 49 59 69 79
+ // 42 52 62 72 43 53 63 73 4a 5a 6a 7a 4b 5b 6b 7b
+ // 44 54 64 74 45 55 65 75 4c 5c 6c 7c 4d 5d 6d 7d
+ // 46 56 66 76 47 57 67 77 4e 5e 6e 7e 4f 5f 6f 7f
+
+ // 80 90 a0 b0 81 91 a1 b1 88 98 a8 b8 89 99 a9 b9
+ // 82 92 a2 b2 83 93 a3 b3 8a 9a aa ba 8b 9b ab bb
+ // 84 94 a4 b4 85 95 a5 b5 8c 9c ac bc 8d 9d ad bd
+ // 86 96 a6 b6 87 97 a7 b7 8e 9e ae be 8f 9f af bf
+ // c0 d0 e0 f0 c1 d1 e1 f1 c8 d8 e8 f8 c9 d9 e9 f9
+ // c2 d2 e2 f2 c3 d3 e3 f3 ca da ea fa cb db eb fb
+ // c4 d4 e4 f4 c5 d5 e5 f5 cc dc ec fc cd dd ed fd
+ // c6 d6 e6 f6 c7 d7 e7 f7 ce de ee fe cf df ef ff
+
+ tr0_0 = _mm256_unpacklo_epi64(tr1_0, tr1_4);
+ tr0_1 = _mm256_unpackhi_epi64(tr1_0, tr1_4);
+ tr0_2 = _mm256_unpacklo_epi64(tr1_1, tr1_5);
+ tr0_3 = _mm256_unpackhi_epi64(tr1_1, tr1_5);
+ tr0_4 = _mm256_unpacklo_epi64(tr1_2, tr1_6);
+ tr0_5 = _mm256_unpackhi_epi64(tr1_2, tr1_6);
+ tr0_6 = _mm256_unpacklo_epi64(tr1_3, tr1_7);
+ tr0_7 = _mm256_unpackhi_epi64(tr1_3, tr1_7);
+
+ tr0_8 = _mm256_unpacklo_epi64(tr1_8, tr1_c);
+ tr0_9 = _mm256_unpackhi_epi64(tr1_8, tr1_c);
+ tr0_a = _mm256_unpacklo_epi64(tr1_9, tr1_d);
+ tr0_b = _mm256_unpackhi_epi64(tr1_9, tr1_d);
+ tr0_c = _mm256_unpacklo_epi64(tr1_a, tr1_e);
+ tr0_d = _mm256_unpackhi_epi64(tr1_a, tr1_e);
+ tr0_e = _mm256_unpacklo_epi64(tr1_b, tr1_f);
+ tr0_f = _mm256_unpackhi_epi64(tr1_b, tr1_f);
+
+ // 00 10 20 30 40 50 60 70 08 18 28 38 48 58 68 78
+ // 01 11 21 31 41 51 61 71 09 19 29 39 49 59 69 79
+ // 02 12 22 32 42 52 62 72 0a 1a 2a 3a 4a 5a 6a 7a
+ // 03 13 23 33 43 53 63 73 0b 1b 2b 3b 4b 5b 6b 7b
+ // 04 14 24 34 44 54 64 74 0c 1c 2c 3c 4c 5c 6c 7c
+ // 05 15 25 35 45 55 65 75 0d 1d 2d 3d 4d 5d 6d 7d
+ // 06 16 26 36 46 56 66 76 0e 1e 2e 3e 4e 5e 6e 7e
+ // 07 17 27 37 47 57 67 77 0f 1f 2f 3f 4f 5f 6f 7f
+
+ // 80 90 a0 b0 c0 d0 e0 f0 88 98 a8 b8 c8 d8 e8 f8
+ // 81 91 a1 b1 c1 d1 e1 f1 89 99 a9 b9 c9 d9 e9 f9
+ // 82 92 a2 b2 c2 d2 e2 f2 8a 9a aa ba ca da ea fa
+ // 83 93 a3 b3 c3 d3 e3 f3 8b 9b ab bb cb db eb fb
+ // 84 94 a4 b4 c4 d4 e4 f4 8c 9c ac bc cc dc ec fc
+ // 85 95 a5 b5 c5 d5 e5 f5 8d 9d ad bd cd dd ed fd
+ // 86 96 a6 b6 c6 d6 e6 f6 8e 9e ae be ce de ee fe
+ // 87 97 a7 b7 c7 d7 e7 f7 8f 9f af bf cf df ef ff
+
+ out[0] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x20); // 0010 0000
+ out[8] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x31); // 0011 0001
+ out[1] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x20);
+ out[9] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x31);
+ out[2] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x20);
+ out[10] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x31);
+ out[3] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x20);
+ out[11] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x31);
+
+ out[4] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x20);
+ out[12] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x31);
+ out[5] = _mm256_permute2x128_si256(tr0_5, tr0_d, 0x20);
+ out[13] = _mm256_permute2x128_si256(tr0_5, tr0_d, 0x31);
+ out[6] = _mm256_permute2x128_si256(tr0_6, tr0_e, 0x20);
+ out[14] = _mm256_permute2x128_si256(tr0_6, tr0_e, 0x31);
+ out[7] = _mm256_permute2x128_si256(tr0_7, tr0_f, 0x20);
+ out[15] = _mm256_permute2x128_si256(tr0_7, tr0_f, 0x31);
+}
+#endif // AOM_DSP_X86_COMMON_AVX2_H
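
mm256_transpose_16x16 is pure data movement, so it is straightforward to validate against a scalar reference. A hypothetical test helper, assuming the block is 16 rows of 16 int16_t lanes:

    #include <stdint.h>

    /* Hypothetical scalar reference for validating mm256_transpose_16x16:
       a plain 16x16 transpose of int16_t. Unlike the intrinsic version,
       in and out must not alias here. */
    static void transpose_16x16_ref(const int16_t in[16][16],
                                    int16_t out[16][16]) {
      for (int r = 0; r < 16; ++r)
        for (int c = 0; c < 16; ++c) out[c][r] = in[r][c];
    }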
diff --git a/third_party/aom/aom_dsp/x86/fwd_txfm_avx2.h b/third_party/aom/aom_dsp/x86/fwd_txfm_avx2.h
index d3aceae00..86df4a6f6 100644
--- a/third_party/aom/aom_dsp/x86/fwd_txfm_avx2.h
+++ b/third_party/aom/aom_dsp/x86/fwd_txfm_avx2.h
@@ -15,21 +15,21 @@
#include "./aom_config.h"
static INLINE void storeu_output_avx2(const __m256i *coeff, tran_low_t *out) {
-#if CONFIG_HIGHBITDEPTH
- const __m256i zero = _mm256_setzero_si256();
- const __m256i sign = _mm256_cmpgt_epi16(zero, *coeff);
+ if (sizeof(tran_low_t) == 4) {
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i sign = _mm256_cmpgt_epi16(zero, *coeff);
- __m256i x0 = _mm256_unpacklo_epi16(*coeff, sign);
- __m256i x1 = _mm256_unpackhi_epi16(*coeff, sign);
+ __m256i x0 = _mm256_unpacklo_epi16(*coeff, sign);
+ __m256i x1 = _mm256_unpackhi_epi16(*coeff, sign);
- __m256i y0 = _mm256_permute2x128_si256(x0, x1, 0x20);
- __m256i y1 = _mm256_permute2x128_si256(x0, x1, 0x31);
+ __m256i y0 = _mm256_permute2x128_si256(x0, x1, 0x20);
+ __m256i y1 = _mm256_permute2x128_si256(x0, x1, 0x31);
- _mm256_storeu_si256((__m256i *)out, y0);
- _mm256_storeu_si256((__m256i *)(out + 8), y1);
-#else
- _mm256_storeu_si256((__m256i *)out, *coeff);
-#endif
+ _mm256_storeu_si256((__m256i *)out, y0);
+ _mm256_storeu_si256((__m256i *)(out + 8), y1);
+ } else {
+ _mm256_storeu_si256((__m256i *)out, *coeff);
+ }
}
#endif // AOM_DSP_X86_FWD_TXFM_AVX2_H
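
The rewrite swaps the CONFIG_HIGHBITDEPTH preprocessor branch for a sizeof(tran_low_t) test that the compiler folds to a constant, so both layouts stay compiled. When tran_low_t is 32-bit, each 16-bit coefficient is paired with its sign mask, which is exactly sign extension. A scalar sketch of that widening (the helper is illustrative, not part of the patch); the SSE2 store_output below uses the same idiom:

    #include <stdint.h>

    /* Scalar view of the widening store: pairing a 16-bit value with its
       sign mask (0 or -1) in the high half is sign extension to 32 bits. */
    static void store_widened(const int16_t *coeff, int32_t *out, int n) {
      for (int i = 0; i < n; ++i) {
        const uint16_t sign = coeff[i] < 0 ? 0xffff : 0x0000;
        out[i] =
            (int32_t)((uint32_t)(uint16_t)coeff[i] | ((uint32_t)sign << 16));
      }
    }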
diff --git a/third_party/aom/aom_dsp/x86/fwd_txfm_sse2.h b/third_party/aom/aom_dsp/x86/fwd_txfm_sse2.h
index 26b2db2e0..58e8971dd 100644
--- a/third_party/aom/aom_dsp/x86/fwd_txfm_sse2.h
+++ b/third_party/aom/aom_dsp/x86/fwd_txfm_sse2.h
@@ -247,16 +247,16 @@ static INLINE int k_check_epi32_overflow_32(
}
static INLINE void store_output(const __m128i *poutput, tran_low_t *dst_ptr) {
-#if CONFIG_HIGHBITDEPTH
- const __m128i zero = _mm_setzero_si128();
- const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero);
- __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits);
- __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits);
- _mm_store_si128((__m128i *)(dst_ptr), out0);
- _mm_store_si128((__m128i *)(dst_ptr + 4), out1);
-#else
- _mm_store_si128((__m128i *)(dst_ptr), *poutput);
-#endif // CONFIG_HIGHBITDEPTH
+ if (sizeof(tran_low_t) == 4) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero);
+ __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits);
+ __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits);
+ _mm_store_si128((__m128i *)(dst_ptr), out0);
+ _mm_store_si128((__m128i *)(dst_ptr + 4), out1);
+ } else {
+ _mm_store_si128((__m128i *)(dst_ptr), *poutput);
+ }
}
static INLINE __m128i mult_round_shift(const __m128i *pin0, const __m128i *pin1,
diff --git a/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c b/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c
new file mode 100644
index 000000000..41b55c985
--- /dev/null
+++ b/third_party/aom/aom_dsp/x86/highbd_intrapred_avx2.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+
+#include "./aom_dsp_rtcd.h"
+
+// -----------------------------------------------------------------------------
+// D45E_PRED
+/*
+; ------------------------------------------
+; inputs: x, y, z; output: result
+;
+; Trick from Pascal:
+; (x + 2y + z + 2) >> 2 can be calculated as:
+;   result = avg(x, z)
+;   result -= xor(x, z) & 1
+;   result = avg(result, y)
+; ------------------------------------------
+*/
+static INLINE __m256i avg3_epu16(const __m256i *x, const __m256i *y,
+ const __m256i *z) {
+ const __m256i one = _mm256_set1_epi16(1);
+ const __m256i a = _mm256_avg_epu16(*x, *z);
+ const __m256i b =
+ _mm256_subs_epu16(a, _mm256_and_si256(_mm256_xor_si256(*x, *z), one));
+ return _mm256_avg_epu16(b, *y);
+}
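
The identity in the comment is exact for unsigned integers: avg() rounds up, subtracting the parity bit of x^z turns avg(x,z) into floor((x+z)/2), and the final avg restores the +2 rounding. An exhaustive 8-bit check (the same algebra carries over to the 16-bit lanes used here):

    #include <assert.h>
    #include <stdio.h>

    int main(void) {
      for (int x = 0; x < 256; ++x)
        for (int y = 0; y < 256; ++y)
          for (int z = 0; z < 256; ++z) {
            const int a = (x + z + 1) >> 1;  /* avg(x, z), rounds up */
            const int b = a - ((x ^ z) & 1); /* now floor((x + z) / 2) */
            const int r = (b + y + 1) >> 1;  /* avg(b, y) */
            assert(r == ((x + 2 * y + z + 2) >> 2));
          }
      printf("avg3 identity holds for all 8-bit triples\n");
      return 0;
    }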
+
+static INLINE void d45e_w16(const __m256i *a0, const __m256i *a1,
+ const __m256i *a2, uint16_t **dst,
+ ptrdiff_t stride) {
+ const __m256i y = avg3_epu16(a0, a1, a2);
+ _mm256_storeu_si256((__m256i *)*dst, y);
+ *dst += stride;
+}
+
+void aom_highbd_d45e_predictor_16x8_avx2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ __m256i x0 = _mm256_loadu_si256((const __m256i *)above);
+ __m256i x1 = _mm256_loadu_si256((const __m256i *)(above + 1));
+ __m256i x2 = _mm256_loadu_si256((const __m256i *)(above + 2));
+
+ d45e_w16(&x0, &x1, &x2, &dst, stride);
+
+ int i = 3;
+ do {
+ x0 = _mm256_loadu_si256((const __m256i *)(above + i++));
+ d45e_w16(&x1, &x2, &x0, &dst, stride);
+
+ x1 = _mm256_loadu_si256((const __m256i *)(above + i++));
+ d45e_w16(&x2, &x0, &x1, &dst, stride);
+
+ x2 = _mm256_loadu_si256((const __m256i *)(above + i++));
+ d45e_w16(&x0, &x1, &x2, &dst, stride);
+ } while (i < 9);
+
+ x0 = _mm256_loadu_si256((const __m256i *)(above + 9));
+ x0 = _mm256_insert_epi16(x0, above[23], 15);
+ const __m256i y = avg3_epu16(&x1, &x2, &x0);
+ _mm256_storeu_si256((__m256i *)dst, y);
+}
+
+void aom_highbd_d45e_predictor_16x16_avx2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ __m256i x0 = _mm256_loadu_si256((const __m256i *)above);
+ __m256i x1 = _mm256_loadu_si256((const __m256i *)(above + 1));
+ __m256i x2 = _mm256_loadu_si256((const __m256i *)(above + 2));
+
+ d45e_w16(&x0, &x1, &x2, &dst, stride);
+
+ int i = 3;
+ do {
+ x0 = _mm256_loadu_si256((const __m256i *)(above + i++));
+ d45e_w16(&x1, &x2, &x0, &dst, stride);
+
+ x1 = _mm256_loadu_si256((const __m256i *)(above + i++));
+ d45e_w16(&x2, &x0, &x1, &dst, stride);
+
+ x2 = _mm256_loadu_si256((const __m256i *)(above + i++));
+ d45e_w16(&x0, &x1, &x2, &dst, stride);
+ } while (i < 15);
+
+ x0 = _mm256_loadu_si256((const __m256i *)(above + 15));
+ d45e_w16(&x1, &x2, &x0, &dst, stride);
+
+ x1 = _mm256_loadu_si256((const __m256i *)(above + 16));
+ d45e_w16(&x2, &x0, &x1, &dst, stride);
+
+ x2 = _mm256_loadu_si256((const __m256i *)(above + 17));
+ x2 = _mm256_insert_epi16(x2, above[31], 15);
+ const __m256i y = avg3_epu16(&x0, &x1, &x2);
+ _mm256_storeu_si256((__m256i *)dst, y);
+}
+
+void aom_highbd_d45e_predictor_16x32_avx2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ __m256i x0 = _mm256_loadu_si256((const __m256i *)above);
+ __m256i x1 = _mm256_loadu_si256((const __m256i *)(above + 1));
+ __m256i x2 = _mm256_loadu_si256((const __m256i *)(above + 2));
+
+ d45e_w16(&x0, &x1, &x2, &dst, stride);
+
+ int i = 3;
+ do {
+ x0 = _mm256_loadu_si256((const __m256i *)(above + i++));
+ d45e_w16(&x1, &x2, &x0, &dst, stride);
+
+ x1 = _mm256_loadu_si256((const __m256i *)(above + i++));
+ d45e_w16(&x2, &x0, &x1, &dst, stride);
+
+ x2 = _mm256_loadu_si256((const __m256i *)(above + i++));
+ d45e_w16(&x0, &x1, &x2, &dst, stride);
+ } while (i < 33);
+
+ x0 = _mm256_loadu_si256((const __m256i *)(above + 33));
+ x0 = _mm256_insert_epi16(x0, above[47], 15);
+ const __m256i y = avg3_epu16(&x1, &x2, &x0);
+ _mm256_storeu_si256((__m256i *)dst, y);
+}
+
+void aom_highbd_d45e_predictor_32x16_avx2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ __m256i x0 = _mm256_loadu_si256((const __m256i *)above);
+ __m256i x1 = _mm256_loadu_si256((const __m256i *)(above + 1));
+ __m256i x2 = _mm256_loadu_si256((const __m256i *)(above + 2));
+ __m256i y0 = _mm256_loadu_si256((const __m256i *)(above + 16));
+ __m256i y1 = _mm256_loadu_si256((const __m256i *)(above + 17));
+ __m256i y2 = _mm256_loadu_si256((const __m256i *)(above + 18));
+
+ uint16_t *dst1 = dst;
+ uint16_t *dst2 = dst + 16;
+
+ d45e_w16(&x0, &x1, &x2, &dst1, stride);
+ d45e_w16(&y0, &y1, &y2, &dst2, stride);
+
+ int i = 3;
+ do {
+ x0 = _mm256_loadu_si256((const __m256i *)(above + i));
+ d45e_w16(&x1, &x2, &x0, &dst1, stride);
+ y0 = _mm256_loadu_si256((const __m256i *)(above + 16 + i++));
+ d45e_w16(&y1, &y2, &y0, &dst2, stride);
+
+ x1 = _mm256_loadu_si256((const __m256i *)(above + i));
+ d45e_w16(&x2, &x0, &x1, &dst1, stride);
+ y1 = _mm256_loadu_si256((const __m256i *)(above + 16 + i++));
+ d45e_w16(&y2, &y0, &y1, &dst2, stride);
+
+ x2 = _mm256_loadu_si256((const __m256i *)(above + i));
+ d45e_w16(&x0, &x1, &x2, &dst1, stride);
+ y2 = _mm256_loadu_si256((const __m256i *)(above + 16 + i++));
+ d45e_w16(&y0, &y1, &y2, &dst2, stride);
+ } while (i < 15);
+
+ x0 = _mm256_loadu_si256((const __m256i *)(above + 15));
+ d45e_w16(&x1, &x2, &x0, &dst1, stride);
+ y0 = _mm256_loadu_si256((const __m256i *)(above + 16 + 15));
+ d45e_w16(&y1, &y2, &y0, &dst2, stride);
+
+ x1 = _mm256_loadu_si256((const __m256i *)(above + 16));
+ d45e_w16(&x2, &x0, &x1, &dst1, stride);
+ y1 = _mm256_loadu_si256((const __m256i *)(above + 16 + 16));
+ d45e_w16(&y2, &y0, &y1, &dst2, stride);
+
+ x2 = _mm256_loadu_si256((const __m256i *)(above + 17));
+ __m256i u = avg3_epu16(&x0, &x1, &x2);
+ _mm256_storeu_si256((__m256i *)dst1, u);
+
+ y2 = _mm256_loadu_si256((const __m256i *)(above + 16 + 17));
+ y2 = _mm256_insert_epi16(y2, above[47], 15);
+ u = avg3_epu16(&y0, &y1, &y2);
+ _mm256_storeu_si256((__m256i *)dst2, u);
+}
+
+void aom_highbd_d45e_predictor_32x32_avx2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ __m256i x0 = _mm256_loadu_si256((const __m256i *)above);
+ __m256i x1 = _mm256_loadu_si256((const __m256i *)(above + 1));
+ __m256i x2 = _mm256_loadu_si256((const __m256i *)(above + 2));
+ __m256i y0 = _mm256_loadu_si256((const __m256i *)(above + 16));
+ __m256i y1 = _mm256_loadu_si256((const __m256i *)(above + 17));
+ __m256i y2 = _mm256_loadu_si256((const __m256i *)(above + 18));
+
+ uint16_t *dst1 = dst;
+ uint16_t *dst2 = dst + 16;
+
+ d45e_w16(&x0, &x1, &x2, &dst1, stride);
+ d45e_w16(&y0, &y1, &y2, &dst2, stride);
+
+ int i = 3;
+ do {
+ x0 = _mm256_loadu_si256((const __m256i *)(above + i));
+ d45e_w16(&x1, &x2, &x0, &dst1, stride);
+ y0 = _mm256_loadu_si256((const __m256i *)(above + 16 + i++));
+ d45e_w16(&y1, &y2, &y0, &dst2, stride);
+
+ x1 = _mm256_loadu_si256((const __m256i *)(above + i));
+ d45e_w16(&x2, &x0, &x1, &dst1, stride);
+ y1 = _mm256_loadu_si256((const __m256i *)(above + 16 + i++));
+ d45e_w16(&y2, &y0, &y1, &dst2, stride);
+
+ x2 = _mm256_loadu_si256((const __m256i *)(above + i));
+ d45e_w16(&x0, &x1, &x2, &dst1, stride);
+ y2 = _mm256_loadu_si256((const __m256i *)(above + 16 + i++));
+ d45e_w16(&y0, &y1, &y2, &dst2, stride);
+ } while (i < 33);
+
+ x0 = _mm256_loadu_si256((const __m256i *)(above + 33));
+ __m256i u = avg3_epu16(&x1, &x2, &x0);
+ _mm256_storeu_si256((__m256i *)dst1, u);
+
+ y0 = _mm256_loadu_si256((const __m256i *)(above + 16 + 33));
+ y0 = _mm256_insert_epi16(y0, above[63], 15);
+ u = avg3_epu16(&y1, &y2, &y0);
+ _mm256_storeu_si256((__m256i *)dst2, u);
+}
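
All five predictors above stream the same three-register rotation through avg3_epu16, and their final-row _mm256_insert_epi16 calls all pin the out-of-range tap to above[bw + bh - 1]. A hedged scalar reference consistent with that behavior (inferred from those clamps, not stated in the patch):

    #include <stddef.h>
    #include <stdint.h>

    /* Scalar D45E sketch: row r, column c averages above[r + c .. r + c + 2]
       with (x + 2y + z + 2) >> 2, replicating above[bw + bh - 1] past the
       top-right edge, as the _mm256_insert_epi16 calls above do. */
    static void d45e_ref(uint16_t *dst, ptrdiff_t stride, int bw, int bh,
                         const uint16_t *above) {
      for (int r = 0; r < bh; ++r) {
        for (int c = 0; c < bw; ++c) {
          const int i = r + c;
          const int k = (i + 2 < bw + bh) ? i + 2 : bw + bh - 1;
          dst[c] = (uint16_t)((above[i] + 2 * above[i + 1] + above[k] + 2) >> 2);
        }
        dst += stride;
      }
    }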
diff --git a/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.asm b/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.asm
index 5d84ef8a7..91b3d126c 100644
--- a/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.asm
+++ b/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.asm
@@ -257,200 +257,3 @@ cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
dec nlines4d
jnz .loop
REP_RET
-
-INIT_XMM sse2
-cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bps
- movd m1, [aboveq-2]
- movq m0, [aboveq]
- pshuflw m1, m1, 0x0
- movlhps m0, m0 ; t1 t2 t3 t4 t1 t2 t3 t4
- movlhps m1, m1 ; tl tl tl tl tl tl tl tl
- ; Get the values to compute the maximum value at this bit depth
- pcmpeqw m3, m3
- movd m4, bpsd
- psubw m0, m1 ; t1-tl t2-tl t3-tl t4-tl
- psllw m3, m4
- pcmpeqw m2, m2
- pxor m4, m4 ; min possible value
- pxor m3, m2 ; max possible value
- mova m1, [leftq]
- pshuflw m2, m1, 0x0
- pshuflw m5, m1, 0x55
- movlhps m2, m5 ; l1 l1 l1 l1 l2 l2 l2 l2
- paddw m2, m0
- ;Clamp to the bit-depth
- pminsw m2, m3
- pmaxsw m2, m4
- ;Store the values
- movq [dstq ], m2
- movhpd [dstq+strideq*2], m2
- lea dstq, [dstq+strideq*4]
- pshuflw m2, m1, 0xaa
- pshuflw m5, m1, 0xff
- movlhps m2, m5
- paddw m2, m0
- ;Clamp to the bit-depth
- pminsw m2, m3
- pmaxsw m2, m4
- ;Store the values
- movq [dstq ], m2
- movhpd [dstq+strideq*2], m2
- RET
-
-INIT_XMM sse2
-cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one
- movd m1, [aboveq-2]
- mova m0, [aboveq]
- pshuflw m1, m1, 0x0
- ; Get the values to compute the maximum value at this bit depth
- mov oned, 1
- pxor m3, m3
- pxor m4, m4
- pinsrw m3, oned, 0
- pinsrw m4, bpsd, 0
- pshuflw m3, m3, 0x0
- DEFINE_ARGS dst, stride, line, left
- punpcklqdq m3, m3
- mov lineq, -4
- mova m2, m3
- punpcklqdq m1, m1
- psllw m3, m4
- add leftq, 16
- psubw m3, m2 ; max possible value
- pxor m4, m4 ; min possible value
- psubw m0, m1
-.loop:
- movd m1, [leftq+lineq*4]
- movd m2, [leftq+lineq*4+2]
- pshuflw m1, m1, 0x0
- pshuflw m2, m2, 0x0
- punpcklqdq m1, m1
- punpcklqdq m2, m2
- paddw m1, m0
- paddw m2, m0
- ;Clamp to the bit-depth
- pminsw m1, m3
- pminsw m2, m3
- pmaxsw m1, m4
- pmaxsw m2, m4
- ;Store the values
- mova [dstq ], m1
- mova [dstq+strideq*2], m2
- lea dstq, [dstq+strideq*4]
- inc lineq
- jnz .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bps
- movd m2, [aboveq-2]
- mova m0, [aboveq]
- mova m1, [aboveq+16]
- pshuflw m2, m2, 0x0
- ; Get the values to compute the maximum value at this bit depth
- pcmpeqw m3, m3
- movd m4, bpsd
- punpcklqdq m2, m2
- psllw m3, m4
- pcmpeqw m5, m5
- pxor m4, m4 ; min possible value
- pxor m3, m5 ; max possible value
- DEFINE_ARGS dst, stride, line, left
- mov lineq, -8
- psubw m0, m2
- psubw m1, m2
-.loop:
- movd m7, [leftq]
- pshuflw m5, m7, 0x0
- pshuflw m2, m7, 0x55
- punpcklqdq m5, m5 ; l1 l1 l1 l1 l1 l1 l1 l1
- punpcklqdq m2, m2 ; l2 l2 l2 l2 l2 l2 l2 l2
- paddw m6, m5, m0 ; t1-tl+l1 to t4-tl+l1
- paddw m5, m1 ; t5-tl+l1 to t8-tl+l1
- pminsw m6, m3
- pminsw m5, m3
- pmaxsw m6, m4 ; Clamp to the bit-depth
- pmaxsw m5, m4
- mova [dstq ], m6
- mova [dstq +16], m5
- paddw m6, m2, m0
- paddw m2, m1
- pminsw m6, m3
- pminsw m2, m3
- pmaxsw m6, m4
- pmaxsw m2, m4
- mova [dstq+strideq*2 ], m6
- mova [dstq+strideq*2+16], m2
- lea dstq, [dstq+strideq*4]
- inc lineq
- lea leftq, [leftq+4]
-
- jnz .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal highbd_tm_predictor_32x32, 5, 5, 8, dst, stride, above, left, bps
- movd m0, [aboveq-2]
- mova m1, [aboveq]
- mova m2, [aboveq+16]
- mova m3, [aboveq+32]
- mova m4, [aboveq+48]
- pshuflw m0, m0, 0x0
- ; Get the values to compute the maximum value at this bit depth
- pcmpeqw m5, m5
- movd m6, bpsd
- psllw m5, m6
- pcmpeqw m7, m7
- pxor m6, m6 ; min possible value
- pxor m5, m7 ; max possible value
- punpcklqdq m0, m0
- DEFINE_ARGS dst, stride, line, left
- mov lineq, -16
- psubw m1, m0
- psubw m2, m0
- psubw m3, m0
- psubw m4, m0
-.loop:
- movd m7, [leftq]
- pshuflw m7, m7, 0x0
- punpcklqdq m7, m7 ; l1 l1 l1 l1 l1 l1 l1 l1
- paddw m0, m7, m1
- pminsw m0, m5
- pmaxsw m0, m6
- mova [dstq ], m0
- paddw m0, m7, m2
- pminsw m0, m5
- pmaxsw m0, m6
- mova [dstq +16], m0
- paddw m0, m7, m3
- pminsw m0, m5
- pmaxsw m0, m6
- mova [dstq +32], m0
- paddw m0, m7, m4
- pminsw m0, m5
- pmaxsw m0, m6
- mova [dstq +48], m0
- movd m7, [leftq+2]
- pshuflw m7, m7, 0x0
- punpcklqdq m7, m7 ; l2 l2 l2 l2 l2 l2 l2 l2
- paddw m0, m7, m1
- pminsw m0, m5
- pmaxsw m0, m6
- mova [dstq+strideq*2 ], m0
- paddw m0, m7, m2
- pminsw m0, m5
- pmaxsw m0, m6
- mova [dstq+strideq*2+16], m0
- paddw m0, m7, m3
- pminsw m0, m5
- pmaxsw m0, m6
- mova [dstq+strideq*2+32], m0
- paddw m0, m7, m4
- pminsw m0, m5
- pmaxsw m0, m6
- mova [dstq+strideq*2+48], m0
- lea dstq, [dstq+strideq*4]
- lea leftq, [leftq+4]
- inc lineq
- jnz .loop
- REP_RET
diff --git a/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.c b/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.c
new file mode 100644
index 000000000..691e166cf
--- /dev/null
+++ b/third_party/aom/aom_dsp/x86/highbd_intrapred_sse2.c
@@ -0,0 +1,1256 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <emmintrin.h>
+
+#include "./aom_dsp_rtcd.h"
+
+// -----------------------------------------------------------------------------
+// H_PRED
+
+void aom_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i left_u16 = _mm_loadl_epi64((const __m128i *)left);
+ const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0);
+ const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
+ const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
+ const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
+ (void)above;
+ (void)bd;
+ _mm_storel_epi64((__m128i *)dst, row0);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row1);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row2);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row3);
+}
+
+void aom_highbd_h_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ aom_highbd_h_predictor_4x4_sse2(dst, stride, above, left, bd);
+ dst += stride << 2;
+ left += 4;
+ aom_highbd_h_predictor_4x4_sse2(dst, stride, above, left, bd);
+}
+
+void aom_highbd_h_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i left_u16 = _mm_load_si128((const __m128i *)left);
+ const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0);
+ const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
+ const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
+ const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
+ (void)above;
+ (void)bd;
+ _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row0, row0));
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row1, row1));
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row2, row2));
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row3, row3));
+}
+
+void aom_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i left_u16 = _mm_load_si128((const __m128i *)left);
+ const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0);
+ const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
+ const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
+ const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
+ const __m128i row4 = _mm_shufflehi_epi16(left_u16, 0x0);
+ const __m128i row5 = _mm_shufflehi_epi16(left_u16, 0x55);
+ const __m128i row6 = _mm_shufflehi_epi16(left_u16, 0xaa);
+ const __m128i row7 = _mm_shufflehi_epi16(left_u16, 0xff);
+ (void)above;
+ (void)bd;
+ _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row0, row0));
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row1, row1));
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row2, row2));
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row3, row3));
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row4, row4));
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row5, row5));
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row6, row6));
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row7, row7));
+}
+
+void aom_highbd_h_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ aom_highbd_h_predictor_8x8_sse2(dst, stride, above, left, bd);
+ dst += stride << 3;
+ left += 8;
+ aom_highbd_h_predictor_8x8_sse2(dst, stride, above, left, bd);
+}
+
+static INLINE void h_store_16_unpacklo(uint16_t **dst, const ptrdiff_t stride,
+ const __m128i *row) {
+ const __m128i val = _mm_unpacklo_epi64(*row, *row);
+ _mm_store_si128((__m128i *)*dst, val);
+ _mm_store_si128((__m128i *)(*dst + 8), val);
+ *dst += stride;
+}
+
+static INLINE void h_store_16_unpackhi(uint16_t **dst, const ptrdiff_t stride,
+ const __m128i *row) {
+ const __m128i val = _mm_unpackhi_epi64(*row, *row);
+ _mm_store_si128((__m128i *)(*dst), val);
+ _mm_store_si128((__m128i *)(*dst + 8), val);
+ *dst += stride;
+}
+
+static INLINE void h_predictor_16x8(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *left) {
+ const __m128i left_u16 = _mm_load_si128((const __m128i *)left);
+ const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0);
+ const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
+ const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
+ const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
+ const __m128i row4 = _mm_shufflehi_epi16(left_u16, 0x0);
+ const __m128i row5 = _mm_shufflehi_epi16(left_u16, 0x55);
+ const __m128i row6 = _mm_shufflehi_epi16(left_u16, 0xaa);
+ const __m128i row7 = _mm_shufflehi_epi16(left_u16, 0xff);
+ h_store_16_unpacklo(&dst, stride, &row0);
+ h_store_16_unpacklo(&dst, stride, &row1);
+ h_store_16_unpacklo(&dst, stride, &row2);
+ h_store_16_unpacklo(&dst, stride, &row3);
+ h_store_16_unpackhi(&dst, stride, &row4);
+ h_store_16_unpackhi(&dst, stride, &row5);
+ h_store_16_unpackhi(&dst, stride, &row6);
+ h_store_16_unpackhi(&dst, stride, &row7);
+}
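
The H predictors broadcast each left-column pixel across a row with SSE2-only shuffles: _mm_shufflelo_epi16/_mm_shufflehi_epi16 replicate one lane within a 64-bit half, and a 64-bit unpack copies that half across the register. A minimal sketch of the idiom for a single lane (hypothetical helper):

    #include <emmintrin.h>

    /* Broadcast lane 2 of an 8x16-bit vector to all lanes using only SSE2,
       mirroring the row0..row7 construction in the predictors above. */
    static __m128i splat_lane2(__m128i v) {
      const __m128i lo = _mm_shufflelo_epi16(v, 0xaa); /* lane 2 -> lanes 0..3 */
      return _mm_unpacklo_epi64(lo, lo);               /* copy low half to high */
    }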
+
+void aom_highbd_h_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)above;
+ (void)bd;
+ h_predictor_16x8(dst, stride, left);
+}
+
+void aom_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ int i;
+ (void)above;
+ (void)bd;
+
+ for (i = 0; i < 2; i++, left += 8) {
+ h_predictor_16x8(dst, stride, left);
+ dst += stride << 3;
+ }
+}
+
+void aom_highbd_h_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ int i;
+ (void)above;
+ (void)bd;
+
+ for (i = 0; i < 4; i++, left += 8) {
+ h_predictor_16x8(dst, stride, left);
+ dst += stride << 3;
+ }
+}
+
+static INLINE void h_store_32_unpacklo(uint16_t **dst, const ptrdiff_t stride,
+ const __m128i *row) {
+ const __m128i val = _mm_unpacklo_epi64(*row, *row);
+ _mm_store_si128((__m128i *)(*dst), val);
+ _mm_store_si128((__m128i *)(*dst + 8), val);
+ _mm_store_si128((__m128i *)(*dst + 16), val);
+ _mm_store_si128((__m128i *)(*dst + 24), val);
+ *dst += stride;
+}
+
+static INLINE void h_store_32_unpackhi(uint16_t **dst, const ptrdiff_t stride,
+ const __m128i *row) {
+ const __m128i val = _mm_unpackhi_epi64(*row, *row);
+ _mm_store_si128((__m128i *)(*dst), val);
+ _mm_store_si128((__m128i *)(*dst + 8), val);
+ _mm_store_si128((__m128i *)(*dst + 16), val);
+ _mm_store_si128((__m128i *)(*dst + 24), val);
+ *dst += stride;
+}
+
+static INLINE void h_predictor_32x8(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *left) {
+ const __m128i left_u16 = _mm_load_si128((const __m128i *)left);
+ const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0);
+ const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55);
+ const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa);
+ const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff);
+ const __m128i row4 = _mm_shufflehi_epi16(left_u16, 0x0);
+ const __m128i row5 = _mm_shufflehi_epi16(left_u16, 0x55);
+ const __m128i row6 = _mm_shufflehi_epi16(left_u16, 0xaa);
+ const __m128i row7 = _mm_shufflehi_epi16(left_u16, 0xff);
+ h_store_32_unpacklo(&dst, stride, &row0);
+ h_store_32_unpacklo(&dst, stride, &row1);
+ h_store_32_unpacklo(&dst, stride, &row2);
+ h_store_32_unpacklo(&dst, stride, &row3);
+ h_store_32_unpackhi(&dst, stride, &row4);
+ h_store_32_unpackhi(&dst, stride, &row5);
+ h_store_32_unpackhi(&dst, stride, &row6);
+ h_store_32_unpackhi(&dst, stride, &row7);
+}
+
+void aom_highbd_h_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ int i;
+ (void)above;
+ (void)bd;
+
+ for (i = 0; i < 2; i++, left += 8) {
+ h_predictor_32x8(dst, stride, left);
+ dst += stride << 3;
+ }
+}
+
+void aom_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ int i;
+ (void)above;
+ (void)bd;
+
+ for (i = 0; i < 4; i++, left += 8) {
+ h_predictor_32x8(dst, stride, left);
+ dst += stride << 3;
+ }
+}
+
+// -----------------------------------------------------------------------------
+// DC_TOP, DC_LEFT, DC_128
+
+// 4x4
+
+static INLINE __m128i dc_sum_4(const uint16_t *ref) {
+ const __m128i _dcba = _mm_loadl_epi64((const __m128i *)ref);
+ const __m128i _xxdc = _mm_shufflelo_epi16(_dcba, 0xe);
+ const __m128i a = _mm_add_epi16(_dcba, _xxdc);
+ return _mm_add_epi16(a, _mm_shufflelo_epi16(a, 0x1));
+}
+
+static INLINE void dc_store_4x4(uint16_t *dst, ptrdiff_t stride,
+ const __m128i *dc) {
+ const __m128i dc_dup = _mm_shufflelo_epi16(*dc, 0x0);
+ int i;
+ for (i = 0; i < 4; ++i, dst += stride) {
+ _mm_storel_epi64((__m128i *)dst, dc_dup);
+ }
+}
+
+void aom_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i two = _mm_cvtsi32_si128(2);
+ const __m128i sum = dc_sum_4(left);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2);
+ (void)above;
+ (void)bd;
+ dc_store_4x4(dst, stride, &dc);
+}
+
+void aom_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i two = _mm_cvtsi32_si128(2);
+ const __m128i sum = dc_sum_4(above);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2);
+ (void)left;
+ (void)bd;
+ dc_store_4x4(dst, stride, &dc);
+}
+
+void aom_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
+ (void)above;
+ (void)left;
+ dc_store_4x4(dst, stride, &dc_dup);
+}
+
+// -----------------------------------------------------------------------------
+// 4x8
+
+static INLINE void dc_store_4x8(uint16_t *dst, ptrdiff_t stride,
+ const __m128i *dc) {
+ const __m128i dc_dup = _mm_shufflelo_epi16(*dc, 0x0);
+ int i;
+ for (i = 0; i < 8; ++i, dst += stride) {
+ _mm_storel_epi64((__m128i *)dst, dc_dup);
+ }
+}
+
+// Shared with DC 8xh
+static INLINE __m128i dc_sum_8(const uint16_t *ref) {
+ const __m128i ref_u16 = _mm_load_si128((const __m128i *)ref);
+ const __m128i _dcba = _mm_add_epi16(ref_u16, _mm_srli_si128(ref_u16, 8));
+ const __m128i _xxdc = _mm_shufflelo_epi16(_dcba, 0xe);
+ const __m128i a = _mm_add_epi16(_dcba, _xxdc);
+
+ return _mm_add_epi16(a, _mm_shufflelo_epi16(a, 0x1));
+}
+
+void aom_highbd_dc_left_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i sum = dc_sum_8(left);
+ const __m128i four = _mm_cvtsi32_si128(4);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3);
+ (void)above;
+ (void)bd;
+ dc_store_4x8(dst, stride, &dc);
+}
+
+void aom_highbd_dc_top_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i two = _mm_cvtsi32_si128(2);
+ const __m128i sum = dc_sum_4(above);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2);
+ (void)left;
+ (void)bd;
+ dc_store_4x8(dst, stride, &dc);
+}
+
+void aom_highbd_dc_128_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
+ (void)above;
+ (void)left;
+ dc_store_4x8(dst, stride, &dc_dup);
+}
+
+// -----------------------------------------------------------------------------
+// 8xh
+
+static INLINE void dc_store_8xh(uint16_t *dst, ptrdiff_t stride, int height,
+ const __m128i *dc) {
+ const __m128i dc_dup_lo = _mm_shufflelo_epi16(*dc, 0);
+ const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo);
+ int i;
+ for (i = 0; i < height; ++i, dst += stride) {
+ _mm_store_si128((__m128i *)dst, dc_dup);
+ }
+}
+
+// -----------------------------------------------------------------------------
+// DC_TOP
+
+static INLINE void dc_top_predictor_8xh(uint16_t *dst, ptrdiff_t stride,
+ int height, const uint16_t *above) {
+ const __m128i four = _mm_cvtsi32_si128(4);
+ const __m128i sum = dc_sum_8(above);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3);
+ dc_store_8xh(dst, stride, height, &dc);
+}
+
+void aom_highbd_dc_top_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ dc_top_predictor_8xh(dst, stride, 4, above);
+}
+
+void aom_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ dc_top_predictor_8xh(dst, stride, 8, above);
+}
+
+void aom_highbd_dc_top_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ dc_top_predictor_8xh(dst, stride, 16, above);
+}
+
+// -----------------------------------------------------------------------------
+// DC_LEFT
+
+void aom_highbd_dc_left_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i two = _mm_cvtsi32_si128(2);
+ const __m128i sum = dc_sum_4(left);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2);
+ (void)above;
+ (void)bd;
+ dc_store_8xh(dst, stride, 4, &dc);
+}
+
+void aom_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i four = _mm_cvtsi32_si128(4);
+ const __m128i sum = dc_sum_8(left);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3);
+ (void)above;
+ (void)bd;
+ dc_store_8xh(dst, stride, 8, &dc);
+}
+
+// Shared with DC 16xh
+static INLINE __m128i dc_sum_16(const uint16_t *ref) {
+ const __m128i sum_lo = dc_sum_8(ref);
+ const __m128i sum_hi = dc_sum_8(ref + 8);
+ return _mm_add_epi16(sum_lo, sum_hi);
+}
+
+void aom_highbd_dc_left_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i eight = _mm_cvtsi32_si128(8);
+ const __m128i sum = dc_sum_16(left);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
+ (void)above;
+ (void)bd;
+ dc_store_8xh(dst, stride, 16, &dc);
+}
+
+// -----------------------------------------------------------------------------
+// DC_128
+
+static INLINE void dc_128_predictor_8xh(uint16_t *dst, ptrdiff_t stride,
+ int height, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
+ dc_store_8xh(dst, stride, height, &dc_dup);
+}
+
+void aom_highbd_dc_128_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)above;
+ (void)left;
+ dc_128_predictor_8xh(dst, stride, 4, bd);
+}
+
+void aom_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)above;
+ (void)left;
+ dc_128_predictor_8xh(dst, stride, 8, bd);
+}
+
+void aom_highbd_dc_128_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)above;
+ (void)left;
+ dc_128_predictor_8xh(dst, stride, 16, bd);
+}
+
+// -----------------------------------------------------------------------------
+// 16xh
+
+static INLINE void dc_store_16xh(uint16_t *dst, ptrdiff_t stride, int height,
+ const __m128i *dc) {
+ const __m128i dc_dup_lo = _mm_shufflelo_epi16(*dc, 0);
+ const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo);
+ int i;
+ for (i = 0; i < height; ++i, dst += stride) {
+ _mm_store_si128((__m128i *)dst, dc_dup);
+ _mm_store_si128((__m128i *)(dst + 8), dc_dup);
+ }
+}
+
+// -----------------------------------------------------------------------------
+// DC_LEFT
+
+void aom_highbd_dc_left_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i four = _mm_cvtsi32_si128(4);
+ const __m128i sum = dc_sum_8(left);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3);
+ (void)above;
+ (void)bd;
+ dc_store_16xh(dst, stride, 8, &dc);
+}
+
+void aom_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i eight = _mm_cvtsi32_si128(8);
+ const __m128i sum = dc_sum_16(left);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
+ (void)above;
+ (void)bd;
+ dc_store_16xh(dst, stride, 16, &dc);
+}
+
+// Shared with DC 32xh
+static INLINE __m128i dc_sum_32(const uint16_t *ref) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i sum_a = dc_sum_16(ref);
+ const __m128i sum_b = dc_sum_16(ref + 16);
+ // A 12-bit bit depth can overflow a 16-bit sum, so widen to 32 bits before
+ // adding the final total.
+ return _mm_add_epi32(_mm_unpacklo_epi16(sum_a, zero),
+ _mm_unpacklo_epi16(sum_b, zero));
+}
+
+void aom_highbd_dc_left_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i sixteen = _mm_cvtsi32_si128(16);
+ const __m128i sum = dc_sum_32(left);
+ const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5);
+ (void)above;
+ (void)bd;
+ dc_store_16xh(dst, stride, 32, &dc);
+}
+
+// -----------------------------------------------------------------------------
+// DC_TOP
+
+void aom_highbd_dc_top_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i eight = _mm_cvtsi32_si128(8);
+ const __m128i sum = dc_sum_16(above);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
+ (void)left;
+ (void)bd;
+ dc_store_16xh(dst, stride, 8, &dc);
+}
+
+void aom_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i eight = _mm_cvtsi32_si128(8);
+ const __m128i sum = dc_sum_16(above);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
+ (void)left;
+ (void)bd;
+ dc_store_16xh(dst, stride, 16, &dc);
+}
+
+void aom_highbd_dc_top_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i eight = _mm_cvtsi32_si128(8);
+ const __m128i sum = dc_sum_16(above);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
+ (void)left;
+ (void)bd;
+ dc_store_16xh(dst, stride, 32, &dc);
+}
+
+// -----------------------------------------------------------------------------
+// DC_128
+
+void aom_highbd_dc_128_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
+ (void)above;
+ (void)left;
+ dc_store_16xh(dst, stride, 8, &dc_dup);
+}
+
+void aom_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
+ (void)above;
+ (void)left;
+ dc_store_16xh(dst, stride, 16, &dc_dup);
+}
+
+void aom_highbd_dc_128_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
+ (void)above;
+ (void)left;
+ dc_store_16xh(dst, stride, 32, &dc_dup);
+}
+
+// -----------------------------------------------------------------------------
+// 32xh
+
+static INLINE void dc_store_32xh(uint16_t *dst, ptrdiff_t stride, int height,
+ const __m128i *dc) {
+ const __m128i dc_dup_lo = _mm_shufflelo_epi16(*dc, 0);
+ const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo);
+ int i;
+ for (i = 0; i < height; ++i, dst += stride) {
+ _mm_store_si128((__m128i *)dst, dc_dup);
+ _mm_store_si128((__m128i *)(dst + 8), dc_dup);
+ _mm_store_si128((__m128i *)(dst + 16), dc_dup);
+ _mm_store_si128((__m128i *)(dst + 24), dc_dup);
+ }
+}
+
+void aom_highbd_dc_left_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i eight = _mm_cvtsi32_si128(8);
+ const __m128i sum = dc_sum_16(left);
+ const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4);
+ (void)above;
+ (void)bd;
+ dc_store_32xh(dst, stride, 16, &dc);
+}
+
+void aom_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i sixteen = _mm_cvtsi32_si128(16);
+ const __m128i sum = dc_sum_32(left);
+ const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5);
+ (void)above;
+ (void)bd;
+ dc_store_32xh(dst, stride, 32, &dc);
+}
+
+void aom_highbd_dc_top_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i sixteen = _mm_cvtsi32_si128(16);
+ const __m128i sum = dc_sum_32(above);
+ const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5);
+ (void)left;
+ (void)bd;
+ dc_store_32xh(dst, stride, 16, &dc);
+}
+
+void aom_highbd_dc_128_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
+ (void)above;
+ (void)left;
+ dc_store_32xh(dst, stride, 16, &dc_dup);
+}
+
+void aom_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i sixteen = _mm_cvtsi32_si128(16);
+ const __m128i sum = dc_sum_32(above);
+ const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5);
+ (void)left;
+ (void)bd;
+ dc_store_32xh(dst, stride, 32, &dc);
+}
+
+void aom_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1));
+ const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0);
+ (void)above;
+ (void)left;
+ dc_store_32xh(dst, stride, 32, &dc_dup);
+}
+
+// -----------------------------------------------------------------------------
+// V_PRED
+
+void aom_highbd_v_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ const __m128i above_u16 = _mm_loadl_epi64((const __m128i *)above);
+ int i;
+ for (i = 0; i < 2; ++i) {
+ _mm_storel_epi64((__m128i *)dst, above_u16);
+ _mm_storel_epi64((__m128i *)(dst + stride), above_u16);
+ _mm_storel_epi64((__m128i *)(dst + 2 * stride), above_u16);
+ _mm_storel_epi64((__m128i *)(dst + 3 * stride), above_u16);
+ dst += stride << 2;
+ }
+}
+
+void aom_highbd_v_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ const __m128i above_u16 = _mm_load_si128((const __m128i *)above);
+ _mm_store_si128((__m128i *)dst, above_u16);
+ _mm_store_si128((__m128i *)(dst + stride), above_u16);
+ _mm_store_si128((__m128i *)(dst + 2 * stride), above_u16);
+ _mm_store_si128((__m128i *)(dst + 3 * stride), above_u16);
+}
+
+void aom_highbd_v_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ const __m128i above_u16 = _mm_load_si128((const __m128i *)above);
+ int i;
+ for (i = 0; i < 4; ++i) {
+ _mm_store_si128((__m128i *)dst, above_u16);
+ _mm_store_si128((__m128i *)(dst + stride), above_u16);
+ _mm_store_si128((__m128i *)(dst + 2 * stride), above_u16);
+ _mm_store_si128((__m128i *)(dst + 3 * stride), above_u16);
+ dst += stride << 2;
+ }
+}
+
+void aom_highbd_v_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ const __m128i above0_u16 = _mm_load_si128((const __m128i *)above);
+ const __m128i above1_u16 = _mm_load_si128((const __m128i *)(above + 8));
+ int i;
+ for (i = 0; i < 2; ++i) {
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ dst += stride;
+ }
+}
+
+void aom_highbd_v_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ const __m128i above0_u16 = _mm_load_si128((const __m128i *)above);
+ const __m128i above1_u16 = _mm_load_si128((const __m128i *)(above + 8));
+ int i;
+ for (i = 0; i < 8; ++i) {
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ dst += stride;
+ }
+}
+
+void aom_highbd_v_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ const __m128i above0_u16 = _mm_load_si128((const __m128i *)above);
+ const __m128i above1_u16 = _mm_load_si128((const __m128i *)(above + 8));
+ const __m128i above2_u16 = _mm_load_si128((const __m128i *)(above + 16));
+ const __m128i above3_u16 = _mm_load_si128((const __m128i *)(above + 24));
+ int i;
+ for (i = 0; i < 4; ++i) {
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ _mm_store_si128((__m128i *)(dst + 16), above2_u16);
+ _mm_store_si128((__m128i *)(dst + 24), above3_u16);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ _mm_store_si128((__m128i *)(dst + 16), above2_u16);
+ _mm_store_si128((__m128i *)(dst + 24), above3_u16);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ _mm_store_si128((__m128i *)(dst + 16), above2_u16);
+ _mm_store_si128((__m128i *)(dst + 24), above3_u16);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, above0_u16);
+ _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+ _mm_store_si128((__m128i *)(dst + 16), above2_u16);
+ _mm_store_si128((__m128i *)(dst + 24), above3_u16);
+ dst += stride;
+ }
+}
+
+// -----------------------------------------------------------------------------
+// DC_PRED
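+// DC prediction: fill the block with the rounded average of the above row
+// and the left column. The rectangular sizes below sum W + H edge samples
+// and divide with rounding, e.g. the 4x8 kernel adds 6 and divides by 12.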
+
+void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)bd;
+ const __m128i sum_above = dc_sum_4(above);
+ const __m128i sum_left = dc_sum_8(left);
+ const __m128i sum = _mm_add_epi16(sum_above, sum_left);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ sum32 >>= 16;
+ sum32 += 6;
+ sum32 /= 12;
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ int i;
+ for (i = 0; i < 4; ++i) {
+ _mm_storel_epi64((__m128i *)dst, row);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row);
+ dst += stride;
+ }
+}
+
+void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)bd;
+ const __m128i sum_left = dc_sum_4(left);
+ const __m128i sum_above = dc_sum_8(above);
+ const __m128i sum = _mm_add_epi16(sum_above, sum_left);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ sum32 >>= 16;
+ sum32 += 6;
+ sum32 /= 12;
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+}
+
+void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)bd;
+ __m128i sum_left = dc_sum_16(left);
+ __m128i sum_above = dc_sum_8(above);
+ const __m128i zero = _mm_setzero_si128();
+ sum_left = _mm_unpacklo_epi16(sum_left, zero);
+ sum_above = _mm_unpacklo_epi16(sum_above, zero);
+ const __m128i sum = _mm_add_epi32(sum_left, sum_above);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ sum32 += 12;
+ sum32 /= 24;
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ int i;
+ for (i = 0; i < 4; ++i) {
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ }
+}
+
+void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)bd;
+ __m128i sum_left = dc_sum_8(left);
+ __m128i sum_above = dc_sum_16(above);
+ const __m128i zero = _mm_setzero_si128();
+ sum_left = _mm_unpacklo_epi16(sum_left, zero);
+ sum_above = _mm_unpacklo_epi16(sum_above, zero);
+ const __m128i sum = _mm_add_epi32(sum_left, sum_above);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ sum32 += 12;
+ sum32 /= 24;
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ int i;
+ for (i = 0; i < 2; ++i) {
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ dst += stride;
+ }
+}
+
+void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)bd;
+ __m128i sum_left = dc_sum_32(left);
+ __m128i sum_above = dc_sum_16(above);
+ const __m128i zero = _mm_setzero_si128();
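+  // dc_sum_32() already widens its result to 32 bits, so only the 16-bit
+  // sum_above needs to be zero-extended before the 32-bit add.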
+ sum_above = _mm_unpacklo_epi16(sum_above, zero);
+ const __m128i sum = _mm_add_epi32(sum_left, sum_above);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ sum32 += 24;
+ sum32 /= 48;
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ int i;
+ for (i = 0; i < 8; ++i) {
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ dst += stride;
+ }
+}
+
+void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)bd;
+ __m128i sum_left = dc_sum_16(left);
+ __m128i sum_above = dc_sum_32(above);
+ const __m128i zero = _mm_setzero_si128();
+ sum_left = _mm_unpacklo_epi16(sum_left, zero);
+ const __m128i sum = _mm_add_epi32(sum_left, sum_above);
+ uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ sum32 += 24;
+ sum32 /= 48;
+ const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ int i;
+ for (i = 0; i < 4; ++i) {
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ _mm_store_si128((__m128i *)(dst + 16), row);
+ _mm_store_si128((__m128i *)(dst + 24), row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ _mm_store_si128((__m128i *)(dst + 16), row);
+ _mm_store_si128((__m128i *)(dst + 24), row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ _mm_store_si128((__m128i *)(dst + 16), row);
+ _mm_store_si128((__m128i *)(dst + 24), row);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row);
+ _mm_store_si128((__m128i *)(dst + 8), row);
+ _mm_store_si128((__m128i *)(dst + 16), row);
+ _mm_store_si128((__m128i *)(dst + 24), row);
+ dst += stride;
+ }
+}
+
+// -----------------------------------------------------------------------------
+/*
+ * Compute the rounded average of three adjacent pixels x, y, z, i.e.
+ * (x + 2*y + z + 2) >> 2, without widening past 16 bits.
+ *
+ * Trick from Pascal:
+ *   result = avg(x, z)        // (x + z + 1) >> 1
+ *   result -= xor(x, z) & 1   // corrects the rounding to (x + z) >> 1
+ *   result = avg(result, y)   // ((x + z) >> 1 + y + 1) >> 1
+ * which is exact for all 16-bit inputs.
+ */
+static INLINE __m128i avg3_epu16(const __m128i *x, const __m128i *y,
+ const __m128i *z) {
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i a = _mm_avg_epu16(*x, *z);
+ const __m128i b =
+ _mm_subs_epu16(a, _mm_and_si128(_mm_xor_si128(*x, *z), one));
+ return _mm_avg_epu16(b, *y);
+}
+
+void aom_highbd_d117_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const __m128i XXXXABCD = _mm_loadu_si128((const __m128i *)(above - 4));
+ const __m128i KXXXABCD = _mm_insert_epi16(XXXXABCD, K, 0);
+ const __m128i KJXXABCD = _mm_insert_epi16(KXXXABCD, J, 1);
+ const __m128i KJIXABCD = _mm_insert_epi16(KJXXABCD, I, 2);
+ const __m128i JIXABCD0 = _mm_srli_si128(KJIXABCD, 2);
+ const __m128i IXABCD00 = _mm_srli_si128(KJIXABCD, 4);
+ const __m128i avg2 = _mm_avg_epu16(KJIXABCD, JIXABCD0);
+ const __m128i avg3 = avg3_epu16(&KJIXABCD, &JIXABCD0, &IXABCD00);
+ const __m128i row0 = _mm_srli_si128(avg2, 6);
+ const __m128i row1 = _mm_srli_si128(avg3, 4);
+ const __m128i row2 = _mm_srli_si128(avg2, 4);
+ const __m128i row3 = _mm_srli_si128(avg3, 2);
+ (void)bd;
+ _mm_storel_epi64((__m128i *)dst, row0);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row1);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row2);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row3);
+
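+  // Rows 2 and 3 take their leftmost pixel from the left column instead:
+  // avg3(X, I, J) and avg3(I, J, K) sit in lanes 1 and 0 of avg3.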
+ dst -= stride;
+ dst[0] = _mm_extract_epi16(avg3, 1);
+ dst[stride] = _mm_extract_epi16(avg3, 0);
+}
+
+void aom_highbd_d135_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int L = left[3];
+ const __m128i XXXXABCD = _mm_loadu_si128((const __m128i *)(above - 4));
+ const __m128i KXXXABCD = _mm_insert_epi16(XXXXABCD, K, 0);
+ const __m128i KJXXABCD = _mm_insert_epi16(KXXXABCD, J, 1);
+ const __m128i KJIXABCD = _mm_insert_epi16(KJXXABCD, I, 2);
+ const __m128i JIXABCD0 = _mm_srli_si128(KJIXABCD, 2);
+ const __m128i LKJIXABC = _mm_insert_epi16(_mm_slli_si128(KJIXABCD, 2), L, 0);
+ const __m128i avg3 = avg3_epu16(&JIXABCD0, &KJIXABCD, &LKJIXABC);
+ const __m128i row0 = _mm_srli_si128(avg3, 6);
+ const __m128i row1 = _mm_srli_si128(avg3, 4);
+ const __m128i row2 = _mm_srli_si128(avg3, 2);
+ const __m128i row3 = avg3;
+ (void)bd;
+ _mm_storel_epi64((__m128i *)dst, row0);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row1);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row2);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row3);
+}
+
+void aom_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int L = left[3];
+ const __m128i XXXXXABC = _mm_loadu_si128((const __m128i *)(above - 5));
+ const __m128i LXXXXABC = _mm_insert_epi16(XXXXXABC, L, 0);
+ const __m128i LKXXXABC = _mm_insert_epi16(LXXXXABC, K, 1);
+ const __m128i LKJXXABC = _mm_insert_epi16(LKXXXABC, J, 2);
+ const __m128i LKJIXABC = _mm_insert_epi16(LKJXXABC, I, 3);
+ const __m128i KJIXABC0 = _mm_srli_si128(LKJIXABC, 2);
+ const __m128i JIXABC00 = _mm_srli_si128(LKJIXABC, 4);
+ const __m128i avg3 = avg3_epu16(&LKJIXABC, &KJIXABC0, &JIXABC00);
+ const __m128i avg2 = _mm_avg_epu16(LKJIXABC, KJIXABC0);
+ const __m128i row3 = _mm_unpacklo_epi16(avg2, avg3);
+ const __m128i row2 = _mm_srli_si128(row3, 4);
+ const __m128i row1 = _mm_srli_si128(row3, 8);
+ const __m128i row0 = _mm_srli_si128(avg3, 4);
+ (void)bd;
+ _mm_storel_epi64((__m128i *)dst, row0);
+ dst[0] = _mm_extract_epi16(avg2, 3);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row1);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row2);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row3);
+}
+
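+// D45E: the prediction follows a 45-degree down-left diagonal, so each
+// successive row is the above-row avg3 vector shifted along by one more
+// sample.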
+void aom_highbd_d45e_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i ABCDEFGH = _mm_loadu_si128((const __m128i *)above);
+ const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 2);
+ __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 4);
+ CDEFGH00 = _mm_insert_epi16(CDEFGH00, above[7], 6);
+ const __m128i avg3 = avg3_epu16(&ABCDEFGH, &BCDEFGH0, &CDEFGH00);
+ (void)left;
+ (void)bd;
+ _mm_storel_epi64((__m128i *)dst, avg3);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 2));
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 4));
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 6));
+}
+
+void aom_highbd_d45e_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ __m128i h76543210 = _mm_load_si128((const __m128i *)above);
+ __m128i hx7654321 = _mm_srli_si128(h76543210, 2);
+ __m128i h87654321 = _mm_insert_epi16(hx7654321, above[8], 7);
+ __m128i hx8765432 = _mm_srli_si128(h87654321, 2);
+ __m128i h98765432 = _mm_insert_epi16(hx8765432, above[9], 7);
+ __m128i avg3 = avg3_epu16(&h76543210, &h87654321, &h98765432);
+ _mm_storel_epi64((__m128i *)dst, avg3);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 2));
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 4));
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 6));
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 8));
+ dst += stride;
+
+ // hcba98765
+  h76543210 = _mm_loadu_si128((const __m128i *)(above + 5));
+ h76543210 = _mm_insert_epi16(h76543210, above[11], 7);
+ // hxcba9876
+ hx7654321 = _mm_srli_si128(h76543210, 2);
+ // hxxcba987
+ hx8765432 = _mm_srli_si128(h76543210, 4);
+ avg3 = avg3_epu16(&h76543210, &hx7654321, &hx8765432);
+ _mm_storel_epi64((__m128i *)dst, avg3);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 2));
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 4));
+}
+
+void aom_highbd_d45e_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ __m128i x0 = _mm_load_si128((const __m128i *)above);
+ __m128i x1 = _mm_loadu_si128((const __m128i *)(above + 1));
+ __m128i x2 = _mm_loadu_si128((const __m128i *)(above + 2));
+ __m128i y = avg3_epu16(&x0, &x1, &x2);
+ _mm_store_si128((__m128i *)dst, y);
+ dst += stride;
+
+ x0 = _mm_loadu_si128((const __m128i *)(above + 3));
+ y = avg3_epu16(&x1, &x2, &x0);
+ _mm_store_si128((__m128i *)dst, y);
+ dst += stride;
+
+ x1 = _mm_loadu_si128((const __m128i *)(above + 4));
+ y = avg3_epu16(&x2, &x0, &x1);
+ _mm_store_si128((__m128i *)dst, y);
+ dst += stride;
+
+ x2 = _mm_loadu_si128((const __m128i *)(above + 5));
+ x2 = _mm_insert_epi16(x2, above[11], 7);
+ y = avg3_epu16(&x0, &x1, &x2);
+ _mm_store_si128((__m128i *)dst, y);
+}
+
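+// Helper for the wider D45E kernels: computes one 8-pixel avg3 row, stores
+// it, and advances *dst to the next row.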
+static INLINE void d45e_w8(const __m128i *a0, const __m128i *a1,
+ const __m128i *a2, uint16_t **dst,
+ ptrdiff_t stride) {
+ const __m128i y = avg3_epu16(a0, a1, a2);
+ _mm_storeu_si128((__m128i *)*dst, y);
+ *dst += stride;
+}
+
+void aom_highbd_d45e_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ __m128i x0 = _mm_load_si128((const __m128i *)above);
+ __m128i x1 = _mm_loadu_si128((const __m128i *)(above + 1));
+ __m128i x2 = _mm_loadu_si128((const __m128i *)(above + 2));
+
+ d45e_w8(&x0, &x1, &x2, &dst, stride);
+
+ int i = 3;
+ do {
+ x0 = _mm_loadu_si128((const __m128i *)(above + i++));
+ d45e_w8(&x1, &x2, &x0, &dst, stride);
+
+ x1 = _mm_loadu_si128((const __m128i *)(above + i++));
+ d45e_w8(&x2, &x0, &x1, &dst, stride);
+
+ x2 = _mm_loadu_si128((const __m128i *)(above + i++));
+ d45e_w8(&x0, &x1, &x2, &dst, stride);
+ } while (i < 9);
+
+ x0 = _mm_loadu_si128((const __m128i *)(above + 9));
+ x0 = _mm_insert_epi16(x0, above[15], 7);
+ const __m128i y = avg3_epu16(&x1, &x2, &x0);
+ _mm_store_si128((__m128i *)dst, y);
+}
+
+void aom_highbd_d45e_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ (void)left;
+ (void)bd;
+ __m128i x0 = _mm_load_si128((const __m128i *)above);
+ __m128i x1 = _mm_loadu_si128((const __m128i *)(above + 1));
+ __m128i x2 = _mm_loadu_si128((const __m128i *)(above + 2));
+
+ d45e_w8(&x0, &x1, &x2, &dst, stride);
+
+ int i = 3;
+ do {
+ x0 = _mm_loadu_si128((const __m128i *)(above + i++));
+ d45e_w8(&x1, &x2, &x0, &dst, stride);
+
+ x1 = _mm_loadu_si128((const __m128i *)(above + i++));
+ d45e_w8(&x2, &x0, &x1, &dst, stride);
+
+ x2 = _mm_loadu_si128((const __m128i *)(above + i++));
+ d45e_w8(&x0, &x1, &x2, &dst, stride);
+ } while (i < 15);
+
+ x0 = _mm_loadu_si128((const __m128i *)(above + 15));
+ __m128i y = avg3_epu16(&x1, &x2, &x0);
+ _mm_store_si128((__m128i *)dst, y);
+ dst += stride;
+
+ x1 = _mm_loadu_si128((const __m128i *)(above + 16));
+ y = avg3_epu16(&x2, &x0, &x1);
+ _mm_store_si128((__m128i *)dst, y);
+ dst += stride;
+
+ x2 = _mm_loadu_si128((const __m128i *)(above + 17));
+ x2 = _mm_insert_epi16(x2, above[23], 7);
+ y = avg3_epu16(&x0, &x1, &x2);
+ _mm_store_si128((__m128i *)dst, y);
+}
diff --git a/third_party/aom/aom_dsp/x86/highbd_intrapred_ssse3.c b/third_party/aom/aom_dsp/x86/highbd_intrapred_ssse3.c
new file mode 100644
index 000000000..b089a3f43
--- /dev/null
+++ b/third_party/aom/aom_dsp/x86/highbd_intrapred_ssse3.c
@@ -0,0 +1,521 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tmmintrin.h>
+
+#include "./aom_dsp_rtcd.h"
+
+// -----------------------------------------------------------------------------
+/*
+ * Compute the rounded average of three adjacent pixels x, y, z, i.e.
+ * (x + 2*y + z + 2) >> 2, without widening past 16 bits.
+ *
+ * Trick from Pascal:
+ *   result = avg(x, z)        // (x + z + 1) >> 1
+ *   result -= xor(x, z) & 1   // corrects the rounding to (x + z) >> 1
+ *   result = avg(result, y)   // ((x + z) >> 1 + y + 1) >> 1
+ * which is exact for all 16-bit inputs.
+ */
+static INLINE __m128i avg3_epu16(const __m128i *x, const __m128i *y,
+ const __m128i *z) {
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i a = _mm_avg_epu16(*x, *z);
+ const __m128i b =
+ _mm_subs_epu16(a, _mm_and_si128(_mm_xor_si128(*x, *z), one));
+ return _mm_avg_epu16(b, *y);
+}
+
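+// pshufb control that rotates the eight 16-bit lanes down by one: lane k
+// receives lane k + 1, and lane 0 wraps around into lane 7.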
+DECLARE_ALIGNED(16, static const uint8_t, rotate_right_epu16[16]) = {
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1
+};
+
+static INLINE __m128i rotr_epu16(__m128i *a, const __m128i *rotrw) {
+ *a = _mm_shuffle_epi8(*a, *rotrw);
+ return *a;
+}
+
+void aom_highbd_d117_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16);
+ const __m128i XABCDEFG = _mm_loadu_si128((const __m128i *)(above - 1));
+ const __m128i ABCDEFGH = _mm_load_si128((const __m128i *)above);
+ const __m128i IJKLMNOP = _mm_load_si128((const __m128i *)left);
+ const __m128i IXABCDEF =
+ _mm_alignr_epi8(XABCDEFG, _mm_slli_si128(IJKLMNOP, 14), 14);
+ const __m128i avg3 = avg3_epu16(&ABCDEFGH, &XABCDEFG, &IXABCDEF);
+ const __m128i avg2 = _mm_avg_epu16(ABCDEFGH, XABCDEFG);
+ const __m128i XIJKLMNO =
+ _mm_alignr_epi8(IJKLMNOP, _mm_slli_si128(XABCDEFG, 14), 14);
+ const __m128i JKLMNOP0 = _mm_srli_si128(IJKLMNOP, 2);
+ __m128i avg3_left = avg3_epu16(&XIJKLMNO, &IJKLMNOP, &JKLMNOP0);
+ __m128i rowa = avg2;
+ __m128i rowb = avg3;
+ int i;
+ (void)bd;
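+  // rowa/rowb start as the avg2/avg3 rows built from the above edge; each
+  // iteration stores a pair of rows, then shifts the next left-column avg3
+  // value into the front of each for the following pair.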
+ for (i = 0; i < 8; i += 2) {
+ _mm_store_si128((__m128i *)dst, rowa);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, rowb);
+ dst += stride;
+ rowa = _mm_alignr_epi8(rowa, rotr_epu16(&avg3_left, &rotrw), 14);
+ rowb = _mm_alignr_epi8(rowb, rotr_epu16(&avg3_left, &rotrw), 14);
+ }
+}
+
+void aom_highbd_d117_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16);
+ const __m128i B0 = _mm_loadu_si128((const __m128i *)(above - 1));
+ const __m128i A0 = _mm_load_si128((const __m128i *)above);
+ const __m128i B1 = _mm_loadu_si128((const __m128i *)(above + 7));
+ const __m128i A1 = _mm_load_si128((const __m128i *)(above + 8));
+ const __m128i avg2_0 = _mm_avg_epu16(A0, B0);
+ const __m128i avg2_1 = _mm_avg_epu16(A1, B1);
+ const __m128i L0 = _mm_load_si128((const __m128i *)left);
+ const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8));
+ const __m128i C0 = _mm_alignr_epi8(B0, _mm_slli_si128(L0, 14), 14);
+ const __m128i C1 = _mm_alignr_epi8(B1, B0, 14);
+ const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0);
+ const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1);
+ const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(B0, 14), 14);
+ const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14);
+ const __m128i L0_ = _mm_alignr_epi8(L1, L0, 2);
+ const __m128i L1_ = _mm_srli_si128(L1, 2);
+ __m128i rowa_0 = avg2_0;
+ __m128i rowa_1 = avg2_1;
+ __m128i rowb_0 = avg3_0;
+ __m128i rowb_1 = avg3_1;
+ __m128i avg3_left[2];
+ int i, j;
+ (void)bd;
+ avg3_left[0] = avg3_epu16(&XL0, &L0, &L0_);
+ avg3_left[1] = avg3_epu16(&XL1, &L1, &L1_);
+ for (i = 0; i < 2; ++i) {
+ __m128i avg_left = avg3_left[i];
+ for (j = 0; j < 8; j += 2) {
+ _mm_store_si128((__m128i *)dst, rowa_0);
+ _mm_store_si128((__m128i *)(dst + 8), rowa_1);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, rowb_0);
+ _mm_store_si128((__m128i *)(dst + 8), rowb_1);
+ dst += stride;
+ rowa_1 = _mm_alignr_epi8(rowa_1, rowa_0, 14);
+ rowa_0 = _mm_alignr_epi8(rowa_0, rotr_epu16(&avg_left, &rotrw), 14);
+ rowb_1 = _mm_alignr_epi8(rowb_1, rowb_0, 14);
+ rowb_0 = _mm_alignr_epi8(rowb_0, rotr_epu16(&avg_left, &rotrw), 14);
+ }
+ }
+}
+
+void aom_highbd_d117_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16);
+ const __m128i A0 = _mm_load_si128((const __m128i *)above);
+ const __m128i A1 = _mm_load_si128((const __m128i *)(above + 8));
+ const __m128i A2 = _mm_load_si128((const __m128i *)(above + 16));
+ const __m128i A3 = _mm_load_si128((const __m128i *)(above + 24));
+ const __m128i B0 = _mm_loadu_si128((const __m128i *)(above - 1));
+ const __m128i B1 = _mm_loadu_si128((const __m128i *)(above + 7));
+ const __m128i B2 = _mm_loadu_si128((const __m128i *)(above + 15));
+ const __m128i B3 = _mm_loadu_si128((const __m128i *)(above + 23));
+ const __m128i avg2_0 = _mm_avg_epu16(A0, B0);
+ const __m128i avg2_1 = _mm_avg_epu16(A1, B1);
+ const __m128i avg2_2 = _mm_avg_epu16(A2, B2);
+ const __m128i avg2_3 = _mm_avg_epu16(A3, B3);
+ const __m128i L0 = _mm_load_si128((const __m128i *)left);
+ const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8));
+ const __m128i L2 = _mm_load_si128((const __m128i *)(left + 16));
+ const __m128i L3 = _mm_load_si128((const __m128i *)(left + 24));
+ const __m128i C0 = _mm_alignr_epi8(B0, _mm_slli_si128(L0, 14), 14);
+ const __m128i C1 = _mm_alignr_epi8(B1, B0, 14);
+ const __m128i C2 = _mm_alignr_epi8(B2, B1, 14);
+ const __m128i C3 = _mm_alignr_epi8(B3, B2, 14);
+ const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0);
+ const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1);
+ const __m128i avg3_2 = avg3_epu16(&A2, &B2, &C2);
+ const __m128i avg3_3 = avg3_epu16(&A3, &B3, &C3);
+ const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(B0, 14), 14);
+ const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14);
+ const __m128i XL2 = _mm_alignr_epi8(L2, L1, 14);
+ const __m128i XL3 = _mm_alignr_epi8(L3, L2, 14);
+ const __m128i L0_ = _mm_alignr_epi8(L1, L0, 2);
+ const __m128i L1_ = _mm_alignr_epi8(L2, L1, 2);
+ const __m128i L2_ = _mm_alignr_epi8(L3, L2, 2);
+ const __m128i L3_ = _mm_srli_si128(L3, 2);
+ __m128i rowa_0 = avg2_0;
+ __m128i rowa_1 = avg2_1;
+ __m128i rowa_2 = avg2_2;
+ __m128i rowa_3 = avg2_3;
+ __m128i rowb_0 = avg3_0;
+ __m128i rowb_1 = avg3_1;
+ __m128i rowb_2 = avg3_2;
+ __m128i rowb_3 = avg3_3;
+ __m128i avg3_left[4];
+ int i, j;
+ (void)bd;
+ avg3_left[0] = avg3_epu16(&XL0, &L0, &L0_);
+ avg3_left[1] = avg3_epu16(&XL1, &L1, &L1_);
+ avg3_left[2] = avg3_epu16(&XL2, &L2, &L2_);
+ avg3_left[3] = avg3_epu16(&XL3, &L3, &L3_);
+ for (i = 0; i < 4; ++i) {
+ __m128i avg_left = avg3_left[i];
+ for (j = 0; j < 8; j += 2) {
+ _mm_store_si128((__m128i *)dst, rowa_0);
+ _mm_store_si128((__m128i *)(dst + 8), rowa_1);
+ _mm_store_si128((__m128i *)(dst + 16), rowa_2);
+ _mm_store_si128((__m128i *)(dst + 24), rowa_3);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, rowb_0);
+ _mm_store_si128((__m128i *)(dst + 8), rowb_1);
+ _mm_store_si128((__m128i *)(dst + 16), rowb_2);
+ _mm_store_si128((__m128i *)(dst + 24), rowb_3);
+ dst += stride;
+ rowa_3 = _mm_alignr_epi8(rowa_3, rowa_2, 14);
+ rowa_2 = _mm_alignr_epi8(rowa_2, rowa_1, 14);
+ rowa_1 = _mm_alignr_epi8(rowa_1, rowa_0, 14);
+ rowa_0 = _mm_alignr_epi8(rowa_0, rotr_epu16(&avg_left, &rotrw), 14);
+ rowb_3 = _mm_alignr_epi8(rowb_3, rowb_2, 14);
+ rowb_2 = _mm_alignr_epi8(rowb_2, rowb_1, 14);
+ rowb_1 = _mm_alignr_epi8(rowb_1, rowb_0, 14);
+ rowb_0 = _mm_alignr_epi8(rowb_0, rotr_epu16(&avg_left, &rotrw), 14);
+ }
+ }
+}
+
+void aom_highbd_d135_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16);
+ const __m128i XABCDEFG = _mm_loadu_si128((const __m128i *)(above - 1));
+ const __m128i ABCDEFGH = _mm_load_si128((const __m128i *)above);
+ const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 2);
+ const __m128i IJKLMNOP = _mm_load_si128((const __m128i *)left);
+ const __m128i XIJKLMNO =
+ _mm_alignr_epi8(IJKLMNOP, _mm_slli_si128(XABCDEFG, 14), 14);
+ const __m128i AXIJKLMN =
+ _mm_alignr_epi8(XIJKLMNO, _mm_slli_si128(ABCDEFGH, 14), 14);
+ const __m128i avg3 = avg3_epu16(&XABCDEFG, &ABCDEFGH, &BCDEFGH0);
+ __m128i avg3_left = avg3_epu16(&IJKLMNOP, &XIJKLMNO, &AXIJKLMN);
+ __m128i rowa = avg3;
+ int i;
+ (void)bd;
+ for (i = 0; i < 8; ++i) {
+ rowa = _mm_alignr_epi8(rowa, rotr_epu16(&avg3_left, &rotrw), 14);
+ _mm_store_si128((__m128i *)dst, rowa);
+ dst += stride;
+ }
+}
+
+void aom_highbd_d135_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16);
+ const __m128i A0 = _mm_loadu_si128((const __m128i *)(above - 1));
+ const __m128i B0 = _mm_load_si128((const __m128i *)above);
+ const __m128i A1 = _mm_loadu_si128((const __m128i *)(above + 7));
+ const __m128i B1 = _mm_load_si128((const __m128i *)(above + 8));
+ const __m128i L0 = _mm_load_si128((const __m128i *)left);
+ const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8));
+ const __m128i C0 = _mm_alignr_epi8(B1, B0, 2);
+ const __m128i C1 = _mm_srli_si128(B1, 2);
+ const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0);
+ const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1);
+ const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(A0, 14), 14);
+ const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14);
+ const __m128i L0_ = _mm_alignr_epi8(XL0, _mm_slli_si128(B0, 14), 14);
+ const __m128i L1_ = _mm_alignr_epi8(XL1, XL0, 14);
+ __m128i rowa_0 = avg3_0;
+ __m128i rowa_1 = avg3_1;
+ __m128i avg3_left[2];
+ int i, j;
+ (void)bd;
+ avg3_left[0] = avg3_epu16(&L0, &XL0, &L0_);
+ avg3_left[1] = avg3_epu16(&L1, &XL1, &L1_);
+ for (i = 0; i < 2; ++i) {
+ __m128i avg_left = avg3_left[i];
+ for (j = 0; j < 8; ++j) {
+ rowa_1 = _mm_alignr_epi8(rowa_1, rowa_0, 14);
+ rowa_0 = _mm_alignr_epi8(rowa_0, rotr_epu16(&avg_left, &rotrw), 14);
+ _mm_store_si128((__m128i *)dst, rowa_0);
+ _mm_store_si128((__m128i *)(dst + 8), rowa_1);
+ dst += stride;
+ }
+ }
+}
+
+void aom_highbd_d135_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16);
+ const __m128i A0 = _mm_loadu_si128((const __m128i *)(above - 1));
+ const __m128i A1 = _mm_loadu_si128((const __m128i *)(above + 7));
+ const __m128i A2 = _mm_loadu_si128((const __m128i *)(above + 15));
+ const __m128i A3 = _mm_loadu_si128((const __m128i *)(above + 23));
+ const __m128i B0 = _mm_load_si128((const __m128i *)above);
+ const __m128i B1 = _mm_load_si128((const __m128i *)(above + 8));
+ const __m128i B2 = _mm_load_si128((const __m128i *)(above + 16));
+ const __m128i B3 = _mm_load_si128((const __m128i *)(above + 24));
+ const __m128i L0 = _mm_load_si128((const __m128i *)left);
+ const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8));
+ const __m128i L2 = _mm_load_si128((const __m128i *)(left + 16));
+ const __m128i L3 = _mm_load_si128((const __m128i *)(left + 24));
+ const __m128i C0 = _mm_alignr_epi8(B1, B0, 2);
+ const __m128i C1 = _mm_alignr_epi8(B2, B1, 2);
+ const __m128i C2 = _mm_alignr_epi8(B3, B2, 2);
+ const __m128i C3 = _mm_srli_si128(B3, 2);
+ const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0);
+ const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1);
+ const __m128i avg3_2 = avg3_epu16(&A2, &B2, &C2);
+ const __m128i avg3_3 = avg3_epu16(&A3, &B3, &C3);
+ const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(A0, 14), 14);
+ const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14);
+ const __m128i XL2 = _mm_alignr_epi8(L2, L1, 14);
+ const __m128i XL3 = _mm_alignr_epi8(L3, L2, 14);
+ const __m128i L0_ = _mm_alignr_epi8(XL0, _mm_slli_si128(B0, 14), 14);
+ const __m128i L1_ = _mm_alignr_epi8(XL1, XL0, 14);
+ const __m128i L2_ = _mm_alignr_epi8(XL2, XL1, 14);
+ const __m128i L3_ = _mm_alignr_epi8(XL3, XL2, 14);
+ __m128i rowa_0 = avg3_0;
+ __m128i rowa_1 = avg3_1;
+ __m128i rowa_2 = avg3_2;
+ __m128i rowa_3 = avg3_3;
+ __m128i avg3_left[4];
+ int i, j;
+ (void)bd;
+ avg3_left[0] = avg3_epu16(&L0, &XL0, &L0_);
+ avg3_left[1] = avg3_epu16(&L1, &XL1, &L1_);
+ avg3_left[2] = avg3_epu16(&L2, &XL2, &L2_);
+ avg3_left[3] = avg3_epu16(&L3, &XL3, &L3_);
+ for (i = 0; i < 4; ++i) {
+ __m128i avg_left = avg3_left[i];
+ for (j = 0; j < 8; ++j) {
+ rowa_3 = _mm_alignr_epi8(rowa_3, rowa_2, 14);
+ rowa_2 = _mm_alignr_epi8(rowa_2, rowa_1, 14);
+ rowa_1 = _mm_alignr_epi8(rowa_1, rowa_0, 14);
+ rowa_0 = _mm_alignr_epi8(rowa_0, rotr_epu16(&avg_left, &rotrw), 14);
+ _mm_store_si128((__m128i *)dst, rowa_0);
+ _mm_store_si128((__m128i *)(dst + 8), rowa_1);
+ _mm_store_si128((__m128i *)(dst + 16), rowa_2);
+ _mm_store_si128((__m128i *)(dst + 24), rowa_3);
+ dst += stride;
+ }
+ }
+}
+
+void aom_highbd_d153_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i XABCDEFG = _mm_loadu_si128((const __m128i *)(above - 1));
+ const __m128i ABCDEFG0 = _mm_srli_si128(XABCDEFG, 2);
+ const __m128i BCDEFG00 = _mm_srli_si128(XABCDEFG, 4);
+ const __m128i avg3 = avg3_epu16(&BCDEFG00, &ABCDEFG0, &XABCDEFG);
+ const __m128i IJKLMNOP = _mm_load_si128((const __m128i *)left);
+ const __m128i XIJKLMNO =
+ _mm_alignr_epi8(IJKLMNOP, _mm_slli_si128(XABCDEFG, 14), 14);
+ const __m128i AXIJKLMN =
+ _mm_alignr_epi8(XIJKLMNO, _mm_slli_si128(XABCDEFG, 12), 14);
+ const __m128i avg3_left = avg3_epu16(&IJKLMNOP, &XIJKLMNO, &AXIJKLMN);
+ const __m128i avg2_left = _mm_avg_epu16(IJKLMNOP, XIJKLMNO);
+ const __m128i avg2_avg3_lo = _mm_unpacklo_epi16(avg2_left, avg3_left);
+ const __m128i avg2_avg3_hi = _mm_unpackhi_epi16(avg2_left, avg3_left);
+ const __m128i row0 =
+ _mm_alignr_epi8(avg3, _mm_slli_si128(avg2_avg3_lo, 12), 12);
+ const __m128i row1 =
+ _mm_alignr_epi8(row0, _mm_slli_si128(avg2_avg3_lo, 8), 12);
+ const __m128i row2 =
+ _mm_alignr_epi8(row1, _mm_slli_si128(avg2_avg3_lo, 4), 12);
+ const __m128i row3 = _mm_alignr_epi8(row2, avg2_avg3_lo, 12);
+ const __m128i row4 =
+ _mm_alignr_epi8(row3, _mm_slli_si128(avg2_avg3_hi, 12), 12);
+ const __m128i row5 =
+ _mm_alignr_epi8(row4, _mm_slli_si128(avg2_avg3_hi, 8), 12);
+ const __m128i row6 =
+ _mm_alignr_epi8(row5, _mm_slli_si128(avg2_avg3_hi, 4), 12);
+ const __m128i row7 = _mm_alignr_epi8(row6, avg2_avg3_hi, 12);
+ (void)bd;
+ _mm_store_si128((__m128i *)dst, row0);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row1);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row2);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row3);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row4);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row5);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row6);
+ dst += stride;
+ _mm_store_si128((__m128i *)dst, row7);
+}
+
+void aom_highbd_d153_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i A0 = _mm_loadu_si128((const __m128i *)(above - 1));
+ const __m128i A1 = _mm_loadu_si128((const __m128i *)(above + 7));
+ const __m128i B0 = _mm_alignr_epi8(A1, A0, 2);
+ const __m128i B1 = _mm_srli_si128(A1, 2);
+ const __m128i C0 = _mm_alignr_epi8(A1, A0, 4);
+ const __m128i C1 = _mm_srli_si128(A1, 4);
+ const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0);
+ const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1);
+ const __m128i L0 = _mm_load_si128((const __m128i *)left);
+ const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8));
+ const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(A0, 14), 14);
+ const __m128i AXL0 = _mm_alignr_epi8(XL0, _mm_slli_si128(A0, 12), 14);
+ const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14);
+ const __m128i AXL1 = _mm_alignr_epi8(L1, L0, 12);
+ const __m128i avg3_left_0 = avg3_epu16(&L0, &XL0, &AXL0);
+ const __m128i avg2_left_0 = _mm_avg_epu16(L0, XL0);
+ const __m128i avg3_left_1 = avg3_epu16(&L1, &XL1, &AXL1);
+ const __m128i avg2_left_1 = _mm_avg_epu16(L1, XL1);
+ __m128i row_0 = avg3_0;
+ __m128i row_1 = avg3_1;
+ __m128i avg2_avg3_left[2][2];
+ int i, j;
+ (void)bd;
+
+ avg2_avg3_left[0][0] = _mm_unpacklo_epi16(avg2_left_0, avg3_left_0);
+ avg2_avg3_left[0][1] = _mm_unpackhi_epi16(avg2_left_0, avg3_left_0);
+ avg2_avg3_left[1][0] = _mm_unpacklo_epi16(avg2_left_1, avg3_left_1);
+ avg2_avg3_left[1][1] = _mm_unpackhi_epi16(avg2_left_1, avg3_left_1);
+
+ for (j = 0; j < 2; ++j) {
+ for (i = 0; i < 2; ++i) {
+ const __m128i avg2_avg3 = avg2_avg3_left[j][i];
+ row_1 = _mm_alignr_epi8(row_1, row_0, 12);
+ row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 12), 12);
+ _mm_store_si128((__m128i *)dst, row_0);
+ _mm_store_si128((__m128i *)(dst + 8), row_1);
+ dst += stride;
+ row_1 = _mm_alignr_epi8(row_1, row_0, 12);
+ row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 8), 12);
+ _mm_store_si128((__m128i *)dst, row_0);
+ _mm_store_si128((__m128i *)(dst + 8), row_1);
+ dst += stride;
+ row_1 = _mm_alignr_epi8(row_1, row_0, 12);
+ row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 4), 12);
+ _mm_store_si128((__m128i *)dst, row_0);
+ _mm_store_si128((__m128i *)(dst + 8), row_1);
+ dst += stride;
+ row_1 = _mm_alignr_epi8(row_1, row_0, 12);
+ row_0 = _mm_alignr_epi8(row_0, avg2_avg3, 12);
+ _mm_store_si128((__m128i *)dst, row_0);
+ _mm_store_si128((__m128i *)(dst + 8), row_1);
+ dst += stride;
+ }
+ }
+}
+
+void aom_highbd_d153_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ const __m128i A0 = _mm_loadu_si128((const __m128i *)(above - 1));
+ const __m128i A1 = _mm_loadu_si128((const __m128i *)(above + 7));
+ const __m128i A2 = _mm_loadu_si128((const __m128i *)(above + 15));
+ const __m128i A3 = _mm_loadu_si128((const __m128i *)(above + 23));
+ const __m128i B0 = _mm_alignr_epi8(A1, A0, 2);
+ const __m128i B1 = _mm_alignr_epi8(A2, A1, 2);
+ const __m128i B2 = _mm_alignr_epi8(A3, A2, 2);
+ const __m128i B3 = _mm_srli_si128(A3, 2);
+ const __m128i C0 = _mm_alignr_epi8(A1, A0, 4);
+ const __m128i C1 = _mm_alignr_epi8(A2, A1, 4);
+ const __m128i C2 = _mm_alignr_epi8(A3, A2, 4);
+ const __m128i C3 = _mm_srli_si128(A3, 4);
+ const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0);
+ const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1);
+ const __m128i avg3_2 = avg3_epu16(&A2, &B2, &C2);
+ const __m128i avg3_3 = avg3_epu16(&A3, &B3, &C3);
+ const __m128i L0 = _mm_load_si128((const __m128i *)left);
+ const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8));
+ const __m128i L2 = _mm_load_si128((const __m128i *)(left + 16));
+ const __m128i L3 = _mm_load_si128((const __m128i *)(left + 24));
+ const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(A0, 14), 14);
+ const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14);
+ const __m128i XL2 = _mm_alignr_epi8(L2, L1, 14);
+ const __m128i XL3 = _mm_alignr_epi8(L3, L2, 14);
+ const __m128i AXL0 = _mm_alignr_epi8(XL0, _mm_slli_si128(A0, 12), 14);
+ const __m128i AXL1 = _mm_alignr_epi8(L1, L0, 12);
+ const __m128i AXL2 = _mm_alignr_epi8(L2, L1, 12);
+ const __m128i AXL3 = _mm_alignr_epi8(L3, L2, 12);
+ const __m128i avg3_left_0 = avg3_epu16(&L0, &XL0, &AXL0);
+ const __m128i avg3_left_1 = avg3_epu16(&L1, &XL1, &AXL1);
+ const __m128i avg3_left_2 = avg3_epu16(&L2, &XL2, &AXL2);
+ const __m128i avg3_left_3 = avg3_epu16(&L3, &XL3, &AXL3);
+ const __m128i avg2_left_0 = _mm_avg_epu16(L0, XL0);
+ const __m128i avg2_left_1 = _mm_avg_epu16(L1, XL1);
+ const __m128i avg2_left_2 = _mm_avg_epu16(L2, XL2);
+ const __m128i avg2_left_3 = _mm_avg_epu16(L3, XL3);
+ __m128i row_0 = avg3_0;
+ __m128i row_1 = avg3_1;
+ __m128i row_2 = avg3_2;
+ __m128i row_3 = avg3_3;
+ __m128i avg2_avg3_left[4][2];
+ int i, j;
+ (void)bd;
+
+ avg2_avg3_left[0][0] = _mm_unpacklo_epi16(avg2_left_0, avg3_left_0);
+ avg2_avg3_left[0][1] = _mm_unpackhi_epi16(avg2_left_0, avg3_left_0);
+ avg2_avg3_left[1][0] = _mm_unpacklo_epi16(avg2_left_1, avg3_left_1);
+ avg2_avg3_left[1][1] = _mm_unpackhi_epi16(avg2_left_1, avg3_left_1);
+ avg2_avg3_left[2][0] = _mm_unpacklo_epi16(avg2_left_2, avg3_left_2);
+ avg2_avg3_left[2][1] = _mm_unpackhi_epi16(avg2_left_2, avg3_left_2);
+ avg2_avg3_left[3][0] = _mm_unpacklo_epi16(avg2_left_3, avg3_left_3);
+ avg2_avg3_left[3][1] = _mm_unpackhi_epi16(avg2_left_3, avg3_left_3);
+
+ for (j = 0; j < 4; ++j) {
+ for (i = 0; i < 2; ++i) {
+ const __m128i avg2_avg3 = avg2_avg3_left[j][i];
+ row_3 = _mm_alignr_epi8(row_3, row_2, 12);
+ row_2 = _mm_alignr_epi8(row_2, row_1, 12);
+ row_1 = _mm_alignr_epi8(row_1, row_0, 12);
+ row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 12), 12);
+ _mm_store_si128((__m128i *)dst, row_0);
+ _mm_store_si128((__m128i *)(dst + 8), row_1);
+ _mm_store_si128((__m128i *)(dst + 16), row_2);
+ _mm_store_si128((__m128i *)(dst + 24), row_3);
+ dst += stride;
+ row_3 = _mm_alignr_epi8(row_3, row_2, 12);
+ row_2 = _mm_alignr_epi8(row_2, row_1, 12);
+ row_1 = _mm_alignr_epi8(row_1, row_0, 12);
+ row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 8), 12);
+ _mm_store_si128((__m128i *)dst, row_0);
+ _mm_store_si128((__m128i *)(dst + 8), row_1);
+ _mm_store_si128((__m128i *)(dst + 16), row_2);
+ _mm_store_si128((__m128i *)(dst + 24), row_3);
+ dst += stride;
+ row_3 = _mm_alignr_epi8(row_3, row_2, 12);
+ row_2 = _mm_alignr_epi8(row_2, row_1, 12);
+ row_1 = _mm_alignr_epi8(row_1, row_0, 12);
+ row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 4), 12);
+ _mm_store_si128((__m128i *)dst, row_0);
+ _mm_store_si128((__m128i *)(dst + 8), row_1);
+ _mm_store_si128((__m128i *)(dst + 16), row_2);
+ _mm_store_si128((__m128i *)(dst + 24), row_3);
+ dst += stride;
+ row_3 = _mm_alignr_epi8(row_3, row_2, 12);
+ row_2 = _mm_alignr_epi8(row_2, row_1, 12);
+ row_1 = _mm_alignr_epi8(row_1, row_0, 12);
+ row_0 = _mm_alignr_epi8(row_0, avg2_avg3, 12);
+ _mm_store_si128((__m128i *)dst, row_0);
+ _mm_store_si128((__m128i *)(dst + 8), row_1);
+ _mm_store_si128((__m128i *)(dst + 16), row_2);
+ _mm_store_si128((__m128i *)(dst + 24), row_3);
+ dst += stride;
+ }
+ }
+}
diff --git a/third_party/aom/aom_dsp/x86/highbd_loopfilter_avx2.c b/third_party/aom/aom_dsp/x86/highbd_loopfilter_avx2.c
new file mode 100644
index 000000000..94c68885c
--- /dev/null
+++ b/third_party/aom/aom_dsp/x86/highbd_loopfilter_avx2.c
@@ -0,0 +1,873 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+
+#include "./aom_dsp_rtcd.h"
+#include "aom_dsp/x86/common_avx2.h"
+#include "aom_dsp/x86/lpf_common_sse2.h"
+#include "aom/aom_integer.h"
+
+#if !CONFIG_PARALLEL_DEBLOCKING || !CONFIG_CB4X4
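+// Broadcast the 8-bit blimit/limit/thresh values into all sixteen 16-bit
+// lanes, scaled up to the working bit depth.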
+static INLINE void get_limit(const uint8_t *bl, const uint8_t *l,
+ const uint8_t *t, int bd, __m256i *blt,
+ __m256i *lt, __m256i *thr) {
+ const int shift = bd - 8;
+ const __m128i zero = _mm_setzero_si128();
+
+ __m128i x = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)bl), zero);
+ __m256i y = _mm256_inserti128_si256(_mm256_castsi128_si256(x), x, 1);
+ *blt = _mm256_slli_epi16(y, shift);
+
+ x = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)l), zero);
+ y = _mm256_inserti128_si256(_mm256_castsi128_si256(x), x, 1);
+ *lt = _mm256_slli_epi16(y, shift);
+
+ x = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)t), zero);
+ y = _mm256_inserti128_si256(_mm256_castsi128_si256(x), x, 1);
+ *thr = _mm256_slli_epi16(y, shift);
+}
+
+static INLINE void load_highbd_pixel(const uint16_t *s, int size, int pitch,
+ __m256i *p, __m256i *q) {
+ int i;
+ for (i = 0; i < size; i++) {
+ p[i] = _mm256_loadu_si256((__m256i *)(s - (i + 1) * pitch));
+ q[i] = _mm256_loadu_si256((__m256i *)(s + i * pitch));
+ }
+}
+
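+// High-edge-variance mask: lanes become 0xFFFF where |p1 - p0| or |q1 - q0|
+// exceeds the threshold *t.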
+static INLINE void highbd_hev_mask(const __m256i *p, const __m256i *q,
+ const __m256i *t, __m256i *hev) {
+ const __m256i abs_p1p0 = _mm256_abs_epi16(_mm256_sub_epi16(p[1], p[0]));
+ const __m256i abs_q1q0 = _mm256_abs_epi16(_mm256_sub_epi16(q[1], q[0]));
+ __m256i h = _mm256_max_epi16(abs_p1p0, abs_q1q0);
+ h = _mm256_subs_epu16(h, *t);
+
+ const __m256i ffff = _mm256_set1_epi16(0xFFFF);
+ const __m256i zero = _mm256_setzero_si256();
+ *hev = _mm256_xor_si256(_mm256_cmpeq_epi16(h, zero), ffff);
+}
+
+static INLINE void highbd_filter_mask(const __m256i *p, const __m256i *q,
+ const __m256i *l, const __m256i *bl,
+ __m256i *mask) {
+ __m256i abs_p0q0 = _mm256_abs_epi16(_mm256_sub_epi16(p[0], q[0]));
+ __m256i abs_p1q1 = _mm256_abs_epi16(_mm256_sub_epi16(p[1], q[1]));
+ abs_p0q0 = _mm256_adds_epu16(abs_p0q0, abs_p0q0);
+ abs_p1q1 = _mm256_srli_epi16(abs_p1q1, 1);
+
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i one = _mm256_set1_epi16(1);
+ const __m256i ffff = _mm256_set1_epi16(0xFFFF);
+ __m256i max = _mm256_subs_epu16(_mm256_adds_epu16(abs_p0q0, abs_p1q1), *bl);
+ max = _mm256_xor_si256(_mm256_cmpeq_epi16(max, zero), ffff);
+ max = _mm256_and_si256(max, _mm256_adds_epu16(*l, one));
+
+ int i;
+ for (i = 1; i < 4; ++i) {
+ max = _mm256_max_epi16(max,
+ _mm256_abs_epi16(_mm256_sub_epi16(p[i], p[i - 1])));
+ max = _mm256_max_epi16(max,
+ _mm256_abs_epi16(_mm256_sub_epi16(q[i], q[i - 1])));
+ }
+ max = _mm256_subs_epu16(max, *l);
+  *mask = _mm256_cmpeq_epi16(max, zero);  // 0xFFFF where filtering is allowed
+}
+
+static INLINE void flat_mask_internal(const __m256i *th, const __m256i *p,
+ const __m256i *q, int bd, int start,
+ int end, __m256i *flat) {
+ __m256i max = _mm256_setzero_si256();
+ int i;
+ for (i = start; i < end; ++i) {
+ max = _mm256_max_epi16(max, _mm256_abs_epi16(_mm256_sub_epi16(p[i], p[0])));
+ max = _mm256_max_epi16(max, _mm256_abs_epi16(_mm256_sub_epi16(q[i], q[0])));
+ }
+
+ __m256i ft;
+ if (bd == 8)
+ ft = _mm256_subs_epu16(max, *th);
+ else if (bd == 10)
+ ft = _mm256_subs_epu16(max, _mm256_slli_epi16(*th, 2));
+ else // bd == 12
+ ft = _mm256_subs_epu16(max, _mm256_slli_epi16(*th, 4));
+
+ const __m256i zero = _mm256_setzero_si256();
+ *flat = _mm256_cmpeq_epi16(ft, zero);
+}
+
+// Note:
+// Accesses p[1]..p[3], p[0], and q[1]..q[3], q[0].
+static INLINE void highbd_flat_mask4(const __m256i *th, const __m256i *p,
+ const __m256i *q, __m256i *flat, int bd) {
+ // check the distance 1,2,3 against 0
+ flat_mask_internal(th, p, q, bd, 1, 4, flat);
+}
+
+// Note:
+// Accesses p[4]..p[7], p[0], and q[4]..q[7], q[0].
+static INLINE void highbd_flat_mask5(const __m256i *th, const __m256i *p,
+ const __m256i *q, __m256i *flat, int bd) {
+ flat_mask_internal(th, p, q, bd, 4, 8, flat);
+}
+
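+// Clamp each signed 16-bit lane of *pixel into the range [*min, *max].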
+static INLINE void pixel_clamp(const __m256i *min, const __m256i *max,
+ __m256i *pixel) {
+ __m256i clamped, mask;
+
+ mask = _mm256_cmpgt_epi16(*pixel, *max);
+ clamped = _mm256_andnot_si256(mask, *pixel);
+ mask = _mm256_and_si256(mask, *max);
+ clamped = _mm256_or_si256(mask, clamped);
+
+ mask = _mm256_cmpgt_epi16(clamped, *min);
+ clamped = _mm256_and_si256(mask, clamped);
+ mask = _mm256_andnot_si256(mask, *min);
+ *pixel = _mm256_or_si256(clamped, mask);
+}
+
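+// Narrow 4-tap filter. t80 is the midpoint of the bd-bit range; pixels are
+// biased into a signed range around it, filtered with saturating arithmetic,
+// and biased back before the results are written to ps/qs.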
+static INLINE void highbd_filter4(__m256i *p, __m256i *q, const __m256i *mask,
+ const __m256i *th, int bd, __m256i *ps,
+ __m256i *qs) {
+ __m256i t80;
+ if (bd == 8)
+ t80 = _mm256_set1_epi16(0x80);
+ else if (bd == 10)
+ t80 = _mm256_set1_epi16(0x200);
+ else // bd == 12
+ t80 = _mm256_set1_epi16(0x800);
+
+ __m256i ps0 = _mm256_subs_epi16(p[0], t80);
+ __m256i ps1 = _mm256_subs_epi16(p[1], t80);
+ __m256i qs0 = _mm256_subs_epi16(q[0], t80);
+ __m256i qs1 = _mm256_subs_epi16(q[1], t80);
+
+ const __m256i one = _mm256_set1_epi16(1);
+ const __m256i pmax = _mm256_subs_epi16(
+ _mm256_subs_epi16(_mm256_slli_epi16(one, bd), one), t80);
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i pmin = _mm256_subs_epi16(zero, t80);
+
+ __m256i filter = _mm256_subs_epi16(ps1, qs1);
+ pixel_clamp(&pmin, &pmax, &filter);
+
+ __m256i hev;
+ highbd_hev_mask(p, q, th, &hev);
+ filter = _mm256_and_si256(filter, hev);
+
+ const __m256i x = _mm256_subs_epi16(qs0, ps0);
+ filter = _mm256_adds_epi16(filter, x);
+ filter = _mm256_adds_epi16(filter, x);
+ filter = _mm256_adds_epi16(filter, x);
+ pixel_clamp(&pmin, &pmax, &filter);
+ filter = _mm256_and_si256(filter, *mask);
+
+ const __m256i t3 = _mm256_set1_epi16(3);
+ const __m256i t4 = _mm256_set1_epi16(4);
+
+ __m256i filter1 = _mm256_adds_epi16(filter, t4);
+ __m256i filter2 = _mm256_adds_epi16(filter, t3);
+ pixel_clamp(&pmin, &pmax, &filter1);
+ pixel_clamp(&pmin, &pmax, &filter2);
+ filter1 = _mm256_srai_epi16(filter1, 3);
+ filter2 = _mm256_srai_epi16(filter2, 3);
+
+ qs0 = _mm256_subs_epi16(qs0, filter1);
+ pixel_clamp(&pmin, &pmax, &qs0);
+ ps0 = _mm256_adds_epi16(ps0, filter2);
+ pixel_clamp(&pmin, &pmax, &ps0);
+
+ qs[0] = _mm256_adds_epi16(qs0, t80);
+ ps[0] = _mm256_adds_epi16(ps0, t80);
+
+ filter = _mm256_adds_epi16(filter1, one);
+ filter = _mm256_srai_epi16(filter, 1);
+ filter = _mm256_andnot_si256(hev, filter);
+
+ qs1 = _mm256_subs_epi16(qs1, filter);
+ pixel_clamp(&pmin, &pmax, &qs1);
+ ps1 = _mm256_adds_epi16(ps1, filter);
+ pixel_clamp(&pmin, &pmax, &ps1);
+
+ qs[1] = _mm256_adds_epi16(qs1, t80);
+ ps[1] = _mm256_adds_epi16(ps1, t80);
+}
+#endif // #if !CONFIG_PARALLEL_DEBLOCKING || !CONFIG_CB4X4
+
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
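+// With parallel deblocking and CB4X4 enabled, the AVX2 entry points simply
+// forward to their SSE2 counterparts.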
+void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int p,
+ const uint8_t *blt,
+ const uint8_t *lt,
+ const uint8_t *thr, int bd) {
+ aom_highbd_lpf_horizontal_edge_16_sse2(s, p, blt, lt, thr, bd);
+}
+
+void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int p,
+ const uint8_t *blt, const uint8_t *lt,
+ const uint8_t *thr, int bd) {
+ aom_highbd_lpf_vertical_16_dual_sse2(s, p, blt, lt, thr, bd);
+}
+
+void aom_highbd_lpf_horizontal_4_dual_avx2(
+ uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
+ const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd) {
+ aom_highbd_lpf_horizontal_4_dual_sse2(s, p, blimit0, limit0, thresh0, blimit1,
+ limit1, thresh1, bd);
+}
+
+void aom_highbd_lpf_horizontal_8_dual_avx2(
+ uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
+ const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd) {
+ aom_highbd_lpf_horizontal_8_dual_sse2(s, p, blimit0, limit0, thresh0, blimit1,
+ limit1, thresh1, bd);
+}
+
+void aom_highbd_lpf_vertical_4_dual_avx2(
+ uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
+ const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd) {
+ aom_highbd_lpf_vertical_4_dual_sse2(s, p, blimit0, limit0, thresh0, blimit1,
+ limit1, thresh1, bd);
+}
+
+void aom_highbd_lpf_vertical_8_dual_avx2(
+ uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
+ const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd) {
+ aom_highbd_lpf_vertical_8_dual_sse2(s, p, blimit0, limit0, thresh0, blimit1,
+ limit1, thresh1, bd);
+}
+#else
+void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int pitch,
+ const uint8_t *blt,
+ const uint8_t *lt,
+ const uint8_t *thr, int bd) {
+ __m256i blimit, limit, thresh;
+ get_limit(blt, lt, thr, bd, &blimit, &limit, &thresh);
+
+ __m256i p[8], q[8];
+ load_highbd_pixel(s, 8, pitch, p, q);
+
+ __m256i mask;
+ highbd_filter_mask(p, q, &limit, &blimit, &mask);
+
+ __m256i flat, flat2;
+ const __m256i one = _mm256_set1_epi16(1);
+ highbd_flat_mask4(&one, p, q, &flat, bd);
+ highbd_flat_mask5(&one, p, q, &flat2, bd);
+
+ flat = _mm256_and_si256(flat, mask);
+ flat2 = _mm256_and_si256(flat2, flat);
+
+ __m256i ps[2], qs[2];
+ highbd_filter4(p, q, &mask, &thresh, bd, ps, qs);
+
+ // flat and wide flat calculations
+ __m256i flat_p[3], flat_q[3];
+ __m256i flat2_p[7], flat2_q[7];
+ {
+ const __m256i eight = _mm256_set1_epi16(8);
+ const __m256i four = _mm256_set1_epi16(4);
+
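+    // flat_p/flat_q below are the (sum + 4) >> 3 short-filter averages and
+    // flat2_p/flat2_q the (sum + 8) >> 4 wide-filter averages; the running
+    // sums are updated incrementally rather than recomputed per output.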
+ __m256i sum_p = _mm256_add_epi16(_mm256_add_epi16(p[6], p[5]),
+ _mm256_add_epi16(p[4], p[3]));
+ __m256i sum_q = _mm256_add_epi16(_mm256_add_epi16(q[6], q[5]),
+ _mm256_add_epi16(q[4], q[3]));
+
+ __m256i sum_lp = _mm256_add_epi16(p[0], _mm256_add_epi16(p[2], p[1]));
+ sum_p = _mm256_add_epi16(sum_p, sum_lp);
+
+ __m256i sum_lq = _mm256_add_epi16(q[0], _mm256_add_epi16(q[2], q[1]));
+ sum_q = _mm256_add_epi16(sum_q, sum_lq);
+ sum_p = _mm256_add_epi16(eight, _mm256_add_epi16(sum_p, sum_q));
+ sum_lp = _mm256_add_epi16(four, _mm256_add_epi16(sum_lp, sum_lq));
+
+ flat2_p[0] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_p, _mm256_add_epi16(p[7], p[0])), 4);
+ flat2_q[0] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_p, _mm256_add_epi16(q[7], q[0])), 4);
+ flat_p[0] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_lp, _mm256_add_epi16(p[3], p[0])), 3);
+ flat_q[0] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_lp, _mm256_add_epi16(q[3], q[0])), 3);
+
+ __m256i sum_p7 = _mm256_add_epi16(p[7], p[7]);
+ __m256i sum_q7 = _mm256_add_epi16(q[7], q[7]);
+ __m256i sum_p3 = _mm256_add_epi16(p[3], p[3]);
+ __m256i sum_q3 = _mm256_add_epi16(q[3], q[3]);
+
+ sum_q = _mm256_sub_epi16(sum_p, p[6]);
+ sum_p = _mm256_sub_epi16(sum_p, q[6]);
+ flat2_p[1] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_p, _mm256_add_epi16(sum_p7, p[1])), 4);
+ flat2_q[1] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_q, _mm256_add_epi16(sum_q7, q[1])), 4);
+
+ sum_lq = _mm256_sub_epi16(sum_lp, p[2]);
+ sum_lp = _mm256_sub_epi16(sum_lp, q[2]);
+ flat_p[1] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_lp, _mm256_add_epi16(sum_p3, p[1])), 3);
+ flat_q[1] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_lq, _mm256_add_epi16(sum_q3, q[1])), 3);
+
+ sum_p7 = _mm256_add_epi16(sum_p7, p[7]);
+ sum_q7 = _mm256_add_epi16(sum_q7, q[7]);
+ sum_p3 = _mm256_add_epi16(sum_p3, p[3]);
+ sum_q3 = _mm256_add_epi16(sum_q3, q[3]);
+
+ sum_p = _mm256_sub_epi16(sum_p, q[5]);
+ sum_q = _mm256_sub_epi16(sum_q, p[5]);
+ flat2_p[2] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_p, _mm256_add_epi16(sum_p7, p[2])), 4);
+ flat2_q[2] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_q, _mm256_add_epi16(sum_q7, q[2])), 4);
+
+ sum_lp = _mm256_sub_epi16(sum_lp, q[1]);
+ sum_lq = _mm256_sub_epi16(sum_lq, p[1]);
+ flat_p[2] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_lp, _mm256_add_epi16(sum_p3, p[2])), 3);
+ flat_q[2] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_lq, _mm256_add_epi16(sum_q3, q[2])), 3);
+
+ int i;
+ for (i = 3; i < 7; ++i) {
+ sum_p7 = _mm256_add_epi16(sum_p7, p[7]);
+ sum_q7 = _mm256_add_epi16(sum_q7, q[7]);
+ sum_p = _mm256_sub_epi16(sum_p, q[7 - i]);
+ sum_q = _mm256_sub_epi16(sum_q, p[7 - i]);
+ flat2_p[i] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_p, _mm256_add_epi16(sum_p7, p[i])), 4);
+ flat2_q[i] = _mm256_srli_epi16(
+ _mm256_add_epi16(sum_q, _mm256_add_epi16(sum_q7, q[i])), 4);
+ }
+ }
+
+ // highbd_filter8
+ p[2] = _mm256_andnot_si256(flat, p[2]);
+ // p2 remains unchanged if !(flat && mask)
+ flat_p[2] = _mm256_and_si256(flat, flat_p[2]);
+ // when (flat && mask)
+ p[2] = _mm256_or_si256(p[2], flat_p[2]); // full list of p2 values
+ q[2] = _mm256_andnot_si256(flat, q[2]);
+ flat_q[2] = _mm256_and_si256(flat, flat_q[2]);
+ q[2] = _mm256_or_si256(q[2], flat_q[2]); // full list of q2 values
+
+ int i;
+ for (i = 1; i >= 0; i--) {
+ ps[i] = _mm256_andnot_si256(flat, ps[i]);
+ flat_p[i] = _mm256_and_si256(flat, flat_p[i]);
+ p[i] = _mm256_or_si256(ps[i], flat_p[i]);
+ qs[i] = _mm256_andnot_si256(flat, qs[i]);
+ flat_q[i] = _mm256_and_si256(flat, flat_q[i]);
+ q[i] = _mm256_or_si256(qs[i], flat_q[i]);
+ }
+
+ // highbd_filter16
+
+ for (i = 6; i >= 0; i--) {
+ // p[i] remains unchanged if !(flat2 && flat && mask)
+ p[i] = _mm256_andnot_si256(flat2, p[i]);
+ flat2_p[i] = _mm256_and_si256(flat2, flat2_p[i]);
+ // get values for when (flat2 && flat && mask)
+ p[i] = _mm256_or_si256(p[i], flat2_p[i]); // full list of p values
+
+ q[i] = _mm256_andnot_si256(flat2, q[i]);
+ flat2_q[i] = _mm256_and_si256(flat2, flat2_q[i]);
+ q[i] = _mm256_or_si256(q[i], flat2_q[i]);
+ _mm256_storeu_si256((__m256i *)(s - (i + 1) * pitch), p[i]);
+ _mm256_storeu_si256((__m256i *)(s + i * pitch), q[i]);
+ }
+}
+
+static INLINE void highbd_transpose16x16(uint16_t *src, int src_p,
+ uint16_t *dst, int dst_p) {
+ __m256i x[16];
+ int i;
+ for (i = 0; i < 16; ++i) {
+ x[i] = _mm256_loadu_si256((const __m256i *)src);
+ src += src_p;
+ }
+ mm256_transpose_16x16(x, x);
+ for (i = 0; i < 16; ++i) {
+ _mm256_storeu_si256((__m256i *)dst, x[i]);
+ dst += dst_p;
+ }
+}
+
+void aom_highbd_lpf_vertical_16_dual_avx2(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd) {
+ DECLARE_ALIGNED(16, uint16_t, t_dst[256]);
+
+ // Transpose 16x16
+ highbd_transpose16x16(s - 8, p, t_dst, 16);
+
+ // Loop filtering
+ aom_highbd_lpf_horizontal_edge_16_avx2(t_dst + 8 * 16, 16, blimit, limit,
+ thresh, bd);
+
+ // Transpose back
+ highbd_transpose16x16(t_dst, 16, s - 8, p);
+}
+
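+// Same as get_limit(), but packs two independent sets of thresholds into the
+// low and high 128-bit halves so that two edges can be filtered at once.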
+static INLINE void get_dual_limit(const uint8_t *b0, const uint8_t *l0,
+ const uint8_t *t0, const uint8_t *b1,
+ const uint8_t *l1, const uint8_t *t1, int bd,
+ __m256i *blt, __m256i *lt, __m256i *thr) {
+ const __m128i z128 = _mm_setzero_si128();
+ const __m128i blimit0 =
+ _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)b0), z128);
+ const __m128i limit0 =
+ _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)l0), z128);
+ const __m128i thresh0 =
+ _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)t0), z128);
+ const __m128i blimit1 =
+ _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)b1), z128);
+ const __m128i limit1 =
+ _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)l1), z128);
+ const __m128i thresh1 =
+ _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)t1), z128);
+
+ *blt = _mm256_inserti128_si256(_mm256_castsi128_si256(blimit0), blimit1, 1);
+ *lt = _mm256_inserti128_si256(_mm256_castsi128_si256(limit0), limit1, 1);
+ *thr = _mm256_inserti128_si256(_mm256_castsi128_si256(thresh0), thresh1, 1);
+
+ int shift = bd - 8;
+ *blt = _mm256_slli_epi16(*blt, shift);
+ *lt = _mm256_slli_epi16(*lt, shift);
+ *thr = _mm256_slli_epi16(*thr, shift);
+}
+
+void aom_highbd_lpf_horizontal_4_dual_avx2(
+ uint16_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0,
+ const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
+ const uint8_t *_thresh1, int bd) {
+ __m256i p3 = _mm256_loadu_si256((__m256i *)(s - 4 * p));
+ __m256i p2 = _mm256_loadu_si256((__m256i *)(s - 3 * p));
+ __m256i p1 = _mm256_loadu_si256((__m256i *)(s - 2 * p));
+ __m256i p0 = _mm256_loadu_si256((__m256i *)(s - 1 * p));
+ __m256i q0 = _mm256_loadu_si256((__m256i *)(s - 0 * p));
+ __m256i q1 = _mm256_loadu_si256((__m256i *)(s + 1 * p));
+ __m256i q2 = _mm256_loadu_si256((__m256i *)(s + 2 * p));
+ __m256i q3 = _mm256_loadu_si256((__m256i *)(s + 3 * p));
+
+ const __m256i abs_p1p0 = _mm256_abs_epi16(_mm256_sub_epi16(p1, p0));
+ const __m256i abs_q1q0 = _mm256_abs_epi16(_mm256_sub_epi16(q1, q0));
+
+ __m256i abs_p0q0 = _mm256_abs_epi16(_mm256_sub_epi16(p0, q0));
+ __m256i abs_p1q1 = _mm256_abs_epi16(_mm256_sub_epi16(p1, q1));
+
+ __m256i blimit, limit, thresh;
+ get_dual_limit(_blimit0, _limit0, _thresh0, _blimit1, _limit1, _thresh1, bd,
+ &blimit, &limit, &thresh);
+
+ __m256i t80, tff80, tffe0, t1f, t7f;
+ if (bd == 8) {
+ t80 = _mm256_set1_epi16(0x80);
+ tff80 = _mm256_set1_epi16(0xff80);
+ tffe0 = _mm256_set1_epi16(0xffe0);
+ t1f = _mm256_srli_epi16(_mm256_set1_epi16(0x1fff), 8);
+ t7f = _mm256_srli_epi16(_mm256_set1_epi16(0x7fff), 8);
+ } else if (bd == 10) {
+ t80 = _mm256_slli_epi16(_mm256_set1_epi16(0x80), 2);
+ tff80 = _mm256_slli_epi16(_mm256_set1_epi16(0xff80), 2);
+ tffe0 = _mm256_slli_epi16(_mm256_set1_epi16(0xffe0), 2);
+ t1f = _mm256_srli_epi16(_mm256_set1_epi16(0x1fff), 6);
+ t7f = _mm256_srli_epi16(_mm256_set1_epi16(0x7fff), 6);
+ } else { // bd == 12
+ t80 = _mm256_slli_epi16(_mm256_set1_epi16(0x80), 4);
+ tff80 = _mm256_slli_epi16(_mm256_set1_epi16(0xff80), 4);
+ tffe0 = _mm256_slli_epi16(_mm256_set1_epi16(0xffe0), 4);
+ t1f = _mm256_srli_epi16(_mm256_set1_epi16(0x1fff), 4);
+ t7f = _mm256_srli_epi16(_mm256_set1_epi16(0x7fff), 4);
+ }
+
+ __m256i ps1 =
+ _mm256_subs_epi16(_mm256_loadu_si256((__m256i *)(s - 2 * p)), t80);
+ __m256i ps0 =
+ _mm256_subs_epi16(_mm256_loadu_si256((__m256i *)(s - 1 * p)), t80);
+ __m256i qs0 =
+ _mm256_subs_epi16(_mm256_loadu_si256((__m256i *)(s + 0 * p)), t80);
+ __m256i qs1 =
+ _mm256_subs_epi16(_mm256_loadu_si256((__m256i *)(s + 1 * p)), t80);
+
+ // filter_mask and hev_mask
+ const __m256i zero = _mm256_setzero_si256();
+ __m256i flat = _mm256_max_epi16(abs_p1p0, abs_q1q0);
+ __m256i hev = _mm256_subs_epu16(flat, thresh);
+ const __m256i ffff = _mm256_set1_epi16(0xFFFF);
+ hev = _mm256_xor_si256(_mm256_cmpeq_epi16(hev, zero), ffff);
+
+ abs_p0q0 = _mm256_adds_epu16(abs_p0q0, abs_p0q0);
+ abs_p1q1 = _mm256_srli_epi16(abs_p1q1, 1);
+ __m256i mask =
+ _mm256_subs_epu16(_mm256_adds_epu16(abs_p0q0, abs_p1q1), blimit);
+ mask = _mm256_xor_si256(_mm256_cmpeq_epi16(mask, zero), ffff);
+ // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
+ // So taking maximums continues to work:
+ const __m256i one = _mm256_set1_epi16(1);
+ mask = _mm256_and_si256(mask, _mm256_adds_epu16(limit, one));
+ mask = _mm256_max_epi16(flat, mask);
+ // mask |= (abs(p1 - p0) > limit) * -1;
+ // mask |= (abs(q1 - q0) > limit) * -1;
+ __m256i work = _mm256_max_epi16(
+ _mm256_or_si256(_mm256_subs_epu16(p2, p1), _mm256_subs_epu16(p1, p2)),
+ _mm256_or_si256(_mm256_subs_epu16(p3, p2), _mm256_subs_epu16(p2, p3)));
+ mask = _mm256_max_epi16(work, mask);
+ work = _mm256_max_epi16(
+ _mm256_or_si256(_mm256_subs_epu16(q2, q1), _mm256_subs_epu16(q1, q2)),
+ _mm256_or_si256(_mm256_subs_epu16(q3, q2), _mm256_subs_epu16(q2, q3)));
+ mask = _mm256_max_epi16(work, mask);
+ mask = _mm256_subs_epu16(mask, limit);
+ mask = _mm256_cmpeq_epi16(mask, zero);
+
+ // filter4
+ const __m256i pmax = _mm256_subs_epi16(
+ _mm256_subs_epi16(_mm256_slli_epi16(one, bd), one), t80);
+ const __m256i pmin = _mm256_subs_epi16(zero, t80);
+
+ __m256i filt = _mm256_subs_epi16(ps1, qs1);
+ pixel_clamp(&pmin, &pmax, &filt);
+ filt = _mm256_and_si256(filt, hev);
+ __m256i work_a = _mm256_subs_epi16(qs0, ps0);
+ filt = _mm256_adds_epi16(filt, work_a);
+ filt = _mm256_adds_epi16(filt, work_a);
+ filt = _mm256_adds_epi16(filt, work_a);
+ pixel_clamp(&pmin, &pmax, &filt);
+
+ // (aom_filter + 3 * (qs0 - ps0)) & mask
+ filt = _mm256_and_si256(filt, mask);
+
+ const __m256i t4 = _mm256_set1_epi16(4);
+ const __m256i t3 = _mm256_set1_epi16(3);
+
+ __m256i filter1 = _mm256_adds_epi16(filt, t4);
+ pixel_clamp(&pmin, &pmax, &filter1);
+ __m256i filter2 = _mm256_adds_epi16(filt, t3);
+ pixel_clamp(&pmin, &pmax, &filter2);
+
+ // Filter1 >> 3
+ work_a = _mm256_cmpgt_epi16(zero, filter1); // get the values that are <0
+ filter1 = _mm256_srli_epi16(filter1, 3);
+ work_a = _mm256_and_si256(work_a, tffe0); // sign bits for the values < 0
+ filter1 = _mm256_and_si256(filter1, t1f); // clamp the range
+ filter1 = _mm256_or_si256(filter1, work_a); // reinsert the sign bits
+
+ // Filter2 >> 3
+ work_a = _mm256_cmpgt_epi16(zero, filter2);
+ filter2 = _mm256_srli_epi16(filter2, 3);
+ work_a = _mm256_and_si256(work_a, tffe0);
+ filter2 = _mm256_and_si256(filter2, t1f);
+ filter2 = _mm256_or_si256(filter2, work_a);
+
+ // filt >> 1 (arithmetic shift emulated with a logical shift):
+ // t7f is 0x7f shifted left by bitdepth - 8 with the vacated low bits
+ // set to 1; it masks the shifted magnitude while tff80 restores the
+ // sign bits
+ filt = _mm256_adds_epi16(filter1, one);
+ work_a = _mm256_cmpgt_epi16(zero, filt);
+ filt = _mm256_srli_epi16(filt, 1);
+ work_a = _mm256_and_si256(work_a, tff80);
+ filt = _mm256_and_si256(filt, t7f);
+ filt = _mm256_or_si256(filt, work_a);
+
+ filt = _mm256_andnot_si256(hev, filt);
+
+ filter1 = _mm256_subs_epi16(qs0, filter1);
+ pixel_clamp(&pmin, &pmax, &filter1);
+ q0 = _mm256_adds_epi16(filter1, t80);
+
+ filter1 = _mm256_subs_epi16(qs1, filt);
+ pixel_clamp(&pmin, &pmax, &filter1);
+ q1 = _mm256_adds_epi16(filter1, t80);
+
+ filter2 = _mm256_adds_epi16(ps0, filter2);
+ pixel_clamp(&pmin, &pmax, &filter2);
+ p0 = _mm256_adds_epi16(filter2, t80);
+
+ filter2 = _mm256_adds_epi16(ps1, filt);
+ pixel_clamp(&pmin, &pmax, &filter2);
+ p1 = _mm256_adds_epi16(filter2, t80);
+
+ _mm256_storeu_si256((__m256i *)(s - 2 * p), p1);
+ _mm256_storeu_si256((__m256i *)(s - 1 * p), p0);
+ _mm256_storeu_si256((__m256i *)(s + 0 * p), q0);
+ _mm256_storeu_si256((__m256i *)(s + 1 * p), q1);
+}
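
The mask built above reproduces, lane by lane, the scalar predicate of the C loop filter. A reference sketch of that predicate (names illustrative):

#include <stdint.h>
#include <stdlib.h>

// Returns 1 when the edge should be filtered: the blimit test on the edge
// pair plus a per-step limit test on each side, matching the SIMD ~mask.
static int filter_mask_scalar(uint16_t blimit, uint16_t limit,
                              const uint16_t *p, const uint16_t *q) {
  // p[0..3] walk left from the edge, q[0..3] walk right.
  int over = abs(p[0] - q[0]) * 2 + abs(p[1] - q[1]) / 2 > blimit;
  for (int i = 1; i < 4; ++i) {
    over |= abs(p[i] - p[i - 1]) > limit;
    over |= abs(q[i] - q[i - 1]) > limit;
  }
  return !over;
}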
+
+void aom_highbd_lpf_horizontal_8_dual_avx2(
+ uint16_t *s, int p, const uint8_t *_blimit0, const uint8_t *_limit0,
+ const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
+ const uint8_t *_thresh1, int bd) {
+ DECLARE_ALIGNED(16, uint16_t, flat_op2[16]);
+ DECLARE_ALIGNED(16, uint16_t, flat_op1[16]);
+ DECLARE_ALIGNED(16, uint16_t, flat_op0[16]);
+ DECLARE_ALIGNED(16, uint16_t, flat_oq2[16]);
+ DECLARE_ALIGNED(16, uint16_t, flat_oq1[16]);
+ DECLARE_ALIGNED(16, uint16_t, flat_oq0[16]);
+
+ __m256i p3 = _mm256_loadu_si256((__m256i *)(s - 4 * p));
+ __m256i q3 = _mm256_loadu_si256((__m256i *)(s + 3 * p));
+ __m256i p2 = _mm256_loadu_si256((__m256i *)(s - 3 * p));
+ __m256i q2 = _mm256_loadu_si256((__m256i *)(s + 2 * p));
+ __m256i p1 = _mm256_loadu_si256((__m256i *)(s - 2 * p));
+ __m256i q1 = _mm256_loadu_si256((__m256i *)(s + 1 * p));
+ __m256i p0 = _mm256_loadu_si256((__m256i *)(s - 1 * p));
+ __m256i q0 = _mm256_loadu_si256((__m256i *)(s + 0 * p));
+
+ __m256i blimit, limit, thresh;
+ get_dual_limit(_blimit0, _limit0, _thresh0, _blimit1, _limit1, _thresh1, bd,
+ &blimit, &limit, &thresh);
+
+ __m256i t80;
+ if (bd == 8) {
+ t80 = _mm256_set1_epi16(0x80);
+ } else if (bd == 10) {
+ t80 = _mm256_set1_epi16(0x200);
+ } else { // bd == 12
+ t80 = _mm256_set1_epi16(0x800);
+ }
+
+ __m256i ps1, ps0, qs0, qs1;
+ ps1 = _mm256_subs_epi16(p1, t80);
+ ps0 = _mm256_subs_epi16(p0, t80);
+ qs0 = _mm256_subs_epi16(q0, t80);
+ qs1 = _mm256_subs_epi16(q1, t80);
+
+ // filter_mask and hev_mask
+ __m256i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work;
+ abs_p1p0 = _mm256_abs_epi16(_mm256_sub_epi16(p1, p0));
+ abs_q1q0 = _mm256_abs_epi16(_mm256_sub_epi16(q1, q0));
+
+ abs_p0q0 = _mm256_abs_epi16(_mm256_sub_epi16(p0, q0));
+ abs_p1q1 = _mm256_abs_epi16(_mm256_sub_epi16(p1, q1));
+ __m256i flat = _mm256_max_epi16(abs_p1p0, abs_q1q0);
+ __m256i hev = _mm256_subs_epu16(flat, thresh);
+ const __m256i zero = _mm256_setzero_si256();
+ const __m256i ffff = _mm256_set1_epi16(0xFFFF);
+ hev = _mm256_xor_si256(_mm256_cmpeq_epi16(hev, zero), ffff);
+
+ abs_p0q0 = _mm256_adds_epu16(abs_p0q0, abs_p0q0);
+ abs_p1q1 = _mm256_srli_epi16(abs_p1q1, 1);
+ __m256i mask =
+ _mm256_subs_epu16(_mm256_adds_epu16(abs_p0q0, abs_p1q1), blimit);
+ mask = _mm256_xor_si256(_mm256_cmpeq_epi16(mask, zero), ffff);
+ // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
+ // So taking maximums continues to work:
+
+ const __m256i one = _mm256_set1_epi16(1);
+ mask = _mm256_and_si256(mask, _mm256_adds_epu16(limit, one));
+ mask = _mm256_max_epi16(abs_p1p0, mask);
+ // mask |= (abs(p1 - p0) > limit) * -1;
+ mask = _mm256_max_epi16(abs_q1q0, mask);
+ // mask |= (abs(q1 - q0) > limit) * -1;
+
+ work = _mm256_max_epi16(_mm256_abs_epi16(_mm256_sub_epi16(p2, p1)),
+ _mm256_abs_epi16(_mm256_sub_epi16(q2, q1)));
+ mask = _mm256_max_epi16(work, mask);
+ work = _mm256_max_epi16(_mm256_abs_epi16(_mm256_sub_epi16(p3, p2)),
+ _mm256_abs_epi16(_mm256_sub_epi16(q3, q2)));
+ mask = _mm256_max_epi16(work, mask);
+ mask = _mm256_subs_epu16(mask, limit);
+ mask = _mm256_cmpeq_epi16(mask, zero);
+
+ // flat_mask4
+ flat = _mm256_max_epi16(_mm256_abs_epi16(_mm256_sub_epi16(p2, p0)),
+ _mm256_abs_epi16(_mm256_sub_epi16(q2, q0)));
+ work = _mm256_max_epi16(_mm256_abs_epi16(_mm256_sub_epi16(p3, p0)),
+ _mm256_abs_epi16(_mm256_sub_epi16(q3, q0)));
+ flat = _mm256_max_epi16(work, flat);
+ flat = _mm256_max_epi16(abs_p1p0, flat);
+ flat = _mm256_max_epi16(abs_q1q0, flat);
+
+ if (bd == 8)
+ flat = _mm256_subs_epu16(flat, one);
+ else if (bd == 10)
+ flat = _mm256_subs_epu16(flat, _mm256_slli_epi16(one, 2));
+ else // bd == 12
+ flat = _mm256_subs_epu16(flat, _mm256_slli_epi16(one, 4));
+
+ flat = _mm256_cmpeq_epi16(flat, zero);
+ flat = _mm256_and_si256(flat, mask); // flat & mask
+
+ // Added before shift for rounding part of ROUND_POWER_OF_TWO
+ __m256i workp_a, workp_b, workp_shft;
+ workp_a =
+ _mm256_add_epi16(_mm256_add_epi16(p3, p3), _mm256_add_epi16(p2, p1));
+ const __m256i four = _mm256_set1_epi16(4);
+ workp_a = _mm256_add_epi16(_mm256_add_epi16(workp_a, four), p0);
+ workp_b = _mm256_add_epi16(_mm256_add_epi16(q0, p2), p3);
+ workp_shft = _mm256_srli_epi16(_mm256_add_epi16(workp_a, workp_b), 3);
+ _mm256_storeu_si256((__m256i *)&flat_op2[0], workp_shft);
+
+ workp_b = _mm256_add_epi16(_mm256_add_epi16(q0, q1), p1);
+ workp_shft = _mm256_srli_epi16(_mm256_add_epi16(workp_a, workp_b), 3);
+ _mm256_storeu_si256((__m256i *)&flat_op1[0], workp_shft);
+
+ workp_a = _mm256_add_epi16(_mm256_sub_epi16(workp_a, p3), q2);
+ workp_b = _mm256_add_epi16(_mm256_sub_epi16(workp_b, p1), p0);
+ workp_shft = _mm256_srli_epi16(_mm256_add_epi16(workp_a, workp_b), 3);
+ _mm256_storeu_si256((__m256i *)&flat_op0[0], workp_shft);
+
+ workp_a = _mm256_add_epi16(_mm256_sub_epi16(workp_a, p3), q3);
+ workp_b = _mm256_add_epi16(_mm256_sub_epi16(workp_b, p0), q0);
+ workp_shft = _mm256_srli_epi16(_mm256_add_epi16(workp_a, workp_b), 3);
+ _mm256_storeu_si256((__m256i *)&flat_oq0[0], workp_shft);
+
+ workp_a = _mm256_add_epi16(_mm256_sub_epi16(workp_a, p2), q3);
+ workp_b = _mm256_add_epi16(_mm256_sub_epi16(workp_b, q0), q1);
+ workp_shft = _mm256_srli_epi16(_mm256_add_epi16(workp_a, workp_b), 3);
+ _mm256_storeu_si256((__m256i *)&flat_oq1[0], workp_shft);
+
+ workp_a = _mm256_add_epi16(_mm256_sub_epi16(workp_a, p1), q3);
+ workp_b = _mm256_add_epi16(_mm256_sub_epi16(workp_b, q1), q2);
+ workp_shft = _mm256_srli_epi16(_mm256_add_epi16(workp_a, workp_b), 3);
+ _mm256_storeu_si256((__m256i *)&flat_oq2[0], workp_shft);
+
+ // lp filter
+ const __m256i pmax = _mm256_subs_epi16(
+ _mm256_subs_epi16(_mm256_slli_epi16(one, bd), one), t80);
+ const __m256i pmin = _mm256_subs_epi16(zero, t80);
+
+ __m256i filt, filter1, filter2, work_a;
+ filt = _mm256_subs_epi16(ps1, qs1);
+ pixel_clamp(&pmin, &pmax, &filt);
+ filt = _mm256_and_si256(filt, hev);
+ work_a = _mm256_subs_epi16(qs0, ps0);
+ filt = _mm256_adds_epi16(filt, work_a);
+ filt = _mm256_adds_epi16(filt, work_a);
+ filt = _mm256_adds_epi16(filt, work_a);
+ // (aom_filter + 3 * (qs0 - ps0)) & mask
+ pixel_clamp(&pmin, &pmax, &filt);
+ filt = _mm256_and_si256(filt, mask);
+
+ const __m256i t4 = _mm256_set1_epi16(4);
+ const __m256i t3 = _mm256_set1_epi16(3);
+
+ filter1 = _mm256_adds_epi16(filt, t4);
+ filter2 = _mm256_adds_epi16(filt, t3);
+
+ // Filter1 >> 3
+ pixel_clamp(&pmin, &pmax, &filter1);
+ filter1 = _mm256_srai_epi16(filter1, 3);
+
+ // Filter2 >> 3
+ pixel_clamp(&pmin, &pmax, &filter2);
+ filter2 = _mm256_srai_epi16(filter2, 3);
+
+ // filt >> 1
+ filt = _mm256_adds_epi16(filter1, one);
+ filt = _mm256_srai_epi16(filt, 1);
+ // filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev;
+ filt = _mm256_andnot_si256(hev, filt);
+
+ work_a = _mm256_subs_epi16(qs0, filter1);
+ pixel_clamp(&pmin, &pmax, &work_a);
+ work_a = _mm256_adds_epi16(work_a, t80);
+ q0 = _mm256_loadu_si256((__m256i *)flat_oq0);
+ work_a = _mm256_andnot_si256(flat, work_a);
+ q0 = _mm256_and_si256(flat, q0);
+ q0 = _mm256_or_si256(work_a, q0);
+
+ work_a = _mm256_subs_epi16(qs1, filt);
+ pixel_clamp(&pmin, &pmax, &work_a);
+ work_a = _mm256_adds_epi16(work_a, t80);
+ q1 = _mm256_loadu_si256((__m256i *)flat_oq1);
+ work_a = _mm256_andnot_si256(flat, work_a);
+ q1 = _mm256_and_si256(flat, q1);
+ q1 = _mm256_or_si256(work_a, q1);
+
+ work_a = _mm256_loadu_si256((__m256i *)(s + 2 * p));
+ q2 = _mm256_loadu_si256((__m256i *)flat_oq2);
+ work_a = _mm256_andnot_si256(flat, work_a);
+ q2 = _mm256_and_si256(flat, q2);
+ q2 = _mm256_or_si256(work_a, q2);
+
+ work_a = _mm256_adds_epi16(ps0, filter2);
+ pixel_clamp(&pmin, &pmax, &work_a);
+ work_a = _mm256_adds_epi16(work_a, t80);
+ p0 = _mm256_loadu_si256((__m256i *)flat_op0);
+ work_a = _mm256_andnot_si256(flat, work_a);
+ p0 = _mm256_and_si256(flat, p0);
+ p0 = _mm256_or_si256(work_a, p0);
+
+ work_a = _mm256_adds_epi16(ps1, filt);
+ pixel_clamp(&pmin, &pmax, &work_a);
+ work_a = _mm256_adds_epi16(work_a, t80);
+ p1 = _mm256_loadu_si256((__m256i *)flat_op1);
+ work_a = _mm256_andnot_si256(flat, work_a);
+ p1 = _mm256_and_si256(flat, p1);
+ p1 = _mm256_or_si256(work_a, p1);
+
+ work_a = _mm256_loadu_si256((__m256i *)(s - 3 * p));
+ p2 = _mm256_loadu_si256((__m256i *)flat_op2);
+ work_a = _mm256_andnot_si256(flat, work_a);
+ p2 = _mm256_and_si256(flat, p2);
+ p2 = _mm256_or_si256(work_a, p2);
+
+ _mm256_storeu_si256((__m256i *)(s - 3 * p), p2);
+ _mm256_storeu_si256((__m256i *)(s - 2 * p), p1);
+ _mm256_storeu_si256((__m256i *)(s - 1 * p), p0);
+ _mm256_storeu_si256((__m256i *)(s + 0 * p), q0);
+ _mm256_storeu_si256((__m256i *)(s + 1 * p), q1);
+ _mm256_storeu_si256((__m256i *)(s + 2 * p), q2);
+}
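
The flat_op*/flat_oq* values computed above are the smoothed outputs of the 8-pixel filter; the running workp_a/workp_b sums evaluate them incrementally. Expanded into closed form as a reference sketch (ROUND_POWER_OF_TWO(x, 3) written as (x + 4) >> 3):

#include <stdint.h>

// Closed-form scalar version of the six outputs stored to
// flat_op2..flat_oq2; p[i]/q[i] index outward from the edge.
static void filter8_flat_scalar(const uint16_t *p, const uint16_t *q,
                                uint16_t *op, uint16_t *oq) {
  op[2] = (3 * p[3] + 2 * p[2] + p[1] + p[0] + q[0] + 4) >> 3;
  op[1] = (2 * p[3] + p[2] + 2 * p[1] + p[0] + q[0] + q[1] + 4) >> 3;
  op[0] = (p[3] + p[2] + p[1] + 2 * p[0] + q[0] + q[1] + q[2] + 4) >> 3;
  oq[0] = (p[2] + p[1] + p[0] + 2 * q[0] + q[1] + q[2] + q[3] + 4) >> 3;
  oq[1] = (p[1] + p[0] + q[0] + 2 * q[1] + q[2] + 2 * q[3] + 4) >> 3;
  oq[2] = (p[0] + q[0] + q[1] + 2 * q[2] + 3 * q[3] + 4) >> 3;
}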
+
+void aom_highbd_lpf_vertical_4_dual_avx2(
+ uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
+ const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd) {
+ DECLARE_ALIGNED(16, uint16_t, t_dst[16 * 8]);
+ uint16_t *src[2];
+ uint16_t *dst[2];
+
+ // Transpose 8x16
+ highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
+
+ // Loop filtering
+ aom_highbd_lpf_horizontal_4_dual_avx2(t_dst + 4 * 16, 16, blimit0, limit0,
+ thresh0, blimit1, limit1, thresh1, bd);
+ src[0] = t_dst;
+ src[1] = t_dst + 8;
+ dst[0] = s - 4;
+ dst[1] = s - 4 + p * 8;
+
+ // Transpose back
+ highbd_transpose(src, 16, dst, p, 2);
+}
+
+void aom_highbd_lpf_vertical_8_dual_avx2(
+ uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
+ const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd) {
+ DECLARE_ALIGNED(16, uint16_t, t_dst[16 * 8]);
+ uint16_t *src[2];
+ uint16_t *dst[2];
+
+ // Transpose 8x16
+ highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
+
+ // Loop filtering
+ aom_highbd_lpf_horizontal_8_dual_avx2(t_dst + 4 * 16, 16, blimit0, limit0,
+ thresh0, blimit1, limit1, thresh1, bd);
+ src[0] = t_dst;
+ src[1] = t_dst + 8;
+
+ dst[0] = s - 4;
+ dst[1] = s - 4 + p * 8;
+
+ // Transpose back
+ highbd_transpose(src, 16, dst, p, 2);
+}
+#endif // CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
diff --git a/third_party/aom/aom_dsp/x86/highbd_loopfilter_sse2.c b/third_party/aom/aom_dsp/x86/highbd_loopfilter_sse2.c
index 76369871b..0a399edf2 100644
--- a/third_party/aom/aom_dsp/x86/highbd_loopfilter_sse2.c
+++ b/third_party/aom/aom_dsp/x86/highbd_loopfilter_sse2.c
@@ -12,135 +12,135 @@
#include <emmintrin.h> // SSE2
#include "./aom_dsp_rtcd.h"
-#include "aom_ports/mem.h"
+#include "aom_dsp/x86/lpf_common_sse2.h"
#include "aom_ports/emmintrin_compat.h"
+#include "aom_ports/mem.h"
-static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) {
- __m128i ubounded;
- __m128i lbounded;
- __m128i retval;
+static INLINE void pixel_clamp(const __m128i *min, const __m128i *max,
+ __m128i *pixel) {
+ __m128i clamped, mask;
- const __m128i zero = _mm_set1_epi16(0);
- const __m128i one = _mm_set1_epi16(1);
- __m128i t80, max, min;
+ mask = _mm_cmpgt_epi16(*pixel, *max);
+ clamped = _mm_andnot_si128(mask, *pixel);
+ mask = _mm_and_si128(mask, *max);
+ clamped = _mm_or_si128(mask, clamped);
- if (bd == 8) {
- t80 = _mm_set1_epi16(0x80);
- max = _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, 8), one), t80);
- } else if (bd == 10) {
- t80 = _mm_set1_epi16(0x200);
- max = _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, 10), one), t80);
- } else { // bd == 12
- t80 = _mm_set1_epi16(0x800);
- max = _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, 12), one), t80);
- }
+ mask = _mm_cmpgt_epi16(clamped, *min);
+ clamped = _mm_and_si128(mask, clamped);
+ mask = _mm_andnot_si128(mask, *min);
+ *pixel = _mm_or_si128(clamped, mask);
+}
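
pixel_clamp() is a branch-free min/max select built from compare, andnot, and or; in scalar terms it is an ordinary clamp:

#include <stdint.h>

// Scalar equivalent of pixel_clamp(): the SIMD version does the same for
// each signed 16-bit lane without branching.
static int16_t pixel_clamp_scalar(int16_t min, int16_t max, int16_t v) {
  if (v > max) return max;
  if (v < min) return min;
  return v;
}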
- min = _mm_subs_epi16(zero, t80);
+static INLINE void get_limit(const uint8_t *bl, const uint8_t *l,
+ const uint8_t *t, int bd, __m128i *blt,
+ __m128i *lt, __m128i *thr) {
+ const int shift = bd - 8;
+ const __m128i zero = _mm_setzero_si128();
- ubounded = _mm_cmpgt_epi16(value, max);
- lbounded = _mm_cmplt_epi16(value, min);
- retval = _mm_andnot_si128(_mm_or_si128(ubounded, lbounded), value);
- ubounded = _mm_and_si128(ubounded, max);
- lbounded = _mm_and_si128(lbounded, min);
- retval = _mm_or_si128(retval, ubounded);
- retval = _mm_or_si128(retval, lbounded);
- return retval;
-}
+ __m128i x = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)bl), zero);
+ *blt = _mm_slli_epi16(x, shift);
-// TODO(debargha, peter): Break up large functions into smaller ones
-// in this file.
-void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
- const uint8_t *_blimit,
- const uint8_t *_limit,
- const uint8_t *_thresh, int bd) {
- const __m128i zero = _mm_set1_epi16(0);
- const __m128i one = _mm_set1_epi16(1);
- __m128i blimit, limit, thresh;
- __m128i q7, p7, q6, p6, q5, p5, q4, p4, q3, p3, q2, p2, q1, p1, q0, p0;
- __m128i mask, hev, flat, flat2, abs_p1p0, abs_q1q0;
- __m128i ps1, qs1, ps0, qs0;
- __m128i abs_p0q0, abs_p1q1, ffff, work;
- __m128i filt, work_a, filter1, filter2;
- __m128i flat2_q6, flat2_p6, flat2_q5, flat2_p5, flat2_q4, flat2_p4;
- __m128i flat2_q3, flat2_p3, flat2_q2, flat2_p2, flat2_q1, flat2_p1;
- __m128i flat2_q0, flat2_p0;
- __m128i flat_q2, flat_p2, flat_q1, flat_p1, flat_q0, flat_p0;
- __m128i pixelFilter_p, pixelFilter_q;
- __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0;
- __m128i sum_p7, sum_q7, sum_p3, sum_q3;
- __m128i t4, t3, t80, t1;
- __m128i eight, four;
+ x = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)l), zero);
+ *lt = _mm_slli_epi16(x, shift);
- if (bd == 8) {
- blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero);
- limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero);
- thresh = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero);
- } else if (bd == 10) {
- blimit = _mm_slli_epi16(
- _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero), 2);
- limit = _mm_slli_epi16(
- _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero), 2);
- thresh = _mm_slli_epi16(
- _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero), 2);
- } else { // bd == 12
- blimit = _mm_slli_epi16(
- _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero), 4);
- limit = _mm_slli_epi16(
- _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero), 4);
- thresh = _mm_slli_epi16(
- _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_thresh), zero), 4);
+ x = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)t), zero);
+ *thr = _mm_slli_epi16(x, shift);
+}
+
+static INLINE void load_highbd_pixel(const uint16_t *s, int size, int pitch,
+ __m128i *p, __m128i *q) {
+ int i;
+ for (i = 0; i < size; i++) {
+ p[i] = _mm_loadu_si128((__m128i *)(s - (i + 1) * pitch));
+ q[i] = _mm_loadu_si128((__m128i *)(s + i * pitch));
}
+}
+// abs(x - y) for unsigned 16-bit lanes is computed throughout as
+// _mm_or_si128(_mm_subs_epu16(x, y), _mm_subs_epu16(y, x)).
+static INLINE void highbd_hev_mask(const __m128i *p, const __m128i *q,
+ const __m128i *t, __m128i *hev) {
+ const __m128i abs_p1p0 =
+ _mm_or_si128(_mm_subs_epu16(p[1], p[0]), _mm_subs_epu16(p[0], p[1]));
+ const __m128i abs_q1q0 =
+ _mm_or_si128(_mm_subs_epu16(q[1], q[0]), _mm_subs_epu16(q[0], q[1]));
+ __m128i h = _mm_max_epi16(abs_p1p0, abs_q1q0);
+ h = _mm_subs_epu16(h, *t);
- q4 = _mm_load_si128((__m128i *)(s + 4 * p));
- p4 = _mm_load_si128((__m128i *)(s - 5 * p));
- q3 = _mm_load_si128((__m128i *)(s + 3 * p));
- p3 = _mm_load_si128((__m128i *)(s - 4 * p));
- q2 = _mm_load_si128((__m128i *)(s + 2 * p));
- p2 = _mm_load_si128((__m128i *)(s - 3 * p));
- q1 = _mm_load_si128((__m128i *)(s + 1 * p));
- p1 = _mm_load_si128((__m128i *)(s - 2 * p));
- q0 = _mm_load_si128((__m128i *)(s + 0 * p));
- p0 = _mm_load_si128((__m128i *)(s - 1 * p));
-
- // highbd_filter_mask
- abs_p1p0 = _mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1));
- abs_q1q0 = _mm_or_si128(_mm_subs_epu16(q1, q0), _mm_subs_epu16(q0, q1));
+ const __m128i ffff = _mm_set1_epi16(0xFFFF);
+ const __m128i zero = _mm_setzero_si128();
+ *hev = _mm_xor_si128(_mm_cmpeq_epi16(h, zero), ffff);
+}
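
highbd_hev_mask() marks a lane as "high edge variance" when the first step away from the edge on either side exceeds thresh. One lane in scalar form:

#include <stdint.h>
#include <stdlib.h>

// 0xFFFF selects the stronger filter path in the callers.
static uint16_t hev_scalar(uint16_t p1, uint16_t p0, uint16_t q0,
                           uint16_t q1, uint16_t thresh) {
  return (abs(p1 - p0) > thresh || abs(q1 - q0) > thresh) ? 0xFFFF : 0;
}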
- ffff = _mm_cmpeq_epi16(abs_p1p0, abs_p1p0);
+static INLINE void highbd_filter_mask(const __m128i *p, const __m128i *q,
+ const __m128i *l, const __m128i *bl,
+ __m128i *mask) {
+ __m128i abs_p0q0 =
+ _mm_or_si128(_mm_subs_epu16(p[0], q[0]), _mm_subs_epu16(q[0], p[0]));
+ __m128i abs_p1q1 =
+ _mm_or_si128(_mm_subs_epu16(p[1], q[1]), _mm_subs_epu16(q[1], p[1]));
+ abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0);
+ abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1);
- abs_p0q0 = _mm_or_si128(_mm_subs_epu16(p0, q0), _mm_subs_epu16(q0, p0));
- abs_p1q1 = _mm_or_si128(_mm_subs_epu16(p1, q1), _mm_subs_epu16(q1, p1));
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i ffff = _mm_set1_epi16(0xFFFF);
+ __m128i max = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), *bl);
+ max = _mm_xor_si128(_mm_cmpeq_epi16(max, zero), ffff);
+ max = _mm_and_si128(max, _mm_adds_epu16(*l, one));
+
+ int i;
+ for (i = 1; i < 4; ++i) {
+ max = _mm_max_epi16(max, _mm_or_si128(_mm_subs_epu16(p[i], p[i - 1]),
+ _mm_subs_epu16(p[i - 1], p[i])));
+ max = _mm_max_epi16(max, _mm_or_si128(_mm_subs_epu16(q[i], q[i - 1]),
+ _mm_subs_epu16(q[i - 1], q[i])));
+ }
+ max = _mm_subs_epu16(max, *l);
+ *mask = _mm_cmpeq_epi16(max, zero); // return ~mask
+}
- // highbd_hev_mask (in C code this is actually called from highbd_filter4)
- flat = _mm_max_epi16(abs_p1p0, abs_q1q0);
- hev = _mm_subs_epu16(flat, thresh);
- hev = _mm_xor_si128(_mm_cmpeq_epi16(hev, zero), ffff);
+static INLINE void flat_mask_internal(const __m128i *th, const __m128i *p,
+ const __m128i *q, int bd, int start,
+ int end, __m128i *flat) {
+ __m128i max = _mm_setzero_si128();
+ int i;
+ for (i = start; i < end; ++i) {
+ max = _mm_max_epi16(max, _mm_or_si128(_mm_subs_epu16(p[i], p[0]),
+ _mm_subs_epu16(p[0], p[i])));
+ max = _mm_max_epi16(max, _mm_or_si128(_mm_subs_epu16(q[i], q[0]),
+ _mm_subs_epu16(q[0], q[i])));
+ }
- abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0); // abs(p0 - q0) * 2
- abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); // abs(p1 - q1) / 2
- mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit);
- mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff);
- mask = _mm_and_si128(mask, _mm_adds_epu16(limit, one));
- work = _mm_max_epi16(
- _mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1)),
- _mm_or_si128(_mm_subs_epu16(q1, q0), _mm_subs_epu16(q0, q1)));
- mask = _mm_max_epi16(work, mask);
- work = _mm_max_epi16(
- _mm_or_si128(_mm_subs_epu16(p2, p1), _mm_subs_epu16(p1, p2)),
- _mm_or_si128(_mm_subs_epu16(q2, q1), _mm_subs_epu16(q1, q2)));
- mask = _mm_max_epi16(work, mask);
- work = _mm_max_epi16(
- _mm_or_si128(_mm_subs_epu16(p3, p2), _mm_subs_epu16(p2, p3)),
- _mm_or_si128(_mm_subs_epu16(q3, q2), _mm_subs_epu16(q2, q3)));
- mask = _mm_max_epi16(work, mask);
+ __m128i ft;
+ if (bd == 8)
+ ft = _mm_subs_epu16(max, *th);
+ else if (bd == 10)
+ ft = _mm_subs_epu16(max, _mm_slli_epi16(*th, 2));
+ else // bd == 12
+ ft = _mm_subs_epu16(max, _mm_slli_epi16(*th, 4));
- mask = _mm_subs_epu16(mask, limit);
- mask = _mm_cmpeq_epi16(mask, zero); // return ~mask
+ const __m128i zero = _mm_setzero_si128();
+ *flat = _mm_cmpeq_epi16(ft, zero);
+}
- // lp filter
- // highbd_filter4
- t4 = _mm_set1_epi16(4);
- t3 = _mm_set1_epi16(3);
+// Note:
+// Accesses p[1]..p[3], p[0], and q[1]..q[3], q[0]
+static INLINE void highbd_flat_mask4(const __m128i *th, const __m128i *p,
+ const __m128i *q, __m128i *flat, int bd) {
+ // check distances 1, 2, and 3 against 0
+ flat_mask_internal(th, p, q, bd, 1, 4, flat);
+}
+
+// Note:
+// Accesses p[4]..p[7], p[0], and q[4]..q[7], q[0]
+static INLINE void highbd_flat_mask5(const __m128i *th, const __m128i *p,
+ const __m128i *q, __m128i *flat, int bd) {
+ flat_mask_internal(th, p, q, bd, 4, 8, flat);
+}
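
With th == 1, as highbd_lpf_horz_edge_8_internal() below passes it, flat_mask_internal() declares the region flat when every sampled pixel stays within 1 << (bd - 8) of the pixel at the edge on its side. A scalar sketch under that assumption:

#include <stdint.h>
#include <stdlib.h>

// Distances start..end-1 are compared against index 0 on each side.
static int flat_scalar(const uint16_t *p, const uint16_t *q, int start,
                       int end, int bd) {
  const int limit = 1 << (bd - 8);
  for (int i = start; i < end; ++i) {
    if (abs(p[i] - p[0]) > limit || abs(q[i] - q[0]) > limit) return 0;
  }
  return 1;
}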
+
+static INLINE void highbd_filter4(__m128i *p, __m128i *q, const __m128i *mask,
+ const __m128i *th, int bd, __m128i *ps,
+ __m128i *qs) {
+ __m128i t80;
if (bd == 8)
t80 = _mm_set1_epi16(0x80);
else if (bd == 10)
@@ -148,340 +148,283 @@ void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
else // bd == 12
t80 = _mm_set1_epi16(0x800);
- t1 = _mm_set1_epi16(0x1);
+ __m128i ps0 = _mm_subs_epi16(p[0], t80);
+ __m128i ps1 = _mm_subs_epi16(p[1], t80);
+ __m128i qs0 = _mm_subs_epi16(q[0], t80);
+ __m128i qs1 = _mm_subs_epi16(q[1], t80);
- ps1 = _mm_subs_epi16(p1, t80);
- qs1 = _mm_subs_epi16(q1, t80);
- ps0 = _mm_subs_epi16(p0, t80);
- qs0 = _mm_subs_epi16(q0, t80);
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i pmax =
+ _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, bd), one), t80);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i pmin = _mm_subs_epi16(zero, t80);
- filt = _mm_and_si128(signed_char_clamp_bd_sse2(_mm_subs_epi16(ps1, qs1), bd),
- hev);
- work_a = _mm_subs_epi16(qs0, ps0);
- filt = _mm_adds_epi16(filt, work_a);
- filt = _mm_adds_epi16(filt, work_a);
- filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd);
- filt = _mm_and_si128(filt, mask);
- filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd);
- filter2 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t3), bd);
+ __m128i filter = _mm_subs_epi16(ps1, qs1);
+ pixel_clamp(&pmin, &pmax, &filter);
- // Filter1 >> 3
- filter1 = _mm_srai_epi16(filter1, 0x3);
- filter2 = _mm_srai_epi16(filter2, 0x3);
+ __m128i hev;
+ highbd_hev_mask(p, q, th, &hev);
+ filter = _mm_and_si128(filter, hev);
- qs0 = _mm_adds_epi16(
- signed_char_clamp_bd_sse2(_mm_subs_epi16(qs0, filter1), bd), t80);
- ps0 = _mm_adds_epi16(
- signed_char_clamp_bd_sse2(_mm_adds_epi16(ps0, filter2), bd), t80);
- filt = _mm_adds_epi16(filter1, t1);
- filt = _mm_srai_epi16(filt, 1);
- filt = _mm_andnot_si128(hev, filt);
- qs1 = _mm_adds_epi16(signed_char_clamp_bd_sse2(_mm_subs_epi16(qs1, filt), bd),
- t80);
- ps1 = _mm_adds_epi16(signed_char_clamp_bd_sse2(_mm_adds_epi16(ps1, filt), bd),
- t80);
+ const __m128i x = _mm_subs_epi16(qs0, ps0);
+ filter = _mm_adds_epi16(filter, x);
+ filter = _mm_adds_epi16(filter, x);
+ filter = _mm_adds_epi16(filter, x);
+ pixel_clamp(&pmin, &pmax, &filter);
+ filter = _mm_and_si128(filter, *mask);
- // end highbd_filter4
- // loopfilter done
+ const __m128i t3 = _mm_set1_epi16(3);
+ const __m128i t4 = _mm_set1_epi16(4);
- // highbd_flat_mask4
- flat = _mm_max_epi16(
- _mm_or_si128(_mm_subs_epu16(p2, p0), _mm_subs_epu16(p0, p2)),
- _mm_or_si128(_mm_subs_epu16(p3, p0), _mm_subs_epu16(p0, p3)));
- work = _mm_max_epi16(
- _mm_or_si128(_mm_subs_epu16(q2, q0), _mm_subs_epu16(q0, q2)),
- _mm_or_si128(_mm_subs_epu16(q3, q0), _mm_subs_epu16(q0, q3)));
- flat = _mm_max_epi16(work, flat);
- work = _mm_max_epi16(abs_p1p0, abs_q1q0);
- flat = _mm_max_epi16(work, flat);
+ __m128i filter1 = _mm_adds_epi16(filter, t4);
+ __m128i filter2 = _mm_adds_epi16(filter, t3);
+ pixel_clamp(&pmin, &pmax, &filter1);
+ pixel_clamp(&pmin, &pmax, &filter2);
+ filter1 = _mm_srai_epi16(filter1, 3);
+ filter2 = _mm_srai_epi16(filter2, 3);
- if (bd == 8)
- flat = _mm_subs_epu16(flat, one);
- else if (bd == 10)
- flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, 2));
- else // bd == 12
- flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, 4));
+ qs0 = _mm_subs_epi16(qs0, filter1);
+ pixel_clamp(&pmin, &pmax, &qs0);
+ ps0 = _mm_adds_epi16(ps0, filter2);
+ pixel_clamp(&pmin, &pmax, &ps0);
- flat = _mm_cmpeq_epi16(flat, zero);
- // end flat_mask4
+ qs[0] = _mm_adds_epi16(qs0, t80);
+ ps[0] = _mm_adds_epi16(ps0, t80);
- // flat & mask = flat && mask (as used in filter8)
- // (because, in both vars, each block of 16 either all 1s or all 0s)
- flat = _mm_and_si128(flat, mask);
+ filter = _mm_adds_epi16(filter1, one);
+ filter = _mm_srai_epi16(filter, 1);
+ filter = _mm_andnot_si128(hev, filter);
- p5 = _mm_load_si128((__m128i *)(s - 6 * p));
- q5 = _mm_load_si128((__m128i *)(s + 5 * p));
- p6 = _mm_load_si128((__m128i *)(s - 7 * p));
- q6 = _mm_load_si128((__m128i *)(s + 6 * p));
- p7 = _mm_load_si128((__m128i *)(s - 8 * p));
- q7 = _mm_load_si128((__m128i *)(s + 7 * p));
+ qs1 = _mm_subs_epi16(qs1, filter);
+ pixel_clamp(&pmin, &pmax, &qs1);
+ ps1 = _mm_adds_epi16(ps1, filter);
+ pixel_clamp(&pmin, &pmax, &ps1);
- // highbd_flat_mask5 (arguments passed in are p0, q0, p4-p7, q4-q7
- // but referred to as p0-p4 & q0-q4 in fn)
- flat2 = _mm_max_epi16(
- _mm_or_si128(_mm_subs_epu16(p4, p0), _mm_subs_epu16(p0, p4)),
- _mm_or_si128(_mm_subs_epu16(q4, q0), _mm_subs_epu16(q0, q4)));
+ qs[1] = _mm_adds_epi16(qs1, t80);
+ ps[1] = _mm_adds_epi16(ps1, t80);
+}
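
highbd_filter4() works on values re-centered around t80 = 0x80 << (bd - 8) and clamped to [-t80, (1 << bd) - 1 - t80]. A scalar sketch of the same arithmetic, with hev and mask as 0/1 flags and >> on negative values assumed arithmetic (matching _mm_srai_epi16):

#include <stdint.h>

static int32_t clampv(int32_t v, int32_t lo, int32_t hi) {
  return v < lo ? lo : (v > hi ? hi : v);
}

// Pixels are passed and returned in their natural range.
static void filter4_scalar(uint16_t *p1, uint16_t *p0, uint16_t *q0,
                           uint16_t *q1, int hev, int mask, int bd) {
  const int32_t t80 = 0x80 << (bd - 8);
  const int32_t lo = -t80, hi = ((1 << bd) - 1) - t80;
  const int32_t ps1 = *p1 - t80, ps0 = *p0 - t80;
  const int32_t qs0 = *q0 - t80, qs1 = *q1 - t80;

  int32_t f = clampv(ps1 - qs1, lo, hi) & -hev;
  f = clampv(f + 3 * (qs0 - ps0), lo, hi) & -mask;
  const int32_t f1 = clampv(f + 4, lo, hi) >> 3;
  const int32_t f2 = clampv(f + 3, lo, hi) >> 3;
  *q0 = (uint16_t)(clampv(qs0 - f1, lo, hi) + t80);
  *p0 = (uint16_t)(clampv(ps0 + f2, lo, hi) + t80);
  const int32_t f3 = ((f1 + 1) >> 1) & ~(-hev);  // suppressed when hev
  *q1 = (uint16_t)(clampv(qs1 - f3, lo, hi) + t80);
  *p1 = (uint16_t)(clampv(ps1 + f3, lo, hi) + t80);
}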
- work = _mm_max_epi16(
- _mm_or_si128(_mm_subs_epu16(p5, p0), _mm_subs_epu16(p0, p5)),
- _mm_or_si128(_mm_subs_epu16(q5, q0), _mm_subs_epu16(q0, q5)));
- flat2 = _mm_max_epi16(work, flat2);
+typedef enum { FOUR_PIXELS, EIGHT_PIXELS } PixelOutput;
- work = _mm_max_epi16(
- _mm_or_si128(_mm_subs_epu16(p6, p0), _mm_subs_epu16(p0, p6)),
- _mm_or_si128(_mm_subs_epu16(q6, q0), _mm_subs_epu16(q0, q6)));
- flat2 = _mm_max_epi16(work, flat2);
+static INLINE void highbd_lpf_horz_edge_8_internal(uint16_t *s, int pitch,
+ const uint8_t *blt,
+ const uint8_t *lt,
+ const uint8_t *thr, int bd,
+ PixelOutput pixel_output) {
+ __m128i blimit, limit, thresh;
+ get_limit(blt, lt, thr, bd, &blimit, &limit, &thresh);
- work = _mm_max_epi16(
- _mm_or_si128(_mm_subs_epu16(p7, p0), _mm_subs_epu16(p0, p7)),
- _mm_or_si128(_mm_subs_epu16(q7, q0), _mm_subs_epu16(q0, q7)));
- flat2 = _mm_max_epi16(work, flat2);
+ __m128i p[8], q[8];
+ load_highbd_pixel(s, 8, pitch, p, q);
- if (bd == 8)
- flat2 = _mm_subs_epu16(flat2, one);
- else if (bd == 10)
- flat2 = _mm_subs_epu16(flat2, _mm_slli_epi16(one, 2));
- else // bd == 12
- flat2 = _mm_subs_epu16(flat2, _mm_slli_epi16(one, 4));
+ __m128i mask;
+ highbd_filter_mask(p, q, &limit, &blimit, &mask);
+
+ __m128i flat, flat2;
+ const __m128i one = _mm_set1_epi16(1);
+ highbd_flat_mask4(&one, p, q, &flat, bd);
+ highbd_flat_mask5(&one, p, q, &flat2, bd);
+
+ flat = _mm_and_si128(flat, mask);
+ flat2 = _mm_and_si128(flat2, flat);
- flat2 = _mm_cmpeq_epi16(flat2, zero);
- flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask
- // end highbd_flat_mask5
+ __m128i ps[2], qs[2];
+ highbd_filter4(p, q, &mask, &thresh, bd, ps, qs);
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// flat and wide flat calculations
- eight = _mm_set1_epi16(8);
- four = _mm_set1_epi16(4);
-
- pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6, p5), _mm_add_epi16(p4, p3));
- pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6, q5), _mm_add_epi16(q4, q3));
-
- pixetFilter_p2p1p0 = _mm_add_epi16(p0, _mm_add_epi16(p2, p1));
- pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0);
-
- pixetFilter_q2q1q0 = _mm_add_epi16(q0, _mm_add_epi16(q2, q1));
- pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0);
- pixelFilter_p =
- _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, pixelFilter_q));
- pixetFilter_p2p1p0 = _mm_add_epi16(
- four, _mm_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0));
- flat2_p0 =
- _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, _mm_add_epi16(p7, p0)), 4);
- flat2_q0 =
- _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, _mm_add_epi16(q7, q0)), 4);
- flat_p0 = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(p3, p0)), 3);
- flat_q0 = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(q3, q0)), 3);
-
- sum_p7 = _mm_add_epi16(p7, p7);
- sum_q7 = _mm_add_epi16(q7, q7);
- sum_p3 = _mm_add_epi16(p3, p3);
- sum_q3 = _mm_add_epi16(q3, q3);
-
- pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6);
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6);
- flat2_p1 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p1)), 4);
- flat2_q1 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q1)), 4);
-
- pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2);
- pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2);
- flat_p1 = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(sum_p3, p1)), 3);
- flat_q1 = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_q2q1q0, _mm_add_epi16(sum_q3, q1)), 3);
-
- sum_p7 = _mm_add_epi16(sum_p7, p7);
- sum_q7 = _mm_add_epi16(sum_q7, q7);
- sum_p3 = _mm_add_epi16(sum_p3, p3);
- sum_q3 = _mm_add_epi16(sum_q3, q3);
-
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5);
- pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5);
- flat2_p2 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p2)), 4);
- flat2_q2 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q2)), 4);
-
- pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1);
- pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1);
- flat_p2 = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(sum_p3, p2)), 3);
- flat_q2 = _mm_srli_epi16(
- _mm_add_epi16(pixetFilter_q2q1q0, _mm_add_epi16(sum_q3, q2)), 3);
-
- sum_p7 = _mm_add_epi16(sum_p7, p7);
- sum_q7 = _mm_add_epi16(sum_q7, q7);
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4);
- pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4);
- flat2_p3 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p3)), 4);
- flat2_q3 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q3)), 4);
-
- sum_p7 = _mm_add_epi16(sum_p7, p7);
- sum_q7 = _mm_add_epi16(sum_q7, q7);
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3);
- pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3);
- flat2_p4 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p4)), 4);
- flat2_q4 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q4)), 4);
-
- sum_p7 = _mm_add_epi16(sum_p7, p7);
- sum_q7 = _mm_add_epi16(sum_q7, q7);
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2);
- pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2);
- flat2_p5 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p5)), 4);
- flat2_q5 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q5)), 4);
-
- sum_p7 = _mm_add_epi16(sum_p7, p7);
- sum_q7 = _mm_add_epi16(sum_q7, q7);
- pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1);
- pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1);
- flat2_p6 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p6)), 4);
- flat2_q6 = _mm_srli_epi16(
- _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q6)), 4);
-
- // wide flat
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- // highbd_filter8
- p2 = _mm_andnot_si128(flat, p2);
+ __m128i flat_p[3], flat_q[3];
+ __m128i flat2_p[7], flat2_q[7];
+ {
+ const __m128i eight = _mm_set1_epi16(8);
+ const __m128i four = _mm_set1_epi16(4);
+
+ __m128i sum_p =
+ _mm_add_epi16(_mm_add_epi16(p[6], p[5]), _mm_add_epi16(p[4], p[3]));
+ __m128i sum_q =
+ _mm_add_epi16(_mm_add_epi16(q[6], q[5]), _mm_add_epi16(q[4], q[3]));
+
+ __m128i sum_lp = _mm_add_epi16(p[0], _mm_add_epi16(p[2], p[1]));
+ sum_p = _mm_add_epi16(sum_p, sum_lp);
+
+ __m128i sum_lq = _mm_add_epi16(q[0], _mm_add_epi16(q[2], q[1]));
+ sum_q = _mm_add_epi16(sum_q, sum_lq);
+ sum_p = _mm_add_epi16(eight, _mm_add_epi16(sum_p, sum_q));
+ sum_lp = _mm_add_epi16(four, _mm_add_epi16(sum_lp, sum_lq));
+
+ flat2_p[0] =
+ _mm_srli_epi16(_mm_add_epi16(sum_p, _mm_add_epi16(p[7], p[0])), 4);
+ flat2_q[0] =
+ _mm_srli_epi16(_mm_add_epi16(sum_p, _mm_add_epi16(q[7], q[0])), 4);
+ flat_p[0] =
+ _mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(p[3], p[0])), 3);
+ flat_q[0] =
+ _mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(q[3], q[0])), 3);
+
+ __m128i sum_p7 = _mm_add_epi16(p[7], p[7]);
+ __m128i sum_q7 = _mm_add_epi16(q[7], q[7]);
+ __m128i sum_p3 = _mm_add_epi16(p[3], p[3]);
+ __m128i sum_q3 = _mm_add_epi16(q[3], q[3]);
+
+ sum_q = _mm_sub_epi16(sum_p, p[6]);
+ sum_p = _mm_sub_epi16(sum_p, q[6]);
+ flat2_p[1] =
+ _mm_srli_epi16(_mm_add_epi16(sum_p, _mm_add_epi16(sum_p7, p[1])), 4);
+ flat2_q[1] =
+ _mm_srli_epi16(_mm_add_epi16(sum_q, _mm_add_epi16(sum_q7, q[1])), 4);
+
+ sum_lq = _mm_sub_epi16(sum_lp, p[2]);
+ sum_lp = _mm_sub_epi16(sum_lp, q[2]);
+ flat_p[1] =
+ _mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(sum_p3, p[1])), 3);
+ flat_q[1] =
+ _mm_srli_epi16(_mm_add_epi16(sum_lq, _mm_add_epi16(sum_q3, q[1])), 3);
+
+ sum_p7 = _mm_add_epi16(sum_p7, p[7]);
+ sum_q7 = _mm_add_epi16(sum_q7, q[7]);
+ sum_p3 = _mm_add_epi16(sum_p3, p[3]);
+ sum_q3 = _mm_add_epi16(sum_q3, q[3]);
+
+ sum_p = _mm_sub_epi16(sum_p, q[5]);
+ sum_q = _mm_sub_epi16(sum_q, p[5]);
+ flat2_p[2] =
+ _mm_srli_epi16(_mm_add_epi16(sum_p, _mm_add_epi16(sum_p7, p[2])), 4);
+ flat2_q[2] =
+ _mm_srli_epi16(_mm_add_epi16(sum_q, _mm_add_epi16(sum_q7, q[2])), 4);
+
+ sum_lp = _mm_sub_epi16(sum_lp, q[1]);
+ sum_lq = _mm_sub_epi16(sum_lq, p[1]);
+ flat_p[2] =
+ _mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(sum_p3, p[2])), 3);
+ flat_q[2] =
+ _mm_srli_epi16(_mm_add_epi16(sum_lq, _mm_add_epi16(sum_q3, q[2])), 3);
+
+ int i;
+ for (i = 3; i < 7; ++i) {
+ sum_p7 = _mm_add_epi16(sum_p7, p[7]);
+ sum_q7 = _mm_add_epi16(sum_q7, q[7]);
+ sum_p = _mm_sub_epi16(sum_p, q[7 - i]);
+ sum_q = _mm_sub_epi16(sum_q, p[7 - i]);
+ flat2_p[i] =
+ _mm_srli_epi16(_mm_add_epi16(sum_p, _mm_add_epi16(sum_p7, p[i])), 4);
+ flat2_q[i] =
+ _mm_srli_epi16(_mm_add_epi16(sum_q, _mm_add_epi16(sum_q7, q[i])), 4);
+ }
+ }
+
+ // highbd_filter8
+ p[2] = _mm_andnot_si128(flat, p[2]);
// p2 remains unchanged if !(flat && mask)
- flat_p2 = _mm_and_si128(flat, flat_p2);
+ flat_p[2] = _mm_and_si128(flat, flat_p[2]);
// when (flat && mask)
- p2 = _mm_or_si128(p2, flat_p2); // full list of p2 values
- q2 = _mm_andnot_si128(flat, q2);
- flat_q2 = _mm_and_si128(flat, flat_q2);
- q2 = _mm_or_si128(q2, flat_q2); // full list of q2 values
-
- ps1 = _mm_andnot_si128(flat, ps1);
- // p1 takes the value assigned to in in filter4 if !(flat && mask)
- flat_p1 = _mm_and_si128(flat, flat_p1);
- // when (flat && mask)
- p1 = _mm_or_si128(ps1, flat_p1); // full list of p1 values
- qs1 = _mm_andnot_si128(flat, qs1);
- flat_q1 = _mm_and_si128(flat, flat_q1);
- q1 = _mm_or_si128(qs1, flat_q1); // full list of q1 values
-
- ps0 = _mm_andnot_si128(flat, ps0);
- // p0 takes the value assigned to in in filter4 if !(flat && mask)
- flat_p0 = _mm_and_si128(flat, flat_p0);
- // when (flat && mask)
- p0 = _mm_or_si128(ps0, flat_p0); // full list of p0 values
- qs0 = _mm_andnot_si128(flat, qs0);
- flat_q0 = _mm_and_si128(flat, flat_q0);
- q0 = _mm_or_si128(qs0, flat_q0); // full list of q0 values
- // end highbd_filter8
+ p[2] = _mm_or_si128(p[2], flat_p[2]); // full list of p2 values
+ q[2] = _mm_andnot_si128(flat, q[2]);
+ flat_q[2] = _mm_and_si128(flat, flat_q[2]);
+ q[2] = _mm_or_si128(q[2], flat_q[2]); // full list of q2 values
+
+ int i;
+ for (i = 1; i >= 0; i--) {
+ ps[i] = _mm_andnot_si128(flat, ps[i]);
+ flat_p[i] = _mm_and_si128(flat, flat_p[i]);
+ p[i] = _mm_or_si128(ps[i], flat_p[i]);
+ qs[i] = _mm_andnot_si128(flat, qs[i]);
+ flat_q[i] = _mm_and_si128(flat, flat_q[i]);
+ q[i] = _mm_or_si128(qs[i], flat_q[i]);
+ }
// highbd_filter16
- p6 = _mm_andnot_si128(flat2, p6);
- // p6 remains unchanged if !(flat2 && flat && mask)
- flat2_p6 = _mm_and_si128(flat2, flat2_p6);
- // get values for when (flat2 && flat && mask)
- p6 = _mm_or_si128(p6, flat2_p6); // full list of p6 values
- q6 = _mm_andnot_si128(flat2, q6);
- // q6 remains unchanged if !(flat2 && flat && mask)
- flat2_q6 = _mm_and_si128(flat2, flat2_q6);
- // get values for when (flat2 && flat && mask)
- q6 = _mm_or_si128(q6, flat2_q6); // full list of q6 values
- _mm_store_si128((__m128i *)(s - 7 * p), p6);
- _mm_store_si128((__m128i *)(s + 6 * p), q6);
-
- p5 = _mm_andnot_si128(flat2, p5);
- // p5 remains unchanged if !(flat2 && flat && mask)
- flat2_p5 = _mm_and_si128(flat2, flat2_p5);
- // get values for when (flat2 && flat && mask)
- p5 = _mm_or_si128(p5, flat2_p5);
- // full list of p5 values
- q5 = _mm_andnot_si128(flat2, q5);
- // q5 remains unchanged if !(flat2 && flat && mask)
- flat2_q5 = _mm_and_si128(flat2, flat2_q5);
- // get values for when (flat2 && flat && mask)
- q5 = _mm_or_si128(q5, flat2_q5);
- // full list of q5 values
- _mm_store_si128((__m128i *)(s - 6 * p), p5);
- _mm_store_si128((__m128i *)(s + 5 * p), q5);
-
- p4 = _mm_andnot_si128(flat2, p4);
- // p4 remains unchanged if !(flat2 && flat && mask)
- flat2_p4 = _mm_and_si128(flat2, flat2_p4);
- // get values for when (flat2 && flat && mask)
- p4 = _mm_or_si128(p4, flat2_p4); // full list of p4 values
- q4 = _mm_andnot_si128(flat2, q4);
- // q4 remains unchanged if !(flat2 && flat && mask)
- flat2_q4 = _mm_and_si128(flat2, flat2_q4);
- // get values for when (flat2 && flat && mask)
- q4 = _mm_or_si128(q4, flat2_q4); // full list of q4 values
- _mm_store_si128((__m128i *)(s - 5 * p), p4);
- _mm_store_si128((__m128i *)(s + 4 * p), q4);
-
- p3 = _mm_andnot_si128(flat2, p3);
- // p3 takes value from highbd_filter8 if !(flat2 && flat && mask)
- flat2_p3 = _mm_and_si128(flat2, flat2_p3);
- // get values for when (flat2 && flat && mask)
- p3 = _mm_or_si128(p3, flat2_p3); // full list of p3 values
- q3 = _mm_andnot_si128(flat2, q3);
- // q3 takes value from highbd_filter8 if !(flat2 && flat && mask)
- flat2_q3 = _mm_and_si128(flat2, flat2_q3);
- // get values for when (flat2 && flat && mask)
- q3 = _mm_or_si128(q3, flat2_q3); // full list of q3 values
- _mm_store_si128((__m128i *)(s - 4 * p), p3);
- _mm_store_si128((__m128i *)(s + 3 * p), q3);
-
- p2 = _mm_andnot_si128(flat2, p2);
- // p2 takes value from highbd_filter8 if !(flat2 && flat && mask)
- flat2_p2 = _mm_and_si128(flat2, flat2_p2);
- // get values for when (flat2 && flat && mask)
- p2 = _mm_or_si128(p2, flat2_p2);
- // full list of p2 values
- q2 = _mm_andnot_si128(flat2, q2);
- // q2 takes value from highbd_filter8 if !(flat2 && flat && mask)
- flat2_q2 = _mm_and_si128(flat2, flat2_q2);
- // get values for when (flat2 && flat && mask)
- q2 = _mm_or_si128(q2, flat2_q2); // full list of q2 values
- _mm_store_si128((__m128i *)(s - 3 * p), p2);
- _mm_store_si128((__m128i *)(s + 2 * p), q2);
-
- p1 = _mm_andnot_si128(flat2, p1);
- // p1 takes value from highbd_filter8 if !(flat2 && flat && mask)
- flat2_p1 = _mm_and_si128(flat2, flat2_p1);
- // get values for when (flat2 && flat && mask)
- p1 = _mm_or_si128(p1, flat2_p1); // full list of p1 values
- q1 = _mm_andnot_si128(flat2, q1);
- // q1 takes value from highbd_filter8 if !(flat2 && flat && mask)
- flat2_q1 = _mm_and_si128(flat2, flat2_q1);
- // get values for when (flat2 && flat && mask)
- q1 = _mm_or_si128(q1, flat2_q1); // full list of q1 values
- _mm_store_si128((__m128i *)(s - 2 * p), p1);
- _mm_store_si128((__m128i *)(s + 1 * p), q1);
-
- p0 = _mm_andnot_si128(flat2, p0);
- // p0 takes value from highbd_filter8 if !(flat2 && flat && mask)
- flat2_p0 = _mm_and_si128(flat2, flat2_p0);
- // get values for when (flat2 && flat && mask)
- p0 = _mm_or_si128(p0, flat2_p0); // full list of p0 values
- q0 = _mm_andnot_si128(flat2, q0);
- // q0 takes value from highbd_filter8 if !(flat2 && flat && mask)
- flat2_q0 = _mm_and_si128(flat2, flat2_q0);
- // get values for when (flat2 && flat && mask)
- q0 = _mm_or_si128(q0, flat2_q0); // full list of q0 values
- _mm_store_si128((__m128i *)(s - 1 * p), p0);
- _mm_store_si128((__m128i *)(s - 0 * p), q0);
+
+ if (pixel_output == FOUR_PIXELS) {
+ for (i = 6; i >= 0; i--) {
+ // p[i] remains unchanged if !(flat2 && flat && mask)
+ p[i] = _mm_andnot_si128(flat2, p[i]);
+ flat2_p[i] = _mm_and_si128(flat2, flat2_p[i]);
+ // get values for when (flat2 && flat && mask)
+ p[i] = _mm_or_si128(p[i], flat2_p[i]); // full list of p values
+
+ q[i] = _mm_andnot_si128(flat2, q[i]);
+ flat2_q[i] = _mm_and_si128(flat2, flat2_q[i]);
+ q[i] = _mm_or_si128(q[i], flat2_q[i]);
+ _mm_storel_epi64((__m128i *)(s - (i + 1) * pitch), p[i]);
+ _mm_storel_epi64((__m128i *)(s + i * pitch), q[i]);
+ }
+ } else { // EIGHT_PIXELS
+ for (i = 6; i >= 0; i--) {
+ // p[i] remains unchanged if !(flat2 && flat && mask)
+ p[i] = _mm_andnot_si128(flat2, p[i]);
+ flat2_p[i] = _mm_and_si128(flat2, flat2_p[i]);
+ // get values for when (flat2 && flat && mask)
+ p[i] = _mm_or_si128(p[i], flat2_p[i]); // full list of p values
+
+ q[i] = _mm_andnot_si128(flat2, q[i]);
+ flat2_q[i] = _mm_and_si128(flat2, flat2_q[i]);
+ q[i] = _mm_or_si128(q[i], flat2_q[i]);
+ _mm_store_si128((__m128i *)(s - (i + 1) * pitch), p[i]);
+ _mm_store_si128((__m128i *)(s + i * pitch), q[i]);
+ }
+ }
+}
+
+// Note:
+// highbd_lpf_horz_edge_8_8p() outputs 8 pixels per register
+// highbd_lpf_horz_edge_8_4p() outputs 4 pixels per register
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+static INLINE void highbd_lpf_horz_edge_8_4p(uint16_t *s, int pitch,
+ const uint8_t *blt,
+ const uint8_t *lt,
+ const uint8_t *thr, int bd) {
+ highbd_lpf_horz_edge_8_internal(s, pitch, blt, lt, thr, bd, FOUR_PIXELS);
+}
+#endif // CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+
+static INLINE void highbd_lpf_horz_edge_8_8p(uint16_t *s, int pitch,
+ const uint8_t *blt,
+ const uint8_t *lt,
+ const uint8_t *thr, int bd) {
+ highbd_lpf_horz_edge_8_internal(s, pitch, blt, lt, thr, bd, EIGHT_PIXELS);
+}
+
+void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
+ const uint8_t *_blimit,
+ const uint8_t *_limit,
+ const uint8_t *_thresh, int bd) {
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ highbd_lpf_horz_edge_8_4p(s, p, _blimit, _limit, _thresh, bd);
+#else
+ highbd_lpf_horz_edge_8_8p(s, p, _blimit, _limit, _thresh, bd);
+#endif
}
void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh, int bd) {
- aom_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd);
- aom_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd);
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ highbd_lpf_horz_edge_8_4p(s, p, _blimit, _limit, _thresh, bd);
+#else
+ highbd_lpf_horz_edge_8_8p(s, p, _blimit, _limit, _thresh, bd);
+ highbd_lpf_horz_edge_8_8p(s + 8, p, _blimit, _limit, _thresh, bd);
+#endif
+}
+
+static INLINE void store_horizontal_8(const __m128i *p2, const __m128i *p1,
+ const __m128i *p0, const __m128i *q0,
+ const __m128i *q1, const __m128i *q2,
+ int p, uint16_t *s) {
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ _mm_storel_epi64((__m128i *)(s - 3 * p), *p2);
+ _mm_storel_epi64((__m128i *)(s - 2 * p), *p1);
+ _mm_storel_epi64((__m128i *)(s - 1 * p), *p0);
+ _mm_storel_epi64((__m128i *)(s + 0 * p), *q0);
+ _mm_storel_epi64((__m128i *)(s + 1 * p), *q1);
+ _mm_storel_epi64((__m128i *)(s + 2 * p), *q2);
+#else
+ _mm_store_si128((__m128i *)(s - 3 * p), *p2);
+ _mm_store_si128((__m128i *)(s - 2 * p), *p1);
+ _mm_store_si128((__m128i *)(s - 1 * p), *p0);
+ _mm_store_si128((__m128i *)(s + 0 * p), *q0);
+ _mm_store_si128((__m128i *)(s + 1 * p), *q1);
+ _mm_store_si128((__m128i *)(s + 2 * p), *q2);
+#endif
}
void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
@@ -497,14 +440,14 @@ void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
const __m128i zero = _mm_set1_epi16(0);
__m128i blimit, limit, thresh;
__m128i mask, hev, flat;
- __m128i p3 = _mm_load_si128((__m128i *)(s - 4 * p));
- __m128i q3 = _mm_load_si128((__m128i *)(s + 3 * p));
- __m128i p2 = _mm_load_si128((__m128i *)(s - 3 * p));
- __m128i q2 = _mm_load_si128((__m128i *)(s + 2 * p));
- __m128i p1 = _mm_load_si128((__m128i *)(s - 2 * p));
- __m128i q1 = _mm_load_si128((__m128i *)(s + 1 * p));
- __m128i p0 = _mm_load_si128((__m128i *)(s - 1 * p));
- __m128i q0 = _mm_load_si128((__m128i *)(s + 0 * p));
+ __m128i p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));
+ __m128i q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
+ __m128i p2 = _mm_loadu_si128((__m128i *)(s - 3 * p));
+ __m128i q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
+ __m128i p1 = _mm_loadu_si128((__m128i *)(s - 2 * p));
+ __m128i q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
+ __m128i p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
+ __m128i q0 = _mm_loadu_si128((__m128i *)(s + 0 * p));
const __m128i one = _mm_set1_epi16(1);
const __m128i ffff = _mm_cmpeq_epi16(one, one);
__m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work;
@@ -635,41 +578,48 @@ void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
_mm_store_si128((__m128i *)&flat_oq2[0], workp_shft);
// lp filter
- filt = signed_char_clamp_bd_sse2(_mm_subs_epi16(ps1, qs1), bd);
+ const __m128i pmax =
+ _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, bd), one), t80);
+ const __m128i pmin = _mm_subs_epi16(zero, t80);
+
+ filt = _mm_subs_epi16(ps1, qs1);
+ pixel_clamp(&pmin, &pmax, &filt);
+
filt = _mm_and_si128(filt, hev);
work_a = _mm_subs_epi16(qs0, ps0);
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
// (aom_filter + 3 * (qs0 - ps0)) & mask
- filt = signed_char_clamp_bd_sse2(filt, bd);
+ pixel_clamp(&pmin, &pmax, &filt);
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi16(filt, t4);
filter2 = _mm_adds_epi16(filt, t3);
// Filter1 >> 3
- filter1 = signed_char_clamp_bd_sse2(filter1, bd);
+ pixel_clamp(&pmin, &pmax, &filter1);
filter1 = _mm_srai_epi16(filter1, 3);
// Filter2 >> 3
- filter2 = signed_char_clamp_bd_sse2(filter2, bd);
+ pixel_clamp(&pmin, &pmax, &filter2);
filter2 = _mm_srai_epi16(filter2, 3);
// filt >> 1
filt = _mm_adds_epi16(filter1, t1);
filt = _mm_srai_epi16(filt, 1);
- // filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev;
filt = _mm_andnot_si128(hev, filt);
- work_a = signed_char_clamp_bd_sse2(_mm_subs_epi16(qs0, filter1), bd);
+ work_a = _mm_subs_epi16(qs0, filter1);
+ pixel_clamp(&pmin, &pmax, &work_a);
work_a = _mm_adds_epi16(work_a, t80);
q0 = _mm_load_si128((__m128i *)flat_oq0);
work_a = _mm_andnot_si128(flat, work_a);
q0 = _mm_and_si128(flat, q0);
q0 = _mm_or_si128(work_a, q0);
- work_a = signed_char_clamp_bd_sse2(_mm_subs_epi16(qs1, filt), bd);
+ work_a = _mm_subs_epi16(qs1, filt);
+ pixel_clamp(&pmin, &pmax, &work_a);
work_a = _mm_adds_epi16(work_a, t80);
q1 = _mm_load_si128((__m128i *)flat_oq1);
work_a = _mm_andnot_si128(flat, work_a);
@@ -682,14 +632,16 @@ void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
q2 = _mm_and_si128(flat, q2);
q2 = _mm_or_si128(work_a, q2);
- work_a = signed_char_clamp_bd_sse2(_mm_adds_epi16(ps0, filter2), bd);
+ work_a = _mm_adds_epi16(ps0, filter2);
+ pixel_clamp(&pmin, &pmax, &work_a);
work_a = _mm_adds_epi16(work_a, t80);
p0 = _mm_load_si128((__m128i *)flat_op0);
work_a = _mm_andnot_si128(flat, work_a);
p0 = _mm_and_si128(flat, p0);
p0 = _mm_or_si128(work_a, p0);
- work_a = signed_char_clamp_bd_sse2(_mm_adds_epi16(ps1, filt), bd);
+ work_a = _mm_adds_epi16(ps1, filt);
+ pixel_clamp(&pmin, &pmax, &work_a);
work_a = _mm_adds_epi16(work_a, t80);
p1 = _mm_load_si128((__m128i *)flat_op1);
work_a = _mm_andnot_si128(flat, work_a);
@@ -702,12 +654,7 @@ void aom_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
p2 = _mm_and_si128(flat, p2);
p2 = _mm_or_si128(work_a, p2);
- _mm_store_si128((__m128i *)(s - 3 * p), p2);
- _mm_store_si128((__m128i *)(s - 2 * p), p1);
- _mm_store_si128((__m128i *)(s - 1 * p), p0);
- _mm_store_si128((__m128i *)(s + 0 * p), q0);
- _mm_store_si128((__m128i *)(s + 1 * p), q1);
- _mm_store_si128((__m128i *)(s + 2 * p), q2);
+ store_horizontal_8(&p2, &p1, &p0, &q0, &q1, &q2, p, s);
}
void aom_highbd_lpf_horizontal_8_dual_sse2(
@@ -725,14 +672,18 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
const __m128i zero = _mm_set1_epi16(0);
__m128i blimit, limit, thresh;
__m128i mask, hev, flat;
+#if !(CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4)
__m128i p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));
__m128i p2 = _mm_loadu_si128((__m128i *)(s - 3 * p));
+#endif
__m128i p1 = _mm_loadu_si128((__m128i *)(s - 2 * p));
__m128i p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
__m128i q0 = _mm_loadu_si128((__m128i *)(s - 0 * p));
__m128i q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
+#if !(CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4)
__m128i q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
__m128i q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
+#endif
const __m128i abs_p1p0 =
_mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1));
const __m128i abs_q1q0 =
@@ -743,7 +694,7 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
_mm_or_si128(_mm_subs_epu16(p0, q0), _mm_subs_epu16(q0, p0));
__m128i abs_p1q1 =
_mm_or_si128(_mm_subs_epu16(p1, q1), _mm_subs_epu16(q1, p1));
- __m128i work;
+
const __m128i t4 = _mm_set1_epi16(4);
const __m128i t3 = _mm_set1_epi16(3);
__m128i t80;
@@ -814,9 +765,9 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
// So taking maximums continues to work:
mask = _mm_and_si128(mask, _mm_adds_epu16(limit, one));
mask = _mm_max_epi16(flat, mask);
- // mask |= (abs(p1 - p0) > limit) * -1;
- // mask |= (abs(q1 - q0) > limit) * -1;
- work = _mm_max_epi16(
+
+#if !(CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4)
+ __m128i work = _mm_max_epi16(
_mm_or_si128(_mm_subs_epu16(p2, p1), _mm_subs_epu16(p1, p2)),
_mm_or_si128(_mm_subs_epu16(p3, p2), _mm_subs_epu16(p2, p3)));
mask = _mm_max_epi16(work, mask);
@@ -824,22 +775,32 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
_mm_or_si128(_mm_subs_epu16(q2, q1), _mm_subs_epu16(q1, q2)),
_mm_or_si128(_mm_subs_epu16(q3, q2), _mm_subs_epu16(q2, q3)));
mask = _mm_max_epi16(work, mask);
+#endif
mask = _mm_subs_epu16(mask, limit);
mask = _mm_cmpeq_epi16(mask, zero);
// filter4
- filt = signed_char_clamp_bd_sse2(_mm_subs_epi16(ps1, qs1), bd);
+ const __m128i pmax =
+ _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, bd), one), t80);
+ const __m128i pmin = _mm_subs_epi16(zero, t80);
+
+ filt = _mm_subs_epi16(ps1, qs1);
+ pixel_clamp(&pmin, &pmax, &filt);
filt = _mm_and_si128(filt, hev);
work_a = _mm_subs_epi16(qs0, ps0);
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
- filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd);
+ filt = _mm_adds_epi16(filt, work_a);
+ pixel_clamp(&pmin, &pmax, &filt);
// (aom_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
- filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd);
- filter2 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t3), bd);
+ filter1 = _mm_adds_epi16(filt, t4);
+ pixel_clamp(&pmin, &pmax, &filter1);
+
+ filter2 = _mm_adds_epi16(filt, t3);
+ pixel_clamp(&pmin, &pmax, &filter2);
// Filter1 >> 3
work_a = _mm_cmpgt_epi16(zero, filter1); // get the values that are <0
@@ -865,19 +826,32 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
filt = _mm_andnot_si128(hev, filt);
- q0 = _mm_adds_epi16(
- signed_char_clamp_bd_sse2(_mm_subs_epi16(qs0, filter1), bd), t80);
- q1 = _mm_adds_epi16(signed_char_clamp_bd_sse2(_mm_subs_epi16(qs1, filt), bd),
- t80);
- p0 = _mm_adds_epi16(
- signed_char_clamp_bd_sse2(_mm_adds_epi16(ps0, filter2), bd), t80);
- p1 = _mm_adds_epi16(signed_char_clamp_bd_sse2(_mm_adds_epi16(ps1, filt), bd),
- t80);
-
+ q0 = _mm_subs_epi16(qs0, filter1);
+ pixel_clamp(&pmin, &pmax, &q0);
+ q0 = _mm_adds_epi16(q0, t80);
+
+ q1 = _mm_subs_epi16(qs1, filt);
+ pixel_clamp(&pmin, &pmax, &q1);
+ q1 = _mm_adds_epi16(q1, t80);
+
+ p0 = _mm_adds_epi16(ps0, filter2);
+ pixel_clamp(&pmin, &pmax, &p0);
+ p0 = _mm_adds_epi16(p0, t80);
+
+ p1 = _mm_adds_epi16(ps1, filt);
+ pixel_clamp(&pmin, &pmax, &p1);
+ p1 = _mm_adds_epi16(p1, t80);
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ _mm_storel_epi64((__m128i *)(s - 2 * p), p1);
+ _mm_storel_epi64((__m128i *)(s - 1 * p), p0);
+ _mm_storel_epi64((__m128i *)(s + 0 * p), q0);
+ _mm_storel_epi64((__m128i *)(s + 1 * p), q1);
+#else
_mm_storeu_si128((__m128i *)(s - 2 * p), p1);
_mm_storeu_si128((__m128i *)(s - 1 * p), p0);
_mm_storeu_si128((__m128i *)(s + 0 * p), q0);
_mm_storeu_si128((__m128i *)(s + 1 * p), q1);
+#endif
}
void aom_highbd_lpf_horizontal_4_dual_sse2(
@@ -888,118 +862,6 @@ void aom_highbd_lpf_horizontal_4_dual_sse2(
aom_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd);
}
-static INLINE void highbd_transpose(uint16_t *src[], int in_p, uint16_t *dst[],
- int out_p, int num_8x8_to_transpose) {
- int idx8x8 = 0;
- __m128i p0, p1, p2, p3, p4, p5, p6, p7, x0, x1, x2, x3, x4, x5, x6, x7;
- do {
- uint16_t *in = src[idx8x8];
- uint16_t *out = dst[idx8x8];
-
- p0 =
- _mm_loadu_si128((__m128i *)(in + 0 * in_p)); // 00 01 02 03 04 05 06 07
- p1 =
- _mm_loadu_si128((__m128i *)(in + 1 * in_p)); // 10 11 12 13 14 15 16 17
- p2 =
- _mm_loadu_si128((__m128i *)(in + 2 * in_p)); // 20 21 22 23 24 25 26 27
- p3 =
- _mm_loadu_si128((__m128i *)(in + 3 * in_p)); // 30 31 32 33 34 35 36 37
- p4 =
- _mm_loadu_si128((__m128i *)(in + 4 * in_p)); // 40 41 42 43 44 45 46 47
- p5 =
- _mm_loadu_si128((__m128i *)(in + 5 * in_p)); // 50 51 52 53 54 55 56 57
- p6 =
- _mm_loadu_si128((__m128i *)(in + 6 * in_p)); // 60 61 62 63 64 65 66 67
- p7 =
- _mm_loadu_si128((__m128i *)(in + 7 * in_p)); // 70 71 72 73 74 75 76 77
- // 00 10 01 11 02 12 03 13
- x0 = _mm_unpacklo_epi16(p0, p1);
- // 20 30 21 31 22 32 23 33
- x1 = _mm_unpacklo_epi16(p2, p3);
- // 40 50 41 51 42 52 43 53
- x2 = _mm_unpacklo_epi16(p4, p5);
- // 60 70 61 71 62 72 63 73
- x3 = _mm_unpacklo_epi16(p6, p7);
- // 00 10 20 30 01 11 21 31
- x4 = _mm_unpacklo_epi32(x0, x1);
- // 40 50 60 70 41 51 61 71
- x5 = _mm_unpacklo_epi32(x2, x3);
- // 00 10 20 30 40 50 60 70
- x6 = _mm_unpacklo_epi64(x4, x5);
- // 01 11 21 31 41 51 61 71
- x7 = _mm_unpackhi_epi64(x4, x5);
-
- _mm_storeu_si128((__m128i *)(out + 0 * out_p), x6);
- // 00 10 20 30 40 50 60 70
- _mm_storeu_si128((__m128i *)(out + 1 * out_p), x7);
- // 01 11 21 31 41 51 61 71
-
- // 02 12 22 32 03 13 23 33
- x4 = _mm_unpackhi_epi32(x0, x1);
- // 42 52 62 72 43 53 63 73
- x5 = _mm_unpackhi_epi32(x2, x3);
- // 02 12 22 32 42 52 62 72
- x6 = _mm_unpacklo_epi64(x4, x5);
- // 03 13 23 33 43 53 63 73
- x7 = _mm_unpackhi_epi64(x4, x5);
-
- _mm_storeu_si128((__m128i *)(out + 2 * out_p), x6);
- // 02 12 22 32 42 52 62 72
- _mm_storeu_si128((__m128i *)(out + 3 * out_p), x7);
- // 03 13 23 33 43 53 63 73
-
- // 04 14 05 15 06 16 07 17
- x0 = _mm_unpackhi_epi16(p0, p1);
- // 24 34 25 35 26 36 27 37
- x1 = _mm_unpackhi_epi16(p2, p3);
- // 44 54 45 55 46 56 47 57
- x2 = _mm_unpackhi_epi16(p4, p5);
- // 64 74 65 75 66 76 67 77
- x3 = _mm_unpackhi_epi16(p6, p7);
- // 04 14 24 34 05 15 25 35
- x4 = _mm_unpacklo_epi32(x0, x1);
- // 44 54 64 74 45 55 65 75
- x5 = _mm_unpacklo_epi32(x2, x3);
- // 04 14 24 34 44 54 64 74
- x6 = _mm_unpacklo_epi64(x4, x5);
- // 05 15 25 35 45 55 65 75
- x7 = _mm_unpackhi_epi64(x4, x5);
-
- _mm_storeu_si128((__m128i *)(out + 4 * out_p), x6);
- // 04 14 24 34 44 54 64 74
- _mm_storeu_si128((__m128i *)(out + 5 * out_p), x7);
- // 05 15 25 35 45 55 65 75
-
- // 06 16 26 36 07 17 27 37
- x4 = _mm_unpackhi_epi32(x0, x1);
- // 46 56 66 76 47 57 67 77
- x5 = _mm_unpackhi_epi32(x2, x3);
- // 06 16 26 36 46 56 66 76
- x6 = _mm_unpacklo_epi64(x4, x5);
- // 07 17 27 37 47 57 67 77
- x7 = _mm_unpackhi_epi64(x4, x5);
-
- _mm_storeu_si128((__m128i *)(out + 6 * out_p), x6);
- // 06 16 26 36 46 56 66 76
- _mm_storeu_si128((__m128i *)(out + 7 * out_p), x7);
- // 07 17 27 37 47 57 67 77
- } while (++idx8x8 < num_8x8_to_transpose);
-}
-
-static INLINE void highbd_transpose8x16(uint16_t *in0, uint16_t *in1, int in_p,
- uint16_t *out, int out_p) {
- uint16_t *src0[1];
- uint16_t *src1[1];
- uint16_t *dest0[1];
- uint16_t *dest1[1];
- src0[0] = in0;
- src1[0] = in1;
- dest0[0] = out;
- dest1[0] = out + 8;
- highbd_transpose(src0, in_p, dest0, out_p, 1);
- highbd_transpose(src1, in_p, dest1, out_p, 1);
-}
-
void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
@@ -1130,10 +992,12 @@ void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int p,
highbd_transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16);
highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
- // Loop filtering
+#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
+ highbd_lpf_horz_edge_8_8p(t_dst + 8 * 16, 16, blimit, limit, thresh, bd);
+#else
aom_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit,
thresh, bd);
-
+#endif
// Transpose back
highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
highbd_transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * p, p);
diff --git a/third_party/aom/aom_dsp/x86/highbd_sad4d_sse2.asm b/third_party/aom/aom_dsp/x86/highbd_sad4d_sse2.asm
index 9c3bbdd69..855bc6558 100644
--- a/third_party/aom/aom_dsp/x86/highbd_sad4d_sse2.asm
+++ b/third_party/aom/aom_dsp/x86/highbd_sad4d_sse2.asm
@@ -293,4 +293,6 @@ HIGH_SADNXN4D 4, 16
HIGH_SADNXN4D 16, 4
HIGH_SADNXN4D 8, 32
HIGH_SADNXN4D 32, 8
+HIGH_SADNXN4D 16, 64
+HIGH_SADNXN4D 64, 16
%endif
diff --git a/third_party/aom/aom_dsp/x86/highbd_sad_sse2.asm b/third_party/aom/aom_dsp/x86/highbd_sad_sse2.asm
index 248b98ef5..760e68aab 100644
--- a/third_party/aom/aom_dsp/x86/highbd_sad_sse2.asm
+++ b/third_party/aom/aom_dsp/x86/highbd_sad_sse2.asm
@@ -158,7 +158,10 @@ HIGH_SAD64XN 64 ; highbd_sad64x64_sse2
HIGH_SAD64XN 32 ; highbd_sad64x32_sse2
HIGH_SAD64XN 64, 1 ; highbd_sad64x64_avg_sse2
HIGH_SAD64XN 32, 1 ; highbd_sad64x32_avg_sse2
-
+%if CONFIG_EXT_PARTITION_TYPES
+HIGH_SAD64XN 16 ; highbd_sad64x16_sse2
+HIGH_SAD64XN 16, 1 ; highbd_sad64x16_avg_sse2

+%endif
; unsigned int aom_highbd_sad32x{16,32,64}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
@@ -302,6 +305,8 @@ HIGH_SAD16XN 8, 1 ; highbd_sad16x8_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
HIGH_SAD16XN 4 ; highbd_sad_16x4_sse2
HIGH_SAD16XN 4, 1 ; highbd_sad_16x4_avg_sse2
+HIGH_SAD16XN 64 ; highbd_sad_16x64_sse2
+HIGH_SAD16XN 64, 1 ; highbd_sad_16x64_avg_sse2
%endif
; unsigned int aom_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride,
diff --git a/third_party/aom/aom_dsp/x86/highbd_subtract_sse2.c b/third_party/aom/aom_dsp/x86/highbd_subtract_sse2.c
index 7bc8a0df3..befd81269 100644
--- a/third_party/aom/aom_dsp/x86/highbd_subtract_sse2.c
+++ b/third_party/aom/aom_dsp/x86/highbd_subtract_sse2.c
@@ -177,177 +177,94 @@ static void subtract_8x8(int16_t *diff, ptrdiff_t diff_stride,
_mm_storeu_si128((__m128i *)(diff + 7 * diff_stride), x7);
}
-static void subtract_8x16(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_8x8(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += diff_stride << 3;
- src += src_stride << 3;
- pred += pred_stride << 3;
- subtract_8x8(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_16x8(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_8x8(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += 8;
- src += 8;
- pred += 8;
- subtract_8x8(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_16x16(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_16x8(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += diff_stride << 3;
- src += src_stride << 3;
- pred += pred_stride << 3;
- subtract_16x8(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_16x32(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_16x16(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += diff_stride << 4;
- src += src_stride << 4;
- pred += pred_stride << 4;
- subtract_16x16(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_32x16(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_16x16(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += 16;
- src += 16;
- pred += 16;
- subtract_16x16(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_32x32(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_32x16(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += diff_stride << 4;
- src += src_stride << 4;
- pred += pred_stride << 4;
- subtract_32x16(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_32x64(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_32x32(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += diff_stride << 5;
- src += src_stride << 5;
- pred += pred_stride << 5;
- subtract_32x32(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_64x32(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_32x32(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += 32;
- src += 32;
- pred += 32;
- subtract_32x32(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_64x64(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_64x32(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += diff_stride << 5;
- src += src_stride << 5;
- pred += pred_stride << 5;
- subtract_64x32(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_64x128(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_64x64(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += diff_stride << 6;
- src += src_stride << 6;
- pred += pred_stride << 6;
- subtract_64x64(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_128x64(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_64x64(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += 64;
- src += 64;
- pred += 64;
- subtract_64x64(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
-
-static void subtract_128x128(int16_t *diff, ptrdiff_t diff_stride,
- const uint16_t *src, ptrdiff_t src_stride,
- const uint16_t *pred, ptrdiff_t pred_stride) {
- subtract_128x64(diff, diff_stride, src, src_stride, pred, pred_stride);
- diff += diff_stride << 6;
- src += src_stride << 6;
- pred += pred_stride << 6;
- subtract_128x64(diff, diff_stride, src, src_stride, pred, pred_stride);
-}
+#define STACK_V(h, fun) \
+ do { \
+ fun(diff, diff_stride, src, src_stride, pred, pred_stride); \
+ fun(diff + diff_stride * h, diff_stride, src + src_stride * h, src_stride, \
+ pred + pred_stride * h, pred_stride); \
+ } while (0)
+
+#define STACK_H(w, fun) \
+ do { \
+ fun(diff, diff_stride, src, src_stride, pred, pred_stride); \
+ fun(diff + w, diff_stride, src + w, src_stride, pred + w, pred_stride); \
+ } while (0)
+
+#define SUBTRACT_FUN(size) \
+ static void subtract_##size(int16_t *diff, ptrdiff_t diff_stride, \
+ const uint16_t *src, ptrdiff_t src_stride, \
+ const uint16_t *pred, ptrdiff_t pred_stride)
+
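+// Illustration (not part of the upstream change): SUBTRACT_FUN(8x16) with
+// STACK_V(8, subtract_8x8) expands to roughly
+//
+//   static void subtract_8x16(int16_t *diff, ptrdiff_t diff_stride,
+//                             const uint16_t *src, ptrdiff_t src_stride,
+//                             const uint16_t *pred, ptrdiff_t pred_stride) {
+//     subtract_8x8(diff, diff_stride, src, src_stride, pred, pred_stride);
+//     subtract_8x8(diff + diff_stride * 8, diff_stride,
+//                  src + src_stride * 8, src_stride,
+//                  pred + pred_stride * 8, pred_stride);
+//   }
+//
+// i.e. each taller or wider block is two stacked smaller subtractions.
+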
+SUBTRACT_FUN(8x16) { STACK_V(8, subtract_8x8); }
+SUBTRACT_FUN(16x8) { STACK_H(8, subtract_8x8); }
+SUBTRACT_FUN(16x16) { STACK_V(8, subtract_16x8); }
+SUBTRACT_FUN(16x32) { STACK_V(16, subtract_16x16); }
+SUBTRACT_FUN(32x16) { STACK_H(16, subtract_16x16); }
+SUBTRACT_FUN(32x32) { STACK_V(16, subtract_32x16); }
+SUBTRACT_FUN(32x64) { STACK_V(32, subtract_32x32); }
+SUBTRACT_FUN(64x32) { STACK_H(32, subtract_32x32); }
+SUBTRACT_FUN(64x64) { STACK_V(32, subtract_64x32); }
+#if CONFIG_EXT_PARTITION
+SUBTRACT_FUN(64x128) { STACK_V(64, subtract_64x64); }
+SUBTRACT_FUN(128x64) { STACK_H(64, subtract_64x64); }
+SUBTRACT_FUN(128x128) { STACK_V(64, subtract_128x64); }
+#endif
+SUBTRACT_FUN(4x16) { STACK_V(8, subtract_4x8); }
+SUBTRACT_FUN(16x4) { STACK_H(8, subtract_8x4); }
+SUBTRACT_FUN(8x32) { STACK_V(16, subtract_8x16); }
+SUBTRACT_FUN(32x8) { STACK_H(16, subtract_16x8); }
+SUBTRACT_FUN(16x64) { STACK_V(32, subtract_16x32); }
+SUBTRACT_FUN(64x16) { STACK_H(32, subtract_32x16); }
+#if CONFIG_EXT_PARTITION
+SUBTRACT_FUN(32x128) { STACK_V(64, subtract_32x64); }
+SUBTRACT_FUN(128x32) { STACK_H(64, subtract_64x32); }
+#endif
static SubtractWxHFuncType getSubtractFunc(int rows, int cols) {
- SubtractWxHFuncType ret_func_ptr = NULL;
if (rows == 4) {
- if (cols == 4) {
- ret_func_ptr = subtract_4x4;
- } else if (cols == 8) {
- ret_func_ptr = subtract_8x4;
- }
- } else if (rows == 8) {
- if (cols == 4) {
- ret_func_ptr = subtract_4x8;
- } else if (cols == 8) {
- ret_func_ptr = subtract_8x8;
- } else if (cols == 16) {
- ret_func_ptr = subtract_16x8;
- }
- } else if (rows == 16) {
- if (cols == 8) {
- ret_func_ptr = subtract_8x16;
- } else if (cols == 16) {
- ret_func_ptr = subtract_16x16;
- } else if (cols == 32) {
- ret_func_ptr = subtract_32x16;
- }
- } else if (rows == 32) {
- if (cols == 16) {
- ret_func_ptr = subtract_16x32;
- } else if (cols == 32) {
- ret_func_ptr = subtract_32x32;
- } else if (cols == 64) {
- ret_func_ptr = subtract_64x32;
- }
- } else if (rows == 64) {
- if (cols == 32) {
- ret_func_ptr = subtract_32x64;
- } else if (cols == 64) {
- ret_func_ptr = subtract_64x64;
- } else if (cols == 128) {
- ret_func_ptr = subtract_128x64;
- }
- } else if (rows == 128) {
- if (cols == 64) {
- ret_func_ptr = subtract_64x128;
- } else if (cols == 128) {
- ret_func_ptr = subtract_128x128;
- }
+ if (cols == 4) return subtract_4x4;
+ if (cols == 8) return subtract_8x4;
+ if (cols == 16) return subtract_16x4;
+ }
+ if (rows == 8) {
+ if (cols == 4) return subtract_4x8;
+ if (cols == 8) return subtract_8x8;
+ if (cols == 16) return subtract_16x8;
+ if (cols == 32) return subtract_32x8;
+ }
+ if (rows == 16) {
+ if (cols == 4) return subtract_4x16;
+ if (cols == 8) return subtract_8x16;
+ if (cols == 16) return subtract_16x16;
+ if (cols == 32) return subtract_32x16;
+ if (cols == 64) return subtract_64x16;
+ }
+ if (rows == 32) {
+ if (cols == 8) return subtract_8x32;
+ if (cols == 16) return subtract_16x32;
+ if (cols == 32) return subtract_32x32;
+ if (cols == 64) return subtract_64x32;
+#if CONFIG_EXT_PARTITION
+ if (cols == 128) return subtract_128x32;
+#endif // CONFIG_EXT_PARTITION
+ }
+ if (rows == 64) {
+ if (cols == 16) return subtract_16x64;
+ if (cols == 32) return subtract_32x64;
+ if (cols == 64) return subtract_64x64;
+#if CONFIG_EXT_PARTITION
+ if (cols == 128) return subtract_128x64;
+#endif // CONFIG_EXT_PARTITION
}
- if (!ret_func_ptr) {
- assert(0);
+#if CONFIG_EXT_PARTITION
+ if (rows == 128) {
+ if (cols == 32) return subtract_32x128;
+ if (cols == 64) return subtract_64x128;
+ if (cols == 128) return subtract_128x128;
}
- return ret_func_ptr;
+#endif // CONFIG_EXT_PARTITION
+ assert(0);
+ return NULL;
}
void aom_highbd_subtract_block_sse2(int rows, int cols, int16_t *diff,
diff --git a/third_party/aom/aom_dsp/x86/highbd_variance_sse2.c b/third_party/aom/aom_dsp/x86/highbd_variance_sse2.c
index 93923ffb0..62acf3ed3 100644
--- a/third_party/aom/aom_dsp/x86/highbd_variance_sse2.c
+++ b/third_party/aom/aom_dsp/x86/highbd_variance_sse2.c
@@ -189,6 +189,8 @@ VAR_FN(8, 8, 8, 6);
VAR_FN(16, 4, 16, 6);
VAR_FN(8, 32, 8, 8);
VAR_FN(32, 8, 16, 8);
+VAR_FN(16, 64, 16, 10);
+VAR_FN(64, 16, 16, 10);
#endif
#undef VAR_FN
@@ -411,7 +413,9 @@ DECLS(sse2);
FN(8, 4, 8, 3, 2, opt, (int64_t)); \
FN(16, 4, 16, 4, 2, opt, (int64_t)); \
FN(8, 32, 8, 3, 5, opt, (int64_t)); \
- FN(32, 8, 16, 5, 3, opt, (int64_t))
+ FN(32, 8, 16, 5, 3, opt, (int64_t)); \
+ FN(16, 64, 16, 4, 6, opt, (int64_t)); \
+ FN(64, 16, 16, 6, 4, opt, (int64_t))
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t)); \
@@ -588,7 +592,9 @@ DECLS(sse2);
FN(8, 4, 8, 3, 2, opt, (int64_t)); \
FN(16, 4, 16, 4, 2, opt, (int64_t)); \
FN(8, 32, 8, 3, 5, opt, (int64_t)); \
- FN(32, 8, 16, 5, 3, opt, (int64_t));
+ FN(32, 8, 16, 5, 3, opt, (int64_t)); \
+ FN(16, 64, 16, 4, 6, opt, (int64_t)); \
+ FN(64, 16, 16, 6, 4, opt, (int64_t));
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t)); \
diff --git a/third_party/aom/aom_dsp/x86/intrapred_avx2.c b/third_party/aom/aom_dsp/x86/intrapred_avx2.c
new file mode 100644
index 000000000..6b8922b8c
--- /dev/null
+++ b/third_party/aom/aom_dsp/x86/intrapred_avx2.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+
+#include "./aom_dsp_rtcd.h"
+
+static INLINE __m256i dc_sum_32(const uint8_t *ref) {
+ const __m256i x = _mm256_loadu_si256((const __m256i *)ref);
+ const __m256i zero = _mm256_setzero_si256();
+ __m256i y = _mm256_sad_epu8(x, zero);
+ __m256i u = _mm256_permute2x128_si256(y, y, 1);
+ y = _mm256_add_epi64(u, y);
+ u = _mm256_unpackhi_epi64(y, y);
+ return _mm256_add_epi16(y, u);
+}
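+// _mm256_sad_epu8 against zero produces four 64-bit partial byte sums; the
+// lane swap and 64-bit unpack/add fold them so the total of all 32 bytes
+// ends up in the low element.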
+
+static INLINE void row_store_32xh(const __m256i *r, int height, uint8_t *dst,
+ ptrdiff_t stride) {
+ int i;
+ for (i = 0; i < height; ++i) {
+ _mm256_storeu_si256((__m256i *)dst, *r);
+ dst += stride;
+ }
+}
+
+void aom_dc_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m256i sum_above = dc_sum_32(above);
+ __m256i sum_left = dc_sum_32(left);
+ sum_left = _mm256_add_epi16(sum_left, sum_above);
+ const __m256i thirtytwo = _mm256_set1_epi16(32);
+ sum_left = _mm256_add_epi16(sum_left, thirtytwo);
+ sum_left = _mm256_srai_epi16(sum_left, 6);
+ const __m256i zero = _mm256_setzero_si256();
+ __m256i row = _mm256_shuffle_epi8(sum_left, zero);
+ row_store_32xh(&row, 32, dst, stride);
+}
+
+void aom_dc_top_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m256i sum = dc_sum_32(above);
+ (void)left;
+
+ const __m256i sixteen = _mm256_set1_epi16(16);
+ sum = _mm256_add_epi16(sum, sixteen);
+ sum = _mm256_srai_epi16(sum, 5);
+ const __m256i zero = _mm256_setzero_si256();
+ __m256i row = _mm256_shuffle_epi8(sum, zero);
+ row_store_32xh(&row, 32, dst, stride);
+}
+
+void aom_dc_left_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m256i sum = dc_sum_32(left);
+ (void)above;
+
+ const __m256i sixteen = _mm256_set1_epi16(16);
+ sum = _mm256_add_epi16(sum, sixteen);
+ sum = _mm256_srai_epi16(sum, 5);
+ const __m256i zero = _mm256_setzero_si256();
+ __m256i row = _mm256_shuffle_epi8(sum, zero);
+ row_store_32xh(&row, 32, dst, stride);
+}
+
+void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ (void)left;
+ const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ row_store_32xh(&row, 32, dst, stride);
+}
+
+void aom_v_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m256i row = _mm256_loadu_si256((const __m256i *)above);
+ (void)left;
+ row_store_32xh(&row, 32, dst, stride);
+}
+
+// There are 32 rows in total. This function does lines 0,1,2,3 and
+// 16,17,18,19; the next call does lines 4,5,6,7 and 20,21,22,23, so
+// four calls finish all 32 rows.
+static INLINE void h_predictor_32x8line(const __m256i *row, uint8_t *dst,
+ ptrdiff_t stride) {
+ __m256i t[4];
+ __m256i m = _mm256_setzero_si256();
+ const __m256i inc = _mm256_set1_epi8(4);
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ t[i] = _mm256_shuffle_epi8(*row, m);
+ __m256i r0 = _mm256_permute2x128_si256(t[i], t[i], 0);
+ __m256i r1 = _mm256_permute2x128_si256(t[i], t[i], 0x11);
+ _mm256_storeu_si256((__m256i *)dst, r0);
+ _mm256_storeu_si256((__m256i *)(dst + (stride << 4)), r1);
+ dst += stride;
+ m = _mm256_add_epi8(m, inc);
+ }
+}
+
+void aom_h_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ const __m256i left_col = _mm256_loadu_si256((__m256i const *)left);
+
+ __m256i u = _mm256_unpacklo_epi8(left_col, left_col);
+
+ __m256i v = _mm256_unpacklo_epi8(u, u);
+ h_predictor_32x8line(&v, dst, stride);
+ dst += stride << 2;
+
+ v = _mm256_unpackhi_epi8(u, u);
+ h_predictor_32x8line(&v, dst, stride);
+ dst += stride << 2;
+
+ u = _mm256_unpackhi_epi8(left_col, left_col);
+
+ v = _mm256_unpacklo_epi8(u, u);
+ h_predictor_32x8line(&v, dst, stride);
+ dst += stride << 2;
+
+ v = _mm256_unpackhi_epi8(u, u);
+ h_predictor_32x8line(&v, dst, stride);
+}
+
+// -----------------------------------------------------------------------------
+// Rectangle
+
+// TODO(luoyi) The following two functions are shared with intrapred_sse2.c.
+// Use a header file, intrapred_common_x86.h
+static INLINE __m128i dc_sum_16_sse2(const uint8_t *ref) {
+ __m128i x = _mm_load_si128((__m128i const *)ref);
+ const __m128i zero = _mm_setzero_si128();
+ x = _mm_sad_epu8(x, zero);
+ const __m128i high = _mm_unpackhi_epi64(x, x);
+ return _mm_add_epi16(x, high);
+}
+
+static INLINE __m128i dc_sum_32_sse2(const uint8_t *ref) {
+ __m128i x0 = _mm_load_si128((__m128i const *)ref);
+ __m128i x1 = _mm_load_si128((__m128i const *)(ref + 16));
+ const __m128i zero = _mm_setzero_si128();
+ x0 = _mm_sad_epu8(x0, zero);
+ x1 = _mm_sad_epu8(x1, zero);
+ x0 = _mm_add_epi16(x0, x1);
+ const __m128i high = _mm_unpackhi_epi64(x0, x0);
+ return _mm_add_epi16(x0, high);
+}
+
+void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i top_sum = dc_sum_32_sse2(above);
+ __m128i left_sum = dc_sum_16_sse2(left);
+ left_sum = _mm_add_epi16(top_sum, left_sum);
+ uint32_t sum = _mm_cvtsi128_si32(left_sum);
+ sum += 24;
+ sum /= 48;
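+  // 32 top + 16 left = 48 edge pixels; adding 24 (half of 48) makes the
+  // integer division a rounded average.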
+
+ const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+ row_store_32xh(&row, 16, dst, stride);
+}
+
+void aom_dc_top_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m256i sum = dc_sum_32(above);
+ (void)left;
+
+ const __m256i sixteen = _mm256_set1_epi16(16);
+ sum = _mm256_add_epi16(sum, sixteen);
+ sum = _mm256_srai_epi16(sum, 5);
+ const __m256i zero = _mm256_setzero_si256();
+ __m256i row = _mm256_shuffle_epi8(sum, zero);
+ row_store_32xh(&row, 16, dst, stride);
+}
+
+void aom_dc_left_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m128i sum = dc_sum_16_sse2(left);
+ (void)above;
+
+ const __m128i eight = _mm_set1_epi16(8);
+ sum = _mm_add_epi16(sum, eight);
+ sum = _mm_srai_epi16(sum, 4);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i r = _mm_shuffle_epi8(sum, zero);
+ const __m256i row = _mm256_inserti128_si256(_mm256_castsi128_si256(r), r, 1);
+ row_store_32xh(&row, 16, dst, stride);
+}
+
+void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ (void)left;
+ const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ row_store_32xh(&row, 16, dst, stride);
+}
+
+void aom_v_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m256i row = _mm256_loadu_si256((const __m256i *)above);
+ (void)left;
+ row_store_32xh(&row, 16, dst, stride);
+}
+
+// -----------------------------------------------------------------------------
+// PAETH_PRED
+
+// Return 16 16-bit pixels in one row (__m256i)
+static INLINE __m256i paeth_pred(const __m256i *left, const __m256i *top,
+ const __m256i *topleft) {
+ const __m256i base =
+ _mm256_sub_epi16(_mm256_add_epi16(*top, *left), *topleft);
+
+ __m256i pl = _mm256_abs_epi16(_mm256_sub_epi16(base, *left));
+ __m256i pt = _mm256_abs_epi16(_mm256_sub_epi16(base, *top));
+ __m256i ptl = _mm256_abs_epi16(_mm256_sub_epi16(base, *topleft));
+
+ __m256i mask1 = _mm256_cmpgt_epi16(pl, pt);
+ mask1 = _mm256_or_si256(mask1, _mm256_cmpgt_epi16(pl, ptl));
+ __m256i mask2 = _mm256_cmpgt_epi16(pt, ptl);
+
+ pl = _mm256_andnot_si256(mask1, *left);
+
+ ptl = _mm256_and_si256(mask2, *topleft);
+ pt = _mm256_andnot_si256(mask2, *top);
+ pt = _mm256_or_si256(pt, ptl);
+ pt = _mm256_and_si256(mask1, pt);
+
+ return _mm256_or_si256(pt, pl);
+}
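+
+// For reference, a scalar sketch of the selection the masks above implement
+// (illustration only, not part of the upstream change):
+//   base = left + top - topleft;
+//   pl = abs(base - left); pt = abs(base - top); ptl = abs(base - topleft);
+//   pred = (pl <= pt && pl <= ptl) ? left : (pt <= ptl ? top : topleft);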
+
+// Return 16 8-bit pixels in one row (__m128i)
+static INLINE __m128i paeth_16x1_pred(const __m256i *left, const __m256i *top,
+ const __m256i *topleft) {
+ const __m256i p0 = paeth_pred(left, top, topleft);
+ const __m256i p1 = _mm256_permute4x64_epi64(p0, 0xe);
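+  // 0xe moves the high 128 bits of p0 into the low lane so the in-lane
+  // packus below collects all 16 pixels in the low 128 bits.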
+ const __m256i p = _mm256_packus_epi16(p0, p1);
+ return _mm256_castsi256_si128(p);
+}
+
+static INLINE __m256i get_top_vector(const uint8_t *above) {
+ const __m128i x = _mm_load_si128((const __m128i *)above);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i t0 = _mm_unpacklo_epi8(x, zero);
+ const __m128i t1 = _mm_unpackhi_epi8(x, zero);
+ return _mm256_inserti128_si256(_mm256_castsi128_si256(t0), t1, 1);
+}
+
+void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i x = _mm_loadl_epi64((const __m128i *)left);
+ const __m256i l = _mm256_inserti128_si256(_mm256_castsi128_si256(x), x, 1);
+ const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
+ __m256i rep = _mm256_set1_epi16(0x8000);
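+  // Each 16-bit lane of rep is 0x8000: shuffle_epi8 zeroes bytes whose
+  // control has the high bit set and picks byte 0 otherwise, so the shuffle
+  // below zero-extends left pixel i into every 16-bit lane; adding one per
+  // row advances to the next left pixel.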
+ const __m256i one = _mm256_set1_epi16(1);
+ const __m256i top = get_top_vector(above);
+
+ int i;
+ for (i = 0; i < 8; ++i) {
+ const __m256i l16 = _mm256_shuffle_epi8(l, rep);
+ const __m128i row = paeth_16x1_pred(&l16, &top, &tl16);
+
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ rep = _mm256_add_epi16(rep, one);
+ }
+}
+
+static INLINE __m256i get_left_vector(const uint8_t *left) {
+ const __m128i x = _mm_load_si128((const __m128i *)left);
+ return _mm256_inserti128_si256(_mm256_castsi128_si256(x), x, 1);
+}
+
+void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m256i l = get_left_vector(left);
+ const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
+ __m256i rep = _mm256_set1_epi16(0x8000);
+ const __m256i one = _mm256_set1_epi16(1);
+ const __m256i top = get_top_vector(above);
+
+ int i;
+ for (i = 0; i < 16; ++i) {
+ const __m256i l16 = _mm256_shuffle_epi8(l, rep);
+ const __m128i row = paeth_16x1_pred(&l16, &top, &tl16);
+
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ rep = _mm256_add_epi16(rep, one);
+ }
+}
+
+void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m256i l = get_left_vector(left);
+ const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
+ __m256i rep = _mm256_set1_epi16(0x8000);
+ const __m256i one = _mm256_set1_epi16(1);
+ const __m256i top = get_top_vector(above);
+
+ int i;
+ for (i = 0; i < 16; ++i) {
+ const __m256i l16 = _mm256_shuffle_epi8(l, rep);
+ const __m128i row = paeth_16x1_pred(&l16, &top, &tl16);
+
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ rep = _mm256_add_epi16(rep, one);
+ }
+
+ l = get_left_vector(left + 16);
+ rep = _mm256_set1_epi16(0x8000);
+ for (i = 0; i < 16; ++i) {
+ const __m256i l16 = _mm256_shuffle_epi8(l, rep);
+ const __m128i row = paeth_16x1_pred(&l16, &top, &tl16);
+
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ rep = _mm256_add_epi16(rep, one);
+ }
+}
+
+// Return 32 8-bit pixels in one row (__m256i)
+static INLINE __m256i paeth_32x1_pred(const __m256i *left, const __m256i *top0,
+ const __m256i *top1,
+ const __m256i *topleft) {
+ __m256i p0 = paeth_pred(left, top0, topleft);
+ __m256i p1 = _mm256_permute4x64_epi64(p0, 0xe);
+ const __m256i x0 = _mm256_packus_epi16(p0, p1);
+
+ p0 = paeth_pred(left, top1, topleft);
+ p1 = _mm256_permute4x64_epi64(p0, 0xe);
+ const __m256i x1 = _mm256_packus_epi16(p0, p1);
+
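+  // 0x20 concatenates the low lanes of x0 and x1: the left and right
+  // 16-pixel halves of the 32-pixel row, in order.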
+ return _mm256_permute2x128_si256(x0, x1, 0x20);
+}
+
+void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m256i l = get_left_vector(left);
+ const __m256i t0 = get_top_vector(above);
+ const __m256i t1 = get_top_vector(above + 16);
+ const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
+ __m256i rep = _mm256_set1_epi16(0x8000);
+ const __m256i one = _mm256_set1_epi16(1);
+
+ int i;
+ for (i = 0; i < 16; ++i) {
+ const __m256i l16 = _mm256_shuffle_epi8(l, rep);
+
+ const __m256i r = paeth_32x1_pred(&l16, &t0, &t1, &tl);
+
+ _mm256_storeu_si256((__m256i *)dst, r);
+
+ dst += stride;
+ rep = _mm256_add_epi16(rep, one);
+ }
+}
+
+void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m256i l = get_left_vector(left);
+ const __m256i t0 = get_top_vector(above);
+ const __m256i t1 = get_top_vector(above + 16);
+ const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
+ __m256i rep = _mm256_set1_epi16(0x8000);
+ const __m256i one = _mm256_set1_epi16(1);
+
+ int i;
+ for (i = 0; i < 16; ++i) {
+ const __m256i l16 = _mm256_shuffle_epi8(l, rep);
+
+ const __m128i r0 = paeth_16x1_pred(&l16, &t0, &tl);
+ const __m128i r1 = paeth_16x1_pred(&l16, &t1, &tl);
+
+ _mm_store_si128((__m128i *)dst, r0);
+ _mm_store_si128((__m128i *)(dst + 16), r1);
+
+ dst += stride;
+ rep = _mm256_add_epi16(rep, one);
+ }
+
+ l = get_left_vector(left + 16);
+ rep = _mm256_set1_epi16(0x8000);
+ for (i = 0; i < 16; ++i) {
+ const __m256i l16 = _mm256_shuffle_epi8(l, rep);
+
+ const __m128i r0 = paeth_16x1_pred(&l16, &t0, &tl);
+ const __m128i r1 = paeth_16x1_pred(&l16, &t1, &tl);
+
+ _mm_store_si128((__m128i *)dst, r0);
+ _mm_store_si128((__m128i *)(dst + 16), r1);
+
+ dst += stride;
+ rep = _mm256_add_epi16(rep, one);
+ }
+}
diff --git a/third_party/aom/aom_dsp/x86/intrapred_sse2.asm b/third_party/aom/aom_dsp/x86/intrapred_sse2.asm
index 02567db49..9aece27be 100644
--- a/third_party/aom/aom_dsp/x86/intrapred_sse2.asm
+++ b/third_party/aom/aom_dsp/x86/intrapred_sse2.asm
@@ -623,149 +623,3 @@ cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left
lea dstq, [dstq+strideq*4]
jnz .loop
REP_RET
-
-INIT_XMM sse2
-cglobal tm_predictor_4x4, 4, 4, 5, dst, stride, above, left
- pxor m1, m1
- movq m0, [aboveq-1]; [63:0] tl t1 t2 t3 t4 x x x
- punpcklbw m0, m1
- pshuflw m2, m0, 0x0 ; [63:0] tl tl tl tl [word]
- psrldq m0, 2
- psubw m0, m2 ; [63:0] t1-tl t2-tl t3-tl t4-tl [word]
- movd m2, [leftq]
- punpcklbw m2, m1
- pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word]
- pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word]
- paddw m4, m0
- paddw m3, m0
- packuswb m4, m4
- packuswb m3, m3
- movd [dstq ], m4
- movd [dstq+strideq], m3
- lea dstq, [dstq+strideq*2]
- pshuflw m4, m2, 0xaa
- pshuflw m3, m2, 0xff
- paddw m4, m0
- paddw m3, m0
- packuswb m4, m4
- packuswb m3, m3
- movd [dstq ], m4
- movd [dstq+strideq], m3
- RET
-
-INIT_XMM sse2
-cglobal tm_predictor_8x8, 4, 4, 5, dst, stride, above, left
- pxor m1, m1
- movd m2, [aboveq-1]
- movq m0, [aboveq]
- punpcklbw m2, m1
- punpcklbw m0, m1 ; t1 t2 t3 t4 t5 t6 t7 t8 [word]
- pshuflw m2, m2, 0x0 ; [63:0] tl tl tl tl [word]
- DEFINE_ARGS dst, stride, line, left
- mov lineq, -4
- punpcklqdq m2, m2 ; tl tl tl tl tl tl tl tl [word]
- psubw m0, m2 ; t1-tl t2-tl ... t8-tl [word]
- movq m2, [leftq]
- punpcklbw m2, m1 ; l1 l2 l3 l4 l5 l6 l7 l8 [word]
-.loop:
- pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word]
- pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word]
- punpcklqdq m4, m4 ; l1 l1 l1 l1 l1 l1 l1 l1 [word]
- punpcklqdq m3, m3 ; l2 l2 l2 l2 l2 l2 l2 l2 [word]
- paddw m4, m0
- paddw m3, m0
- packuswb m4, m3
- movq [dstq ], m4
- movhps [dstq+strideq], m4
- lea dstq, [dstq+strideq*2]
- psrldq m2, 4
- inc lineq
- jnz .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal tm_predictor_16x16, 4, 5, 8, dst, stride, above, left
- pxor m1, m1
- mova m2, [aboveq-16];
- mova m0, [aboveq] ; t1 t2 ... t16 [byte]
- punpckhbw m2, m1 ; [127:112] tl [word]
- punpckhbw m4, m0, m1
- punpcklbw m0, m1 ; m0:m4 t1 t2 ... t16 [word]
- DEFINE_ARGS dst, stride, line, left, stride8
- mov lineq, -8
- pshufhw m2, m2, 0xff
- mova m3, [leftq] ; l1 l2 ... l16 [byte]
- punpckhqdq m2, m2 ; tl repeated 8 times [word]
- psubw m0, m2
- psubw m4, m2 ; m0:m4 t1-tl t2-tl ... t16-tl [word]
- punpckhbw m5, m3, m1
- punpcklbw m3, m1 ; m3:m5 l1 l2 ... l16 [word]
- lea stride8q, [strideq*8]
-.loop:
- pshuflw m6, m3, 0x0
- pshuflw m7, m5, 0x0
- punpcklqdq m6, m6 ; l1 repeated 8 times [word]
- punpcklqdq m7, m7 ; l8 repeated 8 times [word]
- paddw m1, m6, m0
- paddw m6, m4 ; m1:m6 ti-tl+l1 [i=1,15] [word]
- psrldq m5, 2
- packuswb m1, m6
- mova [dstq ], m1
- paddw m1, m7, m0
- paddw m7, m4 ; m1:m7 ti-tl+l8 [i=1,15] [word]
- psrldq m3, 2
- packuswb m1, m7
- mova [dstq+stride8q], m1
- inc lineq
- lea dstq, [dstq+strideq]
- jnz .loop
- REP_RET
-
-INIT_XMM sse2
-cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left
- pxor m1, m1
- movd m2, [aboveq-1]
- mova m0, [aboveq]
- mova m4, [aboveq+16]
- punpcklbw m2, m1
- punpckhbw m3, m0, m1
- punpckhbw m5, m4, m1
- punpcklbw m0, m1
- punpcklbw m4, m1
- pshuflw m2, m2, 0x0
- DEFINE_ARGS dst, stride, line, left
- mov lineq, -16
- punpcklqdq m2, m2
- add leftq, 32
- psubw m0, m2
- psubw m3, m2
- psubw m4, m2
- psubw m5, m2
-.loop:
- movd m2, [leftq+lineq*2]
- pxor m1, m1
- punpcklbw m2, m1
- pshuflw m7, m2, 0x55
- pshuflw m2, m2, 0x0
- punpcklqdq m2, m2
- punpcklqdq m7, m7
- paddw m6, m2, m3
- paddw m1, m2, m0
- packuswb m1, m6
- mova [dstq ], m1
- paddw m6, m2, m5
- paddw m1, m2, m4
- packuswb m1, m6
- mova [dstq+16 ], m1
- paddw m6, m7, m3
- paddw m1, m7, m0
- packuswb m1, m6
- mova [dstq+strideq ], m1
- paddw m6, m7, m5
- paddw m1, m7, m4
- packuswb m1, m6
- mova [dstq+strideq+16], m1
- lea dstq, [dstq+strideq*2]
- inc lineq
- jnz .loop
- REP_RET
diff --git a/third_party/aom/aom_dsp/x86/intrapred_sse2.c b/third_party/aom/aom_dsp/x86/intrapred_sse2.c
new file mode 100644
index 000000000..2a83b9001
--- /dev/null
+++ b/third_party/aom/aom_dsp/x86/intrapred_sse2.c
@@ -0,0 +1,684 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <emmintrin.h>
+
+#include "./aom_dsp_rtcd.h"
+
+static INLINE void dc_store_4x8(uint32_t dc, uint8_t *dst, ptrdiff_t stride) {
+ int i;
+ for (i = 0; i < 4; ++i) {
+ *(uint32_t *)dst = dc;
+ dst += stride;
+ *(uint32_t *)dst = dc;
+ dst += stride;
+ }
+}
+
+static INLINE void dc_store_8xh(const __m128i *row, int height, uint8_t *dst,
+ ptrdiff_t stride) {
+ int i;
+ for (i = 0; i < height; ++i) {
+ _mm_storel_epi64((__m128i *)dst, *row);
+ dst += stride;
+ }
+}
+
+static INLINE void dc_store_16xh(const __m128i *row, int height, uint8_t *dst,
+ ptrdiff_t stride) {
+ int i;
+ for (i = 0; i < height; ++i) {
+ _mm_store_si128((__m128i *)dst, *row);
+ dst += stride;
+ }
+}
+
+static INLINE void dc_store_32xh(const __m128i *row, int height, uint8_t *dst,
+ ptrdiff_t stride) {
+ int i;
+ for (i = 0; i < height; ++i) {
+ _mm_store_si128((__m128i *)dst, *row);
+ _mm_store_si128((__m128i *)(dst + 16), *row);
+ dst += stride;
+ }
+}
+
+static INLINE __m128i dc_sum_4(const uint8_t *ref) {
+ __m128i x = _mm_loadl_epi64((__m128i const *)ref);
+ const __m128i zero = _mm_setzero_si128();
+ x = _mm_unpacklo_epi8(x, zero);
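+  // Interleaving with zero pads each byte to 16 bits, so the SAD's low
+  // 64-bit half sums exactly the first four reference bytes.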
+ return _mm_sad_epu8(x, zero);
+}
+
+static INLINE __m128i dc_sum_8(const uint8_t *ref) {
+ __m128i x = _mm_loadl_epi64((__m128i const *)ref);
+ const __m128i zero = _mm_setzero_si128();
+ return _mm_sad_epu8(x, zero);
+}
+
+static INLINE __m128i dc_sum_16(const uint8_t *ref) {
+ __m128i x = _mm_load_si128((__m128i const *)ref);
+ const __m128i zero = _mm_setzero_si128();
+ x = _mm_sad_epu8(x, zero);
+ const __m128i high = _mm_unpackhi_epi64(x, x);
+ return _mm_add_epi16(x, high);
+}
+
+static INLINE __m128i dc_sum_32(const uint8_t *ref) {
+ __m128i x0 = _mm_load_si128((__m128i const *)ref);
+ __m128i x1 = _mm_load_si128((__m128i const *)(ref + 16));
+ const __m128i zero = _mm_setzero_si128();
+ x0 = _mm_sad_epu8(x0, zero);
+ x1 = _mm_sad_epu8(x1, zero);
+ x0 = _mm_add_epi16(x0, x1);
+ const __m128i high = _mm_unpackhi_epi64(x0, x0);
+ return _mm_add_epi16(x0, high);
+}
+
+// -----------------------------------------------------------------------------
+// DC_PRED
+
+void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i sum_left = dc_sum_8(left);
+ __m128i sum_above = dc_sum_4(above);
+ sum_above = _mm_add_epi16(sum_left, sum_above);
+
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ sum += 6;
+ sum /= 12;
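+  // 4 top + 8 left = 12 edge pixels; +6 rounds the integer average.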
+
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const uint32_t pred = _mm_cvtsi128_si32(row);
+ dc_store_4x8(pred, dst, stride);
+}
+
+void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i sum_left = dc_sum_4(left);
+ __m128i sum_above = dc_sum_8(above);
+ sum_above = _mm_add_epi16(sum_above, sum_left);
+
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ sum += 6;
+ sum /= 12;
+
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ dc_store_8xh(&row, 4, dst, stride);
+}
+
+void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i sum_left = dc_sum_16(left);
+ __m128i sum_above = dc_sum_8(above);
+ sum_above = _mm_add_epi16(sum_above, sum_left);
+
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ sum += 12;
+ sum /= 24;
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ dc_store_8xh(&row, 16, dst, stride);
+}
+
+void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i sum_left = dc_sum_8(left);
+ __m128i sum_above = dc_sum_16(above);
+ sum_above = _mm_add_epi16(sum_above, sum_left);
+
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ sum += 12;
+ sum /= 24;
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ dc_store_16xh(&row, 8, dst, stride);
+}
+
+void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i sum_left = dc_sum_32(left);
+ __m128i sum_above = dc_sum_16(above);
+ sum_above = _mm_add_epi16(sum_left, sum_above);
+
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ sum += 24;
+ sum /= 48;
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ dc_store_16xh(&row, 32, dst, stride);
+}
+
+void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i sum_above = dc_sum_32(above);
+ const __m128i sum_left = dc_sum_16(left);
+ sum_above = _mm_add_epi16(sum_above, sum_left);
+
+ uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ sum += 24;
+ sum /= 48;
+ const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ dc_store_32xh(&row, 16, dst, stride);
+}
+
+// -----------------------------------------------------------------------------
+// DC_TOP
+
+void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+ __m128i sum_above = dc_sum_4(above);
+ const __m128i two = _mm_set1_epi16((int16_t)2);
+ sum_above = _mm_add_epi16(sum_above, two);
+ sum_above = _mm_srai_epi16(sum_above, 2);
+ sum_above = _mm_shufflelo_epi16(sum_above, 0);
+ sum_above = _mm_packus_epi16(sum_above, sum_above);
+
+ const uint32_t pred = _mm_cvtsi128_si32(sum_above);
+ dc_store_4x8(pred, dst, stride);
+}
+
+void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+ __m128i sum_above = dc_sum_8(above);
+ const __m128i four = _mm_set1_epi16((uint16_t)4);
+ sum_above = _mm_add_epi16(sum_above, four);
+ sum_above = _mm_srai_epi16(sum_above, 3);
+ sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
+ const __m128i row = _mm_shufflelo_epi16(sum_above, 0);
+ dc_store_8xh(&row, 4, dst, stride);
+}
+
+void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+ __m128i sum_above = dc_sum_8(above);
+ const __m128i four = _mm_set1_epi16((uint16_t)4);
+ sum_above = _mm_add_epi16(sum_above, four);
+ sum_above = _mm_srai_epi16(sum_above, 3);
+ sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
+ const __m128i row = _mm_shufflelo_epi16(sum_above, 0);
+ dc_store_8xh(&row, 16, dst, stride);
+}
+
+void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+ __m128i sum_above = dc_sum_16(above);
+ const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ sum_above = _mm_add_epi16(sum_above, eight);
+ sum_above = _mm_srai_epi16(sum_above, 4);
+ sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
+ sum_above = _mm_shufflelo_epi16(sum_above, 0);
+ const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
+ dc_store_16xh(&row, 8, dst, stride);
+}
+
+void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)left;
+ __m128i sum_above = dc_sum_16(above);
+ const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ sum_above = _mm_add_epi16(sum_above, eight);
+ sum_above = _mm_srai_epi16(sum_above, 4);
+ sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
+ sum_above = _mm_shufflelo_epi16(sum_above, 0);
+ const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
+ dc_store_16xh(&row, 32, dst, stride);
+}
+
+void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)left;
+ __m128i sum_above = dc_sum_32(above);
+ const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
+ sum_above = _mm_add_epi16(sum_above, sixteen);
+ sum_above = _mm_srai_epi16(sum_above, 5);
+ sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
+ sum_above = _mm_shufflelo_epi16(sum_above, 0);
+ const __m128i row = _mm_unpacklo_epi64(sum_above, sum_above);
+ dc_store_32xh(&row, 16, dst, stride);
+}
+
+// -----------------------------------------------------------------------------
+// DC_LEFT
+
+void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ __m128i sum_left = dc_sum_8(left);
+ const __m128i four = _mm_set1_epi16((uint16_t)4);
+ sum_left = _mm_add_epi16(sum_left, four);
+ sum_left = _mm_srai_epi16(sum_left, 3);
+ sum_left = _mm_shufflelo_epi16(sum_left, 0);
+ sum_left = _mm_packus_epi16(sum_left, sum_left);
+
+ const uint32_t pred = _mm_cvtsi128_si32(sum_left);
+ dc_store_4x8(pred, dst, stride);
+}
+
+void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ __m128i sum_left = dc_sum_4(left);
+ const __m128i two = _mm_set1_epi16((uint16_t)2);
+ sum_left = _mm_add_epi16(sum_left, two);
+ sum_left = _mm_srai_epi16(sum_left, 2);
+ sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
+ const __m128i row = _mm_shufflelo_epi16(sum_left, 0);
+ dc_store_8xh(&row, 4, dst, stride);
+}
+
+void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ __m128i sum_left = dc_sum_16(left);
+ const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ sum_left = _mm_add_epi16(sum_left, eight);
+ sum_left = _mm_srai_epi16(sum_left, 4);
+ sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
+ const __m128i row = _mm_shufflelo_epi16(sum_left, 0);
+ dc_store_8xh(&row, 16, dst, stride);
+}
+
+void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ __m128i sum_left = dc_sum_8(left);
+ const __m128i four = _mm_set1_epi16((uint16_t)4);
+ sum_left = _mm_add_epi16(sum_left, four);
+ sum_left = _mm_srai_epi16(sum_left, 3);
+ sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
+ sum_left = _mm_shufflelo_epi16(sum_left, 0);
+ const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
+ dc_store_16xh(&row, 8, dst, stride);
+}
+
+void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ __m128i sum_left = dc_sum_32(left);
+ const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
+ sum_left = _mm_add_epi16(sum_left, sixteen);
+ sum_left = _mm_srai_epi16(sum_left, 5);
+ sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
+ sum_left = _mm_shufflelo_epi16(sum_left, 0);
+ const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
+ dc_store_16xh(&row, 32, dst, stride);
+}
+
+void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ __m128i sum_left = dc_sum_16(left);
+ const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ sum_left = _mm_add_epi16(sum_left, eight);
+ sum_left = _mm_srai_epi16(sum_left, 4);
+ sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
+ sum_left = _mm_shufflelo_epi16(sum_left, 0);
+ const __m128i row = _mm_unpacklo_epi64(sum_left, sum_left);
+ dc_store_32xh(&row, 16, dst, stride);
+}
+
+// -----------------------------------------------------------------------------
+// DC_128
+
+void aom_dc_128_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+ const uint32_t pred = 0x80808080;
+ dc_store_4x8(pred, dst, stride);
+}
+
+void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+ const __m128i row = _mm_set1_epi8((uint8_t)128);
+ dc_store_8xh(&row, 4, dst, stride);
+}
+
+void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+ const __m128i row = _mm_set1_epi8((uint8_t)128);
+ dc_store_8xh(&row, 16, dst, stride);
+}
+
+void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+ const __m128i row = _mm_set1_epi8((uint8_t)128);
+ dc_store_16xh(&row, 8, dst, stride);
+}
+
+void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ (void)left;
+ const __m128i row = _mm_set1_epi8((uint8_t)128);
+ dc_store_16xh(&row, 32, dst, stride);
+}
+
+void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ (void)left;
+ const __m128i row = _mm_set1_epi8((uint8_t)128);
+ dc_store_32xh(&row, 16, dst, stride);
+}
+
+// -----------------------------------------------------------------------------
+// V_PRED
+
+void aom_v_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const uint32_t pred = *(uint32_t *)above;
+ (void)left;
+ dc_store_4x8(pred, dst, stride);
+}
+
+void aom_v_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i row = _mm_loadl_epi64((__m128i const *)above);
+ (void)left;
+ dc_store_8xh(&row, 4, dst, stride);
+}
+
+void aom_v_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i row = _mm_loadl_epi64((__m128i const *)above);
+ (void)left;
+ dc_store_8xh(&row, 16, dst, stride);
+}
+
+void aom_v_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i row = _mm_load_si128((__m128i const *)above);
+ (void)left;
+ dc_store_16xh(&row, 8, dst, stride);
+}
+
+void aom_v_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i row = _mm_load_si128((__m128i const *)above);
+ (void)left;
+ dc_store_16xh(&row, 32, dst, stride);
+}
+
+void aom_v_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const __m128i row0 = _mm_load_si128((__m128i const *)above);
+ const __m128i row1 = _mm_load_si128((__m128i const *)(above + 16));
+ (void)left;
+ int i;
+ for (i = 0; i < 16; ++i) {
+ _mm_store_si128((__m128i *)dst, row0);
+ _mm_store_si128((__m128i *)(dst + 16), row1);
+ dst += stride;
+ }
+}
+
+// -----------------------------------------------------------------------------
+// H_PRED
+
+void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ __m128i left_col = _mm_loadl_epi64((__m128i const *)left);
+ left_col = _mm_unpacklo_epi8(left_col, left_col);
+ __m128i row0 = _mm_shufflelo_epi16(left_col, 0);
+ __m128i row1 = _mm_shufflelo_epi16(left_col, 0x55);
+ __m128i row2 = _mm_shufflelo_epi16(left_col, 0xaa);
+ __m128i row3 = _mm_shufflelo_epi16(left_col, 0xff);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+ dst += stride;
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+ dst += stride;
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+ dst += stride;
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+ dst += stride;
+ left_col = _mm_unpackhi_epi64(left_col, left_col);
+ row0 = _mm_shufflelo_epi16(left_col, 0);
+ row1 = _mm_shufflelo_epi16(left_col, 0x55);
+ row2 = _mm_shufflelo_epi16(left_col, 0xaa);
+ row3 = _mm_shufflelo_epi16(left_col, 0xff);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+ dst += stride;
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+ dst += stride;
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+ dst += stride;
+ *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+}
+
+void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ __m128i left_col = _mm_loadl_epi64((__m128i const *)left);
+ left_col = _mm_unpacklo_epi8(left_col, left_col);
+ __m128i row0 = _mm_shufflelo_epi16(left_col, 0);
+ __m128i row1 = _mm_shufflelo_epi16(left_col, 0x55);
+ __m128i row2 = _mm_shufflelo_epi16(left_col, 0xaa);
+ __m128i row3 = _mm_shufflelo_epi16(left_col, 0xff);
+ _mm_storel_epi64((__m128i *)dst, row0);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row1);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row2);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row3);
+}
+
+void aom_h_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ const __m128i left_col = _mm_load_si128((__m128i const *)left);
+ __m128i left_col_low = _mm_unpacklo_epi8(left_col, left_col);
+ __m128i left_col_high = _mm_unpackhi_epi8(left_col, left_col);
+
+ __m128i row0 = _mm_shufflelo_epi16(left_col_low, 0);
+ __m128i row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
+ __m128i row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
+ __m128i row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
+ _mm_storel_epi64((__m128i *)dst, row0);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row1);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row2);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row3);
+ dst += stride;
+
+ left_col_low = _mm_unpackhi_epi64(left_col_low, left_col_low);
+ row0 = _mm_shufflelo_epi16(left_col_low, 0);
+ row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
+ row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
+ row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
+ _mm_storel_epi64((__m128i *)dst, row0);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row1);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row2);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row3);
+ dst += stride;
+
+ row0 = _mm_shufflelo_epi16(left_col_high, 0);
+ row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
+ row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
+ row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
+ _mm_storel_epi64((__m128i *)dst, row0);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row1);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row2);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row3);
+ dst += stride;
+
+ left_col_high = _mm_unpackhi_epi64(left_col_high, left_col_high);
+ row0 = _mm_shufflelo_epi16(left_col_high, 0);
+ row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
+ row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
+ row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
+ _mm_storel_epi64((__m128i *)dst, row0);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row1);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row2);
+ dst += stride;
+ _mm_storel_epi64((__m128i *)dst, row3);
+}
+
+static INLINE void h_pred_store_16xh(const __m128i *row, int h, uint8_t *dst,
+ ptrdiff_t stride) {
+ int i;
+ for (i = 0; i < h; ++i) {
+ _mm_store_si128((__m128i *)dst, row[i]);
+ dst += stride;
+ }
+}
+
+static INLINE void repeat_low_4pixels(const __m128i *x, __m128i *row) {
+ const __m128i u0 = _mm_shufflelo_epi16(*x, 0);
+ const __m128i u1 = _mm_shufflelo_epi16(*x, 0x55);
+ const __m128i u2 = _mm_shufflelo_epi16(*x, 0xaa);
+ const __m128i u3 = _mm_shufflelo_epi16(*x, 0xff);
+
+ row[0] = _mm_unpacklo_epi64(u0, u0);
+ row[1] = _mm_unpacklo_epi64(u1, u1);
+ row[2] = _mm_unpacklo_epi64(u2, u2);
+ row[3] = _mm_unpacklo_epi64(u3, u3);
+}
+
+static INLINE void repeat_high_4pixels(const __m128i *x, __m128i *row) {
+ const __m128i u0 = _mm_shufflehi_epi16(*x, 0);
+ const __m128i u1 = _mm_shufflehi_epi16(*x, 0x55);
+ const __m128i u2 = _mm_shufflehi_epi16(*x, 0xaa);
+ const __m128i u3 = _mm_shufflehi_epi16(*x, 0xff);
+
+ row[0] = _mm_unpackhi_epi64(u0, u0);
+ row[1] = _mm_unpackhi_epi64(u1, u1);
+ row[2] = _mm_unpackhi_epi64(u2, u2);
+ row[3] = _mm_unpackhi_epi64(u3, u3);
+}
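+
+// After _mm_unpacklo_epi8(left, left) each 16-bit lane holds one left pixel
+// duplicated, so the 0/0x55/0xaa/0xff shuffles broadcast pixels 0..3 (low
+// variant) or 4..7 (high variant), and the 64-bit unpacks spread each pixel
+// across all 16 bytes of a row.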
+
+// Process 16x8, first 4 rows
+// Use first 8 bytes of left register: xxxxxxxx33221100
+static INLINE void h_prediction_16x8_1(const __m128i *left, uint8_t *dst,
+ ptrdiff_t stride) {
+ __m128i row[4];
+ repeat_low_4pixels(left, row);
+ h_pred_store_16xh(row, 4, dst, stride);
+}
+
+// Process 16x8, second 4 rows
+// Use second 8 bytes of left register: 77665544xxxxxxxx
+static INLINE void h_prediction_16x8_2(const __m128i *left, uint8_t *dst,
+ ptrdiff_t stride) {
+ __m128i row[4];
+ repeat_high_4pixels(left, row);
+ h_pred_store_16xh(row, 4, dst, stride);
+}
+
+void aom_h_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ const __m128i left_col = _mm_loadl_epi64((const __m128i *)left);
+ const __m128i left_col_8p = _mm_unpacklo_epi8(left_col, left_col);
+ h_prediction_16x8_1(&left_col_8p, dst, stride);
+ dst += stride << 2;
+ h_prediction_16x8_2(&left_col_8p, dst, stride);
+}
+
+void aom_h_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i left_col, left_col_8p;
+ (void)above;
+ int i = 0;
+
+ do {
+ left_col = _mm_load_si128((const __m128i *)left);
+ left_col_8p = _mm_unpacklo_epi8(left_col, left_col);
+ h_prediction_16x8_1(&left_col_8p, dst, stride);
+ dst += stride << 2;
+ h_prediction_16x8_2(&left_col_8p, dst, stride);
+ dst += stride << 2;
+
+ left_col_8p = _mm_unpackhi_epi8(left_col, left_col);
+ h_prediction_16x8_1(&left_col_8p, dst, stride);
+ dst += stride << 2;
+ h_prediction_16x8_2(&left_col_8p, dst, stride);
+ dst += stride << 2;
+
+ left += 16;
+ i++;
+ } while (i < 2);
+}
+
+static INLINE void h_pred_store_32xh(const __m128i *row, int h, uint8_t *dst,
+ ptrdiff_t stride) {
+ int i;
+ for (i = 0; i < h; ++i) {
+ _mm_store_si128((__m128i *)dst, row[i]);
+ _mm_store_si128((__m128i *)(dst + 16), row[i]);
+ dst += stride;
+ }
+}
+
+// Process 32x8, first 4 rows
+// Use first 8 bytes of left register: xxxxxxxx33221100
+static INLINE void h_prediction_32x8_1(const __m128i *left, uint8_t *dst,
+ ptrdiff_t stride) {
+ __m128i row[4];
+ repeat_low_4pixels(left, row);
+ h_pred_store_32xh(row, 4, dst, stride);
+}
+
+// Process 32x8, second 4 rows
+// Use second 8 bytes of left register: 77665544xxxxxxxx
+static INLINE void h_prediction_32x8_2(const __m128i *left, uint8_t *dst,
+ ptrdiff_t stride) {
+ __m128i row[4];
+ repeat_high_4pixels(left, row);
+ h_pred_store_32xh(row, 4, dst, stride);
+}
+
+void aom_h_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i left_col, left_col_8p;
+ (void)above;
+
+ left_col = _mm_load_si128((const __m128i *)left);
+
+ left_col_8p = _mm_unpacklo_epi8(left_col, left_col);
+ h_prediction_32x8_1(&left_col_8p, dst, stride);
+ dst += stride << 2;
+ h_prediction_32x8_2(&left_col_8p, dst, stride);
+ dst += stride << 2;
+
+ left_col_8p = _mm_unpackhi_epi8(left_col, left_col);
+ h_prediction_32x8_1(&left_col_8p, dst, stride);
+ dst += stride << 2;
+ h_prediction_32x8_2(&left_col_8p, dst, stride);
+}
diff --git a/third_party/aom/aom_dsp/x86/intrapred_ssse3.c b/third_party/aom/aom_dsp/x86/intrapred_ssse3.c
new file mode 100644
index 000000000..85b82744e
--- /dev/null
+++ b/third_party/aom/aom_dsp/x86/intrapred_ssse3.c
@@ -0,0 +1,885 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <tmmintrin.h>
+
+#include "./aom_dsp_rtcd.h"
+#include "aom_dsp/intrapred_common.h"
+
+// -----------------------------------------------------------------------------
+// PAETH_PRED
+
+// Return 8 16-bit pixels in one row
+static INLINE __m128i paeth_8x1_pred(const __m128i *left, const __m128i *top,
+ const __m128i *topleft) {
+ const __m128i base = _mm_sub_epi16(_mm_add_epi16(*top, *left), *topleft);
+
+ __m128i pl = _mm_abs_epi16(_mm_sub_epi16(base, *left));
+ __m128i pt = _mm_abs_epi16(_mm_sub_epi16(base, *top));
+ __m128i ptl = _mm_abs_epi16(_mm_sub_epi16(base, *topleft));
+
+ __m128i mask1 = _mm_cmpgt_epi16(pl, pt);
+ mask1 = _mm_or_si128(mask1, _mm_cmpgt_epi16(pl, ptl));
+ __m128i mask2 = _mm_cmpgt_epi16(pt, ptl);
+
+ pl = _mm_andnot_si128(mask1, *left);
+
+ ptl = _mm_and_si128(mask2, *topleft);
+ pt = _mm_andnot_si128(mask2, *top);
+ pt = _mm_or_si128(pt, ptl);
+ pt = _mm_and_si128(mask1, pt);
+
+ return _mm_or_si128(pl, pt);
+}
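+
+// For reference, the scalar Paeth selection computed above is:
+//   base = left + top - topleft;
+//   pl = |base - left|, pt = |base - top|, ptl = |base - topleft|;
+//   pred = (pl <= pt && pl <= ptl) ? left : (pt <= ptl) ? top : topleft;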
+
+void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i l = _mm_loadl_epi64((const __m128i *)left);
+ const __m128i t = _mm_loadl_epi64((const __m128i *)above);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i t16 = _mm_unpacklo_epi8(t, zero);
+ const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ const __m128i one = _mm_set1_epi16(1);
+
+ int i;
+ for (i = 0; i < 4; ++i) {
+ const __m128i l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
+
+ *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+}
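+
+// Note on 'rep': _mm_set1_epi16(0x8000) places 0x00 in the low byte and 0x80
+// in the high byte of every 16-bit lane. Used as an _mm_shuffle_epi8 control,
+// it selects byte 0 of 'l' into each low byte and zeroes each high byte (bit
+// 7 set), i.e. it broadcasts left[0] as zero-extended 16-bit lanes. Adding 1
+// per row advances the selection to left[1], left[2], ...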
+
+void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i l = _mm_loadl_epi64((const __m128i *)left);
+ const __m128i t = _mm_loadl_epi64((const __m128i *)above);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i t16 = _mm_unpacklo_epi8(t, zero);
+ const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ const __m128i one = _mm_set1_epi16(1);
+
+ int i;
+ for (i = 0; i < 8; ++i) {
+ const __m128i l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
+
+ *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+}
+
+void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i l = _mm_loadl_epi64((const __m128i *)left);
+ const __m128i t = _mm_loadl_epi64((const __m128i *)above);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i t16 = _mm_unpacklo_epi8(t, zero);
+ const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ const __m128i one = _mm_set1_epi16(1);
+
+ int i;
+ for (i = 0; i < 4; ++i) {
+ const __m128i l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
+
+ _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(row, row));
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+}
+
+void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i l = _mm_loadl_epi64((const __m128i *)left);
+ const __m128i t = _mm_loadl_epi64((const __m128i *)above);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i t16 = _mm_unpacklo_epi8(t, zero);
+ const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ const __m128i one = _mm_set1_epi16(1);
+
+ int i;
+ for (i = 0; i < 8; ++i) {
+ const __m128i l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
+
+ _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(row, row));
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+}
+
+void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i l = _mm_load_si128((const __m128i *)left);
+ const __m128i t = _mm_loadl_epi64((const __m128i *)above);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i t16 = _mm_unpacklo_epi8(t, zero);
+ const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ const __m128i one = _mm_set1_epi16(1);
+
+ int i;
+ for (i = 0; i < 16; ++i) {
+ const __m128i l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
+
+ _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(row, row));
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+}
+
+// Return 16 8-bit pixels in one row
+static INLINE __m128i paeth_16x1_pred(const __m128i *left, const __m128i *top0,
+ const __m128i *top1,
+ const __m128i *topleft) {
+ const __m128i p0 = paeth_8x1_pred(left, top0, topleft);
+ const __m128i p1 = paeth_8x1_pred(left, top1, topleft);
+ return _mm_packus_epi16(p0, p1);
+}
+
+void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i l = _mm_loadl_epi64((const __m128i *)left);
+ const __m128i t = _mm_load_si128((const __m128i *)above);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i top0 = _mm_unpacklo_epi8(t, zero);
+ const __m128i top1 = _mm_unpackhi_epi8(t, zero);
+ const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ const __m128i one = _mm_set1_epi16(1);
+
+ int i;
+ for (i = 0; i < 8; ++i) {
+ const __m128i l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16);
+
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+}
+
+void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m128i l = _mm_load_si128((const __m128i *)left);
+ const __m128i t = _mm_load_si128((const __m128i *)above);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i top0 = _mm_unpacklo_epi8(t, zero);
+ const __m128i top1 = _mm_unpackhi_epi8(t, zero);
+ const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ const __m128i one = _mm_set1_epi16(1);
+
+ int i;
+ for (i = 0; i < 16; ++i) {
+ const __m128i l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16);
+
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+}
+
+void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m128i l = _mm_load_si128((const __m128i *)left);
+ const __m128i t = _mm_load_si128((const __m128i *)above);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i top0 = _mm_unpacklo_epi8(t, zero);
+ const __m128i top1 = _mm_unpackhi_epi8(t, zero);
+ const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ const __m128i one = _mm_set1_epi16(1);
+ __m128i l16;
+
+ int i;
+ for (i = 0; i < 16; ++i) {
+ l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16);
+
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+
+ l = _mm_load_si128((const __m128i *)(left + 16));
+ rep = _mm_set1_epi16(0x8000);
+ for (i = 0; i < 16; ++i) {
+ l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i row = paeth_16x1_pred(&l16, &top0, &top1, &tl16);
+
+ _mm_store_si128((__m128i *)dst, row);
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+}
+
+void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ const __m128i a = _mm_load_si128((const __m128i *)above);
+ const __m128i b = _mm_load_si128((const __m128i *)(above + 16));
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i al = _mm_unpacklo_epi8(a, zero);
+ const __m128i ah = _mm_unpackhi_epi8(a, zero);
+ const __m128i bl = _mm_unpacklo_epi8(b, zero);
+ const __m128i bh = _mm_unpackhi_epi8(b, zero);
+
+ const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ const __m128i one = _mm_set1_epi16(1);
+ __m128i l = _mm_load_si128((const __m128i *)left);
+ __m128i l16;
+
+ int i;
+ for (i = 0; i < 16; ++i) {
+ l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i r32l = paeth_16x1_pred(&l16, &al, &ah, &tl16);
+ const __m128i r32h = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
+
+ _mm_store_si128((__m128i *)dst, r32l);
+ _mm_store_si128((__m128i *)(dst + 16), r32h);
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+}
+
+void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ const __m128i a = _mm_load_si128((const __m128i *)above);
+ const __m128i b = _mm_load_si128((const __m128i *)(above + 16));
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i al = _mm_unpacklo_epi8(a, zero);
+ const __m128i ah = _mm_unpackhi_epi8(a, zero);
+ const __m128i bl = _mm_unpacklo_epi8(b, zero);
+ const __m128i bh = _mm_unpackhi_epi8(b, zero);
+
+ const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ const __m128i one = _mm_set1_epi16(1);
+ __m128i l = _mm_load_si128((const __m128i *)left);
+ __m128i l16;
+
+ int i;
+ for (i = 0; i < 16; ++i) {
+ l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i r32l = paeth_16x1_pred(&l16, &al, &ah, &tl16);
+ const __m128i r32h = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
+
+ _mm_store_si128((__m128i *)dst, r32l);
+ _mm_store_si128((__m128i *)(dst + 16), r32h);
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+
+ rep = _mm_set1_epi16(0x8000);
+ l = _mm_load_si128((const __m128i *)(left + 16));
+ for (i = 0; i < 16; ++i) {
+ l16 = _mm_shuffle_epi8(l, rep);
+ const __m128i r32l = paeth_16x1_pred(&l16, &al, &ah, &tl16);
+ const __m128i r32h = paeth_16x1_pred(&l16, &bl, &bh, &tl16);
+
+ _mm_store_si128((__m128i *)dst, r32l);
+ _mm_store_si128((__m128i *)(dst + 16), r32h);
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ }
+}
+
+// -----------------------------------------------------------------------------
+// SMOOTH_PRED
+
+// pixels[0]: above and below_pred interleave vector
+// pixels[1]: left vector
+// pixels[2]: right_pred vector
+static INLINE void load_pixel_w4(const uint8_t *above, const uint8_t *left,
+ int height, __m128i *pixels) {
+ __m128i d = _mm_loadl_epi64((const __m128i *)above);
+ pixels[2] = _mm_set1_epi16((uint16_t)above[3]);
+ pixels[1] = _mm_loadl_epi64((const __m128i *)left);
+
+ const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
+ const __m128i zero = _mm_setzero_si128();
+ d = _mm_unpacklo_epi8(d, zero);
+ pixels[0] = _mm_unpacklo_epi16(d, bp);
+}
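+
+// With this layout a single _mm_madd_epi16 of pixels[0] against an
+// interleaved (weight, scale - weight) vector yields
+// w * above[c] + (scale - w) * below_pred for four columns at once.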
+
+// weights[0]: weights_h vector
+// weights[1]: scale - weights_h vector
+// weights[2]: weights_w and scale - weights_w interleave vector
+static INLINE void load_weight_w4(const uint8_t *weight_array, int height,
+ __m128i *weights) {
+ __m128i t = _mm_loadu_si128((const __m128i *)&weight_array[4]);
+ const __m128i zero = _mm_setzero_si128();
+
+ weights[0] = _mm_unpacklo_epi8(t, zero);
+ const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
+ weights[1] = _mm_sub_epi16(d, weights[0]);
+ weights[2] = _mm_unpacklo_epi16(weights[0], weights[1]);
+
+ if (height == 8) {
+ t = _mm_srli_si128(t, 4);
+ weights[0] = _mm_unpacklo_epi8(t, zero);
+ weights[1] = _mm_sub_epi16(d, weights[0]);
+ }
+}
+
+static INLINE void smooth_pred_4xh(const __m128i *pixel, const __m128i *weight,
+ int h, uint8_t *dst, ptrdiff_t stride) {
+ const __m128i round = _mm_set1_epi32((1 << sm_weight_log2_scale));
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i inc = _mm_set1_epi16(0x202);
+ const __m128i gat = _mm_set1_epi32(0xc080400);
+ __m128i rep = _mm_set1_epi16(0x8000);
+ __m128i d = _mm_set1_epi16(0x100);
+
+ int i;
+ for (i = 0; i < h; ++i) {
+ const __m128i wg_wg = _mm_shuffle_epi8(weight[0], d);
+ const __m128i sc_sc = _mm_shuffle_epi8(weight[1], d);
+ const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
+ __m128i s = _mm_madd_epi16(pixel[0], wh_sc);
+
+ __m128i b = _mm_shuffle_epi8(pixel[1], rep);
+ b = _mm_unpacklo_epi16(b, pixel[2]);
+ __m128i sum = _mm_madd_epi16(b, weight[2]);
+
+ sum = _mm_add_epi32(s, sum);
+ sum = _mm_add_epi32(sum, round);
+ sum = _mm_srai_epi32(sum, 1 + sm_weight_log2_scale);
+
+ sum = _mm_shuffle_epi8(sum, gat);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
+ dst += stride;
+
+ rep = _mm_add_epi16(rep, one);
+ d = _mm_add_epi16(d, inc);
+ }
+}
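+
+// For reference, each smooth-predicted pixel is the scalar expression
+//   (w_h[r] * above[c] + (S - w_h[r]) * left[H-1] +
+//    w_w[c] * left[r] + (S - w_w[c]) * above[W-1] + S) >> (log2(S) + 1)
+// with S = 1 << sm_weight_log2_scale, i.e. the rounded average of a vertical
+// and a horizontal weighted interpolation.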
+
+void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i pixels[3];
+ load_pixel_w4(above, left, 4, pixels);
+
+ __m128i weights[3];
+ load_weight_w4(sm_weight_arrays, 4, weights);
+
+ smooth_pred_4xh(pixels, weights, 4, dst, stride);
+}
+
+void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i pixels[3];
+ load_pixel_w4(above, left, 8, pixels);
+
+ __m128i weights[3];
+ load_weight_w4(sm_weight_arrays, 8, weights);
+
+ smooth_pred_4xh(pixels, weights, 8, dst, stride);
+}
+
+// pixels[0]: above and below_pred interleave vector, first half
+// pixels[1]: above and below_pred interleave vector, second half
+// pixels[2]: left vector
+// pixels[3]: right_pred vector
+static INLINE void load_pixel_w8(const uint8_t *above, const uint8_t *left,
+ int height, __m128i *pixels) {
+ __m128i d = _mm_loadl_epi64((const __m128i *)above);
+ pixels[3] = _mm_set1_epi16((uint16_t)above[7]);
+ pixels[2] = _mm_load_si128((const __m128i *)left);
+ const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
+ const __m128i zero = _mm_setzero_si128();
+
+ d = _mm_unpacklo_epi8(d, zero);
+ pixels[0] = _mm_unpacklo_epi16(d, bp);
+ pixels[1] = _mm_unpackhi_epi16(d, bp);
+}
+
+// weight_h[0]: weight_h vector
+// weight_h[1]: scale - weight_h vector
+// weight_h[2]: same as [0], second half for height = 16 only
+// weight_h[3]: same as [1], second half for height = 16 only
+// weight_w[0]: weights_w and scale - weights_w interleave vector, first half
+// weight_w[1]: weights_w and scale - weights_w interleave vector, second half
+static INLINE void load_weight_w8(const uint8_t *weight_array, int height,
+ __m128i *weight_h, __m128i *weight_w) {
+ const __m128i zero = _mm_setzero_si128();
+ const int we_offset = height < 8 ? 4 : 8;
+ __m128i we = _mm_loadu_si128((const __m128i *)&weight_array[we_offset]);
+ weight_h[0] = _mm_unpacklo_epi8(we, zero);
+
+ const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
+ weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
+
+ if (height == 4) {
+ we = _mm_srli_si128(we, 4);
+ __m128i tmp1 = _mm_unpacklo_epi8(we, zero);
+ __m128i tmp2 = _mm_sub_epi16(d, tmp1);
+ weight_w[0] = _mm_unpacklo_epi16(tmp1, tmp2);
+ weight_w[1] = _mm_unpackhi_epi16(tmp1, tmp2);
+ } else {
+ weight_w[0] = _mm_unpacklo_epi16(weight_h[0], weight_h[1]);
+ weight_w[1] = _mm_unpackhi_epi16(weight_h[0], weight_h[1]);
+ }
+
+ if (height == 16) {
+ we = _mm_loadu_si128((const __m128i *)&weight_array[16]);
+ weight_h[0] = _mm_unpacklo_epi8(we, zero);
+ weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
+ weight_h[2] = _mm_unpackhi_epi8(we, zero);
+ weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
+ }
+}
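+
+// sm_weight_arrays stores the weight vectors for all block dimensions back
+// to back, with the weights for dimension N starting at offset N; hence the
+// offsets 4, 8 and 16 used above.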
+
+static INLINE void smooth_pred_8xh(const __m128i *pixels, const __m128i *wh,
+ const __m128i *ww, int h, uint8_t *dst,
+ ptrdiff_t stride, int second_half) {
+ const __m128i round = _mm_set1_epi32((1 << sm_weight_log2_scale));
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i inc = _mm_set1_epi16(0x202);
+ const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
+
+ __m128i rep = second_half ? _mm_set1_epi16(0x8008) : _mm_set1_epi16(0x8000);
+ __m128i d = _mm_set1_epi16(0x100);
+
+ int i;
+ for (i = 0; i < h; ++i) {
+ const __m128i wg_wg = _mm_shuffle_epi8(wh[0], d);
+ const __m128i sc_sc = _mm_shuffle_epi8(wh[1], d);
+ const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
+ __m128i s0 = _mm_madd_epi16(pixels[0], wh_sc);
+ __m128i s1 = _mm_madd_epi16(pixels[1], wh_sc);
+
+ __m128i b = _mm_shuffle_epi8(pixels[2], rep);
+ b = _mm_unpacklo_epi16(b, pixels[3]);
+ __m128i sum0 = _mm_madd_epi16(b, ww[0]);
+ __m128i sum1 = _mm_madd_epi16(b, ww[1]);
+
+ s0 = _mm_add_epi32(s0, sum0);
+ s0 = _mm_add_epi32(s0, round);
+ s0 = _mm_srai_epi32(s0, 1 + sm_weight_log2_scale);
+
+ s1 = _mm_add_epi32(s1, sum1);
+ s1 = _mm_add_epi32(s1, round);
+ s1 = _mm_srai_epi32(s1, 1 + sm_weight_log2_scale);
+
+ sum0 = _mm_packus_epi16(s0, s1);
+ sum0 = _mm_shuffle_epi8(sum0, gat);
+ _mm_storel_epi64((__m128i *)dst, sum0);
+ dst += stride;
+
+ rep = _mm_add_epi16(rep, one);
+ d = _mm_add_epi16(d, inc);
+ }
+}
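+
+// The second_half flag seeds 'rep' with 0x8008 instead of 0x8000, so the
+// byte shuffle starts selecting at left[8] and the loop covers rows 8..15 of
+// the left column.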
+
+void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i pixels[4];
+ load_pixel_w8(above, left, 4, pixels);
+
+ __m128i wh[4], ww[2];
+ load_weight_w8(sm_weight_arrays, 4, wh, ww);
+
+ smooth_pred_8xh(pixels, wh, ww, 4, dst, stride, 0);
+}
+
+void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ __m128i pixels[4];
+ load_pixel_w8(above, left, 8, pixels);
+
+ __m128i wh[4], ww[2];
+ load_weight_w8(sm_weight_arrays, 8, wh, ww);
+
+ smooth_pred_8xh(pixels, wh, ww, 8, dst, stride, 0);
+}
+
+void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m128i pixels[4];
+ load_pixel_w8(above, left, 16, pixels);
+
+ __m128i wh[4], ww[2];
+ load_weight_w8(sm_weight_arrays, 16, wh, ww);
+
+ smooth_pred_8xh(pixels, wh, ww, 8, dst, stride, 0);
+ dst += stride << 3;
+ smooth_pred_8xh(pixels, &wh[2], ww, 8, dst, stride, 1);
+}
+
+// pixels[0]: above and below_pred interleave vector, 1/4
+// pixels[1]: above and below_pred interleave vector, 2/4
+// pixels[2]: above and below_pred interleave vector, 3/4
+// pixels[3]: above and below_pred interleave vector, 4/4
+// pixels[4]: left vector
+// pixels[5]: left vector, h = 32 only
+// pixels[6]: right_pred vector
+static INLINE void load_pixel_w16(const uint8_t *above, const uint8_t *left,
+ int height, __m128i *pixels) {
+ __m128i ab = _mm_load_si128((const __m128i *)above);
+ pixels[6] = _mm_set1_epi16((uint16_t)above[15]);
+ pixels[4] = _mm_load_si128((const __m128i *)left);
+ pixels[5] = _mm_load_si128((const __m128i *)(left + 16));
+ const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
+ const __m128i zero = _mm_setzero_si128();
+
+ __m128i x = _mm_unpacklo_epi8(ab, zero);
+ pixels[0] = _mm_unpacklo_epi16(x, bp);
+ pixels[1] = _mm_unpackhi_epi16(x, bp);
+
+ x = _mm_unpackhi_epi8(ab, zero);
+ pixels[2] = _mm_unpacklo_epi16(x, bp);
+ pixels[3] = _mm_unpackhi_epi16(x, bp);
+}
+
+// weight_h[0]: weight_h vector
+// weight_h[1]: scale - weight_h vector
+// weight_h[2]: same as [0], second half for height = 16 only
+// weight_h[3]: same as [1], second half for height = 16 only
+// ... ...
+// weight_w[0]: weights_w and scale - weights_w interleave vector, first half
+// weight_w[1]: weights_w and scale - weights_w interleave vector, second half
+// ... ...
+static INLINE void load_weight_w16(const uint8_t *weight_array, int height,
+ __m128i *weight_h, __m128i *weight_w) {
+ const __m128i zero = _mm_setzero_si128();
+ __m128i w8 = _mm_loadu_si128((const __m128i *)&weight_array[8]);
+ __m128i w16 = _mm_loadu_si128((const __m128i *)&weight_array[16]);
+ __m128i w32_0 = _mm_loadu_si128((const __m128i *)&weight_array[32]);
+ __m128i w32_1 = _mm_loadu_si128((const __m128i *)&weight_array[32 + 16]);
+ const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
+
+ if (height == 8) {
+ weight_h[0] = _mm_unpacklo_epi8(w8, zero);
+ weight_h[1] = _mm_sub_epi16(d, weight_h[0]); // scale - weight_h
+
+ __m128i x = _mm_unpacklo_epi8(w16, zero);
+ __m128i y = _mm_sub_epi16(d, x);
+ weight_w[0] = _mm_unpacklo_epi16(x, y);
+ weight_w[1] = _mm_unpackhi_epi16(x, y);
+ x = _mm_unpackhi_epi8(w16, zero);
+ y = _mm_sub_epi16(d, x);
+ weight_w[2] = _mm_unpacklo_epi16(x, y);
+ weight_w[3] = _mm_unpackhi_epi16(x, y);
+ }
+
+ if (height == 16) {
+ weight_h[0] = _mm_unpacklo_epi8(w16, zero);
+ weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
+ weight_h[2] = _mm_unpackhi_epi8(w16, zero);
+ weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
+
+ weight_w[0] = _mm_unpacklo_epi16(weight_h[0], weight_h[1]);
+ weight_w[1] = _mm_unpackhi_epi16(weight_h[0], weight_h[1]);
+ weight_w[2] = _mm_unpacklo_epi16(weight_h[2], weight_h[3]);
+ weight_w[3] = _mm_unpackhi_epi16(weight_h[2], weight_h[3]);
+ }
+
+ if (height == 32) {
+ weight_h[0] = _mm_unpacklo_epi8(w32_0, zero);
+ weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
+ weight_h[2] = _mm_unpackhi_epi8(w32_0, zero);
+ weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
+
+ __m128i x = _mm_unpacklo_epi8(w16, zero);
+ __m128i y = _mm_sub_epi16(d, x);
+ weight_w[0] = _mm_unpacklo_epi16(x, y);
+ weight_w[1] = _mm_unpackhi_epi16(x, y);
+ x = _mm_unpackhi_epi8(w16, zero);
+ y = _mm_sub_epi16(d, x);
+ weight_w[2] = _mm_unpacklo_epi16(x, y);
+ weight_w[3] = _mm_unpackhi_epi16(x, y);
+
+ weight_h[4] = _mm_unpacklo_epi8(w32_1, zero);
+ weight_h[5] = _mm_sub_epi16(d, weight_h[4]);
+ weight_h[6] = _mm_unpackhi_epi8(w32_1, zero);
+ weight_h[7] = _mm_sub_epi16(d, weight_h[6]);
+ }
+}
+
+static INLINE void smooth_pred_16x8(const __m128i *pixels, const __m128i *wh,
+ const __m128i *ww, uint8_t *dst,
+ ptrdiff_t stride, int quarter) {
+ __m128i d = _mm_set1_epi16(0x100);
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i inc = _mm_set1_epi16(0x202);
+ const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
+ const __m128i round = _mm_set1_epi32((1 << sm_weight_log2_scale));
+ __m128i rep =
+ (quarter % 2 == 0) ? _mm_set1_epi16(0x8000) : _mm_set1_epi16(0x8008);
+ const __m128i left = (quarter < 2) ? pixels[4] : pixels[5];
+
+ int i;
+ for (i = 0; i < 8; ++i) {
+ const __m128i wg_wg = _mm_shuffle_epi8(wh[0], d);
+ const __m128i sc_sc = _mm_shuffle_epi8(wh[1], d);
+ const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
+ __m128i s0 = _mm_madd_epi16(pixels[0], wh_sc);
+ __m128i s1 = _mm_madd_epi16(pixels[1], wh_sc);
+ __m128i s2 = _mm_madd_epi16(pixels[2], wh_sc);
+ __m128i s3 = _mm_madd_epi16(pixels[3], wh_sc);
+
+ __m128i b = _mm_shuffle_epi8(left, rep);
+ b = _mm_unpacklo_epi16(b, pixels[6]);
+ __m128i sum0 = _mm_madd_epi16(b, ww[0]);
+ __m128i sum1 = _mm_madd_epi16(b, ww[1]);
+ __m128i sum2 = _mm_madd_epi16(b, ww[2]);
+ __m128i sum3 = _mm_madd_epi16(b, ww[3]);
+
+ s0 = _mm_add_epi32(s0, sum0);
+ s0 = _mm_add_epi32(s0, round);
+ s0 = _mm_srai_epi32(s0, 1 + sm_weight_log2_scale);
+
+ s1 = _mm_add_epi32(s1, sum1);
+ s1 = _mm_add_epi32(s1, round);
+ s1 = _mm_srai_epi32(s1, 1 + sm_weight_log2_scale);
+
+ s2 = _mm_add_epi32(s2, sum2);
+ s2 = _mm_add_epi32(s2, round);
+ s2 = _mm_srai_epi32(s2, 1 + sm_weight_log2_scale);
+
+ s3 = _mm_add_epi32(s3, sum3);
+ s3 = _mm_add_epi32(s3, round);
+ s3 = _mm_srai_epi32(s3, 1 + sm_weight_log2_scale);
+
+ sum0 = _mm_packus_epi16(s0, s1);
+ sum0 = _mm_shuffle_epi8(sum0, gat);
+ sum1 = _mm_packus_epi16(s2, s3);
+ sum1 = _mm_shuffle_epi8(sum1, gat);
+
+ _mm_storel_epi64((__m128i *)dst, sum0);
+ _mm_storel_epi64((__m128i *)(dst + 8), sum1);
+
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ d = _mm_add_epi16(d, inc);
+ }
+}
+
+void aom_smooth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m128i pixels[7];
+ load_pixel_w16(above, left, 8, pixels);
+
+ __m128i wh[2], ww[4];
+ load_weight_w16(sm_weight_arrays, 8, wh, ww);
+
+ smooth_pred_16x8(pixels, wh, ww, dst, stride, 0);
+}
+
+void aom_smooth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m128i pixels[7];
+ load_pixel_w16(above, left, 16, pixels);
+
+ __m128i wh[4], ww[4];
+ load_weight_w16(sm_weight_arrays, 16, wh, ww);
+
+ smooth_pred_16x8(pixels, wh, ww, dst, stride, 0);
+ dst += stride << 3;
+ smooth_pred_16x8(pixels, &wh[2], ww, dst, stride, 1);
+}
+
+void aom_smooth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m128i pixels[7];
+ load_pixel_w16(above, left, 32, pixels);
+
+ __m128i wh[8], ww[4];
+ load_weight_w16(sm_weight_arrays, 32, wh, ww);
+
+ smooth_pred_16x8(pixels, wh, ww, dst, stride, 0);
+ dst += stride << 3;
+ smooth_pred_16x8(pixels, &wh[2], ww, dst, stride, 1);
+ dst += stride << 3;
+ smooth_pred_16x8(pixels, &wh[4], ww, dst, stride, 2);
+ dst += stride << 3;
+ smooth_pred_16x8(pixels, &wh[6], ww, dst, stride, 3);
+}
+
+static INLINE void load_pixel_w32(const uint8_t *above, const uint8_t *left,
+ int height, __m128i *pixels) {
+ __m128i ab0 = _mm_load_si128((const __m128i *)above);
+ __m128i ab1 = _mm_load_si128((const __m128i *)(above + 16));
+
+ pixels[10] = _mm_set1_epi16((uint16_t)above[31]);
+ pixels[8] = _mm_load_si128((const __m128i *)left);
+ pixels[9] = _mm_load_si128((const __m128i *)(left + 16));
+
+ const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
+ const __m128i zero = _mm_setzero_si128();
+
+ __m128i x = _mm_unpacklo_epi8(ab0, zero);
+ pixels[0] = _mm_unpacklo_epi16(x, bp);
+ pixels[1] = _mm_unpackhi_epi16(x, bp);
+
+ x = _mm_unpackhi_epi8(ab0, zero);
+ pixels[2] = _mm_unpacklo_epi16(x, bp);
+ pixels[3] = _mm_unpackhi_epi16(x, bp);
+
+ x = _mm_unpacklo_epi8(ab1, zero);
+ pixels[4] = _mm_unpacklo_epi16(x, bp);
+ pixels[5] = _mm_unpackhi_epi16(x, bp);
+
+ x = _mm_unpackhi_epi8(ab1, zero);
+ pixels[6] = _mm_unpacklo_epi16(x, bp);
+ pixels[7] = _mm_unpackhi_epi16(x, bp);
+}
+
+static INLINE void load_weight_w32(const uint8_t *weight_array, int height,
+ __m128i *weight_h, __m128i *weight_w) {
+ const __m128i zero = _mm_setzero_si128();
+ __m128i w16 = _mm_loadu_si128((const __m128i *)&weight_array[16]);
+ __m128i w32_0 = _mm_loadu_si128((const __m128i *)&weight_array[32]);
+ __m128i w32_1 = _mm_loadu_si128((const __m128i *)&weight_array[32 + 16]);
+ const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
+
+ if (height == 16) {
+ weight_h[0] = _mm_unpacklo_epi8(w16, zero);
+ weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
+ weight_h[2] = _mm_unpackhi_epi8(w16, zero);
+ weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
+
+ __m128i x = _mm_unpacklo_epi8(w32_0, zero);
+ __m128i y = _mm_sub_epi16(d, x);
+ weight_w[0] = _mm_unpacklo_epi16(x, y);
+ weight_w[1] = _mm_unpackhi_epi16(x, y);
+
+ x = _mm_unpackhi_epi8(w32_0, zero);
+ y = _mm_sub_epi16(d, x);
+ weight_w[2] = _mm_unpacklo_epi16(x, y);
+ weight_w[3] = _mm_unpackhi_epi16(x, y);
+
+ x = _mm_unpacklo_epi8(w32_1, zero);
+ y = _mm_sub_epi16(d, x);
+ weight_w[4] = _mm_unpacklo_epi16(x, y);
+ weight_w[5] = _mm_unpackhi_epi16(x, y);
+
+ x = _mm_unpackhi_epi8(w32_1, zero);
+ y = _mm_sub_epi16(d, x);
+ weight_w[6] = _mm_unpacklo_epi16(x, y);
+ weight_w[7] = _mm_unpackhi_epi16(x, y);
+ }
+
+ if (height == 32) {
+ weight_h[0] = _mm_unpacklo_epi8(w32_0, zero);
+ weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
+ weight_h[2] = _mm_unpackhi_epi8(w32_0, zero);
+ weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
+
+ weight_h[4] = _mm_unpacklo_epi8(w32_1, zero);
+ weight_h[5] = _mm_sub_epi16(d, weight_h[4]);
+ weight_h[6] = _mm_unpackhi_epi8(w32_1, zero);
+ weight_h[7] = _mm_sub_epi16(d, weight_h[6]);
+
+ weight_w[0] = _mm_unpacklo_epi16(weight_h[0], weight_h[1]);
+ weight_w[1] = _mm_unpackhi_epi16(weight_h[0], weight_h[1]);
+ weight_w[2] = _mm_unpacklo_epi16(weight_h[2], weight_h[3]);
+ weight_w[3] = _mm_unpackhi_epi16(weight_h[2], weight_h[3]);
+
+ weight_w[4] = _mm_unpacklo_epi16(weight_h[4], weight_h[5]);
+ weight_w[5] = _mm_unpackhi_epi16(weight_h[4], weight_h[5]);
+ weight_w[6] = _mm_unpacklo_epi16(weight_h[6], weight_h[7]);
+ weight_w[7] = _mm_unpackhi_epi16(weight_h[6], weight_h[7]);
+ }
+}
+
+static INLINE void smooth_pred_32x8(const __m128i *pixels, const __m128i *wh,
+ const __m128i *ww, uint8_t *dst,
+ ptrdiff_t stride, int quarter) {
+ __m128i d = _mm_set1_epi16(0x100);
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i inc = _mm_set1_epi16(0x202);
+ const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
+ const __m128i round = _mm_set1_epi32((1 << sm_weight_log2_scale));
+ __m128i rep =
+ (quarter % 2 == 0) ? _mm_set1_epi16(0x8000) : _mm_set1_epi16(0x8008);
+ const __m128i left = (quarter < 2) ? pixels[8] : pixels[9];
+
+ int i;
+ for (i = 0; i < 8; ++i) {
+ const __m128i wg_wg = _mm_shuffle_epi8(wh[0], d);
+ const __m128i sc_sc = _mm_shuffle_epi8(wh[1], d);
+ const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
+
+ int j;
+ __m128i s[8];
+ __m128i b = _mm_shuffle_epi8(left, rep);
+ b = _mm_unpacklo_epi16(b, pixels[10]);
+
+ for (j = 0; j < 8; ++j) {
+ s[j] = _mm_madd_epi16(pixels[j], wh_sc);
+ s[j] = _mm_add_epi32(s[j], _mm_madd_epi16(b, ww[j]));
+ s[j] = _mm_add_epi32(s[j], round);
+ s[j] = _mm_srai_epi32(s[j], 1 + sm_weight_log2_scale);
+ }
+
+ for (j = 0; j < 8; j += 2) {
+ __m128i sum = _mm_packus_epi16(s[j], s[j + 1]);
+ sum = _mm_shuffle_epi8(sum, gat);
+ _mm_storel_epi64((__m128i *)(dst + (j << 2)), sum);
+ }
+ dst += stride;
+ rep = _mm_add_epi16(rep, one);
+ d = _mm_add_epi16(d, inc);
+ }
+}
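+
+// The j-loop produces eight 4-lane partial rows; each pair (s[j], s[j + 1])
+// is packed to bytes and stored as 8 output pixels at byte offset j * 4,
+// covering the full 32-pixel row.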
+
+void aom_smooth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m128i pixels[11];
+ load_pixel_w32(above, left, 16, pixels);
+
+ __m128i wh[4], ww[8];
+ load_weight_w32(sm_weight_arrays, 16, wh, ww);
+
+ smooth_pred_32x8(pixels, wh, ww, dst, stride, 0);
+ dst += stride << 3;
+ smooth_pred_32x8(pixels, &wh[2], ww, dst, stride, 1);
+}
+
+void aom_smooth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ __m128i pixels[11];
+ load_pixel_w32(above, left, 32, pixels);
+
+ __m128i wh[8], ww[8];
+ load_weight_w32(sm_weight_arrays, 32, wh, ww);
+
+ smooth_pred_32x8(pixels, &wh[0], ww, dst, stride, 0);
+ dst += stride << 3;
+ smooth_pred_32x8(pixels, &wh[2], ww, dst, stride, 1);
+ dst += stride << 3;
+ smooth_pred_32x8(pixels, &wh[4], ww, dst, stride, 2);
+ dst += stride << 3;
+ smooth_pred_32x8(pixels, &wh[6], ww, dst, stride, 3);
+}
diff --git a/third_party/aom/aom_dsp/x86/inv_txfm_common_avx2.h b/third_party/aom/aom_dsp/x86/inv_txfm_common_avx2.h
index 4238e651b..26c5cfe59 100644
--- a/third_party/aom/aom_dsp/x86/inv_txfm_common_avx2.h
+++ b/third_party/aom/aom_dsp/x86/inv_txfm_common_avx2.h
@@ -18,17 +18,17 @@
#include "aom_dsp/x86/txfm_common_avx2.h"
static INLINE void load_coeff(const tran_low_t *coeff, __m256i *in) {
-#if CONFIG_HIGHBITDEPTH
- *in = _mm256_setr_epi16(
- (int16_t)coeff[0], (int16_t)coeff[1], (int16_t)coeff[2],
- (int16_t)coeff[3], (int16_t)coeff[4], (int16_t)coeff[5],
- (int16_t)coeff[6], (int16_t)coeff[7], (int16_t)coeff[8],
- (int16_t)coeff[9], (int16_t)coeff[10], (int16_t)coeff[11],
- (int16_t)coeff[12], (int16_t)coeff[13], (int16_t)coeff[14],
- (int16_t)coeff[15]);
-#else
- *in = _mm256_loadu_si256((const __m256i *)coeff);
-#endif
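+ // Note: sizeof(tran_low_t) is known at compile time, so the untaken branch
+ // below is eliminated; this replaces the CONFIG_HIGHBITDEPTH #if while
+ // keeping both paths visible to the compiler.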
+ if (sizeof(tran_low_t) == 4) {
+ *in = _mm256_setr_epi16(
+ (int16_t)coeff[0], (int16_t)coeff[1], (int16_t)coeff[2],
+ (int16_t)coeff[3], (int16_t)coeff[4], (int16_t)coeff[5],
+ (int16_t)coeff[6], (int16_t)coeff[7], (int16_t)coeff[8],
+ (int16_t)coeff[9], (int16_t)coeff[10], (int16_t)coeff[11],
+ (int16_t)coeff[12], (int16_t)coeff[13], (int16_t)coeff[14],
+ (int16_t)coeff[15]);
+ } else {
+ *in = _mm256_loadu_si256((const __m256i *)coeff);
+ }
}
static INLINE void load_buffer_16x16(const tran_low_t *coeff, __m256i *in) {
diff --git a/third_party/aom/aom_dsp/x86/inv_txfm_sse2.h b/third_party/aom/aom_dsp/x86/inv_txfm_sse2.h
index 95d246c3c..342816977 100644
--- a/third_party/aom/aom_dsp/x86/inv_txfm_sse2.h
+++ b/third_party/aom/aom_dsp/x86/inv_txfm_sse2.h
@@ -133,12 +133,12 @@ static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
// Function to allow 8 bit optimisations to be used when profile 0 is used with
// highbitdepth enabled
static INLINE __m128i load_input_data(const tran_low_t *data) {
-#if CONFIG_HIGHBITDEPTH
- return octa_set_epi16(data[0], data[1], data[2], data[3], data[4], data[5],
- data[6], data[7]);
-#else
- return _mm_load_si128((const __m128i *)data);
-#endif
+ if (sizeof(tran_low_t) == 4) {
+ return octa_set_epi16(data[0], data[1], data[2], data[3], data[4], data[5],
+ data[6], data[7]);
+ } else {
+ return _mm_load_si128((const __m128i *)data);
+ }
}
static INLINE void load_buffer_8x16(const tran_low_t *input, __m128i *in) {
diff --git a/third_party/aom/aom_dsp/x86/loopfilter_sse2.c b/third_party/aom/aom_dsp/x86/loopfilter_sse2.c
index 7e134dc63..8343dbbed 100644
--- a/third_party/aom/aom_dsp/x86/loopfilter_sse2.c
+++ b/third_party/aom/aom_dsp/x86/loopfilter_sse2.c
@@ -178,10 +178,20 @@ void aom_lpf_horizontal_4_sse2(uint8_t *s, int p /* pitch */,
#endif // !CONFIG_PARALLEL_DEBLOCKING
FILTER4;
+#if CONFIG_PARALLEL_DEBLOCKING
+ *(int32_t *)(s - 1 * p) = _mm_cvtsi128_si32(ps1ps0);
+ ps1ps0 = _mm_srli_si128(ps1ps0, 8);
+ *(int32_t *)(s - 2 * p) = _mm_cvtsi128_si32(ps1ps0);
+
+ *(int32_t *)(s + 0 * p) = _mm_cvtsi128_si32(qs1qs0);
+ qs1qs0 = _mm_srli_si128(qs1qs0, 8);
+ *(int32_t *)(s + 1 * p) = _mm_cvtsi128_si32(qs1qs0);
+#else
_mm_storeh_pi((__m64 *)(s - 2 * p), _mm_castsi128_ps(ps1ps0)); // *op1
_mm_storel_epi64((__m128i *)(s - 1 * p), ps1ps0); // *op0
_mm_storel_epi64((__m128i *)(s + 0 * p), qs1qs0); // *oq0
_mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(qs1qs0)); // *oq1
+#endif
}
void aom_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */,
@@ -267,8 +277,10 @@ void aom_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */,
x0 = _mm_unpackhi_epi8(ps1ps0, qs1qs0);
// 00 20 01 21 02 22 03 23 04 24 05 25 06 26 07 27
ps1ps0 = _mm_unpacklo_epi8(ps1ps0, qs1qs0);
+#if !CONFIG_PARALLEL_DEBLOCKING
// 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
qs1qs0 = _mm_unpackhi_epi8(ps1ps0, x0);
+#endif
// 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
ps1ps0 = _mm_unpacklo_epi8(ps1ps0, x0);
@@ -279,7 +291,7 @@ void aom_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */,
*(int *)(s + 2 * p - 2) = _mm_cvtsi128_si32(ps1ps0);
ps1ps0 = _mm_srli_si128(ps1ps0, 4);
*(int *)(s + 3 * p - 2) = _mm_cvtsi128_si32(ps1ps0);
-
+#if !CONFIG_PARALLEL_DEBLOCKING
*(int *)(s + 4 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
qs1qs0 = _mm_srli_si128(qs1qs0, 4);
*(int *)(s + 5 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
@@ -287,6 +299,19 @@ void aom_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */,
*(int *)(s + 6 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
qs1qs0 = _mm_srli_si128(qs1qs0, 4);
*(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
+#endif
+}
+
+static INLINE void store_buffer_horz_8(const __m128i *x, int p, int num,
+ uint8_t *s) {
+#if CONFIG_PARALLEL_DEBLOCKING
+ *(int32_t *)(s - (num + 1) * p) = _mm_cvtsi128_si32(*x);
+ const __m128i hi = _mm_srli_si128(*x, 8);
+ *(int32_t *)(s + num * p) = _mm_cvtsi128_si32(hi);
+#else
+ _mm_storel_epi64((__m128i *)(s - (num + 1) * p), *x);
+ _mm_storeh_pi((__m64 *)(s + num * p), _mm_castsi128_ps(*x));
+#endif
}
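+
+// Under CONFIG_PARALLEL_DEBLOCKING, store_buffer_horz_8() writes only four
+// pixels per row (bytes 0-3 and 8-11 of *x); otherwise it stores the full
+// 8-byte p-side and q-side halves.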
void aom_lpf_horizontal_edge_8_sse2(unsigned char *s, int p,
@@ -580,44 +605,37 @@ void aom_lpf_horizontal_edge_8_sse2(unsigned char *s, int p,
q6p6 = _mm_andnot_si128(flat2, q6p6);
flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6);
q6p6 = _mm_or_si128(q6p6, flat2_q6p6);
- _mm_storel_epi64((__m128i *)(s - 7 * p), q6p6);
- _mm_storeh_pi((__m64 *)(s + 6 * p), _mm_castsi128_ps(q6p6));
+ store_buffer_horz_8(&q6p6, p, 6, s);
q5p5 = _mm_andnot_si128(flat2, q5p5);
flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5);
q5p5 = _mm_or_si128(q5p5, flat2_q5p5);
- _mm_storel_epi64((__m128i *)(s - 6 * p), q5p5);
- _mm_storeh_pi((__m64 *)(s + 5 * p), _mm_castsi128_ps(q5p5));
+ store_buffer_horz_8(&q5p5, p, 5, s);
q4p4 = _mm_andnot_si128(flat2, q4p4);
flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4);
q4p4 = _mm_or_si128(q4p4, flat2_q4p4);
- _mm_storel_epi64((__m128i *)(s - 5 * p), q4p4);
- _mm_storeh_pi((__m64 *)(s + 4 * p), _mm_castsi128_ps(q4p4));
+ store_buffer_horz_8(&q4p4, p, 4, s);
q3p3 = _mm_andnot_si128(flat2, q3p3);
flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3);
q3p3 = _mm_or_si128(q3p3, flat2_q3p3);
- _mm_storel_epi64((__m128i *)(s - 4 * p), q3p3);
- _mm_storeh_pi((__m64 *)(s + 3 * p), _mm_castsi128_ps(q3p3));
+ store_buffer_horz_8(&q3p3, p, 3, s);
q2p2 = _mm_andnot_si128(flat2, q2p2);
flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2);
q2p2 = _mm_or_si128(q2p2, flat2_q2p2);
- _mm_storel_epi64((__m128i *)(s - 3 * p), q2p2);
- _mm_storeh_pi((__m64 *)(s + 2 * p), _mm_castsi128_ps(q2p2));
+ store_buffer_horz_8(&q2p2, p, 2, s);
q1p1 = _mm_andnot_si128(flat2, q1p1);
flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1);
q1p1 = _mm_or_si128(q1p1, flat2_q1p1);
- _mm_storel_epi64((__m128i *)(s - 2 * p), q1p1);
- _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(q1p1));
+ store_buffer_horz_8(&q1p1, p, 1, s);
q0p0 = _mm_andnot_si128(flat2, q0p0);
flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0);
q0p0 = _mm_or_si128(q0p0, flat2_q0p0);
- _mm_storel_epi64((__m128i *)(s - 1 * p), q0p0);
- _mm_storeh_pi((__m64 *)(s - 0 * p), _mm_castsi128_ps(q0p0));
+ store_buffer_horz_8(&q0p0, p, 0, s);
}
}
@@ -651,10 +669,33 @@ static INLINE __m128i filter16_mask(const __m128i *const flat,
return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result);
}
-void aom_lpf_horizontal_edge_16_sse2(unsigned char *s, int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh) {
+typedef enum { FOUR_PIXELS, EIGHT_PIXELS, SIXTEEN_PIXELS } PixelOutput;
+
+static INLINE void store_buffer_horz_16(PixelOutput pixel_num, const __m128i *x,
+ int p, int offset, uint8_t *s) {
+ int i;
+ if (pixel_num == FOUR_PIXELS) {
+ for (i = 13; i >= 0; i--) {
+ *(int32_t *)(s - (i - offset) * p) = _mm_cvtsi128_si32(x[i]);
+ }
+ }
+ if (pixel_num == EIGHT_PIXELS) {
+ for (i = 13; i >= 0; i--) {
+ _mm_storel_epi64((__m128i *)(s - (i - offset) * p), x[i]);
+ }
+ }
+ if (pixel_num == SIXTEEN_PIXELS) {
+ for (i = 13; i >= 0; i--) {
+ _mm_storeu_si128((__m128i *)(s - (i - offset) * p), x[i]);
+ }
+ }
+}
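+
+// x[] is filled from x[13] (row s - 7 * p) down to x[0] (row s + 6 * p); the
+// 'offset' argument maps array index i to the row at s - (i - offset) * p.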
+
+static INLINE void lpf_horz_edge_16_internal(PixelOutput pixel_num,
+ unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh) {
const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
@@ -910,73 +951,62 @@ void aom_lpf_horizontal_edge_16_sse2(unsigned char *s, int p,
f_hi = _mm_add_epi16(_mm_add_epi16(p0_hi, q0_hi), f_hi);
f_hi = _mm_add_epi16(_mm_add_epi16(p5_hi, eight), f_hi);
- p6 = filter16_mask(&flat2, &p6, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s - 7 * p), p6);
+ __m128i x[14];
+ x[13] = filter16_mask(&flat2, &p6, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q1_lo, &p5_lo, &p6_lo, &p7_lo);
f_hi = filter_add2_sub2(&f_hi, &q1_hi, &p5_hi, &p6_hi, &p7_hi);
- p5 = filter16_mask(&flat2, &p5, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s - 6 * p), p5);
+ x[12] = filter16_mask(&flat2, &p5, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q2_lo, &p4_lo, &p5_lo, &p7_lo);
f_hi = filter_add2_sub2(&f_hi, &q2_hi, &p4_hi, &p5_hi, &p7_hi);
- p4 = filter16_mask(&flat2, &p4, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s - 5 * p), p4);
+ x[11] = filter16_mask(&flat2, &p4, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q3_lo, &p3_lo, &p4_lo, &p7_lo);
f_hi = filter_add2_sub2(&f_hi, &q3_hi, &p3_hi, &p4_hi, &p7_hi);
- p3 = filter16_mask(&flat2, &p3, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s - 4 * p), p3);
+ x[10] = filter16_mask(&flat2, &p3, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q4_lo, &p2_lo, &p3_lo, &p7_lo);
f_hi = filter_add2_sub2(&f_hi, &q4_hi, &p2_hi, &p3_hi, &p7_hi);
- op2 = filter16_mask(&flat2, &op2, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s - 3 * p), op2);
+ x[9] = filter16_mask(&flat2, &op2, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q5_lo, &p1_lo, &p2_lo, &p7_lo);
f_hi = filter_add2_sub2(&f_hi, &q5_hi, &p1_hi, &p2_hi, &p7_hi);
- op1 = filter16_mask(&flat2, &op1, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s - 2 * p), op1);
+ x[8] = filter16_mask(&flat2, &op1, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q6_lo, &p0_lo, &p1_lo, &p7_lo);
f_hi = filter_add2_sub2(&f_hi, &q6_hi, &p0_hi, &p1_hi, &p7_hi);
- op0 = filter16_mask(&flat2, &op0, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s - 1 * p), op0);
+ x[7] = filter16_mask(&flat2, &op0, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q0_lo, &p0_lo, &p7_lo);
f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q0_hi, &p0_hi, &p7_hi);
- oq0 = filter16_mask(&flat2, &oq0, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s - 0 * p), oq0);
+ x[6] = filter16_mask(&flat2, &oq0, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q1_lo, &p6_lo, &q0_lo);
f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q1_hi, &p6_hi, &q0_hi);
- oq1 = filter16_mask(&flat2, &oq1, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s + 1 * p), oq1);
+ x[5] = filter16_mask(&flat2, &oq1, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q2_lo, &p5_lo, &q1_lo);
f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q2_hi, &p5_hi, &q1_hi);
- oq2 = filter16_mask(&flat2, &oq2, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s + 2 * p), oq2);
+ x[4] = filter16_mask(&flat2, &oq2, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q3_lo, &p4_lo, &q2_lo);
f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q3_hi, &p4_hi, &q2_hi);
- q3 = filter16_mask(&flat2, &q3, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s + 3 * p), q3);
+ x[3] = filter16_mask(&flat2, &q3, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q4_lo, &p3_lo, &q3_lo);
f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q4_hi, &p3_hi, &q3_hi);
- q4 = filter16_mask(&flat2, &q4, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s + 4 * p), q4);
+ x[2] = filter16_mask(&flat2, &q4, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q5_lo, &p2_lo, &q4_lo);
f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q5_hi, &p2_hi, &q4_hi);
- q5 = filter16_mask(&flat2, &q5, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s + 5 * p), q5);
+ x[1] = filter16_mask(&flat2, &q5, &f_lo, &f_hi);
f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q6_lo, &p1_lo, &q5_lo);
f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q6_hi, &p1_hi, &q5_hi);
- q6 = filter16_mask(&flat2, &q6, &f_lo, &f_hi);
- _mm_storeu_si128((__m128i *)(s + 6 * p), q6);
+ x[0] = filter16_mask(&flat2, &q6, &f_lo, &f_hi);
+
+ store_buffer_horz_16(pixel_num, x, p, 6, s);
}
// wide flat
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1186,15 +1216,35 @@ void aom_lpf_horizontal_8_sse2(unsigned char *s, int p,
p2 = _mm_and_si128(flat, p2);
p2 = _mm_or_si128(work_a, p2);
+#if CONFIG_PARALLEL_DEBLOCKING
+ *(int32_t *)(s - 3 * p) = _mm_cvtsi128_si32(p2);
+ *(int32_t *)(s - 2 * p) = _mm_cvtsi128_si32(p1);
+ *(int32_t *)(s - 1 * p) = _mm_cvtsi128_si32(p0);
+ *(int32_t *)(s + 0 * p) = _mm_cvtsi128_si32(q0);
+ *(int32_t *)(s + 1 * p) = _mm_cvtsi128_si32(q1);
+ *(int32_t *)(s + 2 * p) = _mm_cvtsi128_si32(q2);
+#else
_mm_storel_epi64((__m128i *)(s - 3 * p), p2);
_mm_storel_epi64((__m128i *)(s - 2 * p), p1);
_mm_storel_epi64((__m128i *)(s - 1 * p), p0);
_mm_storel_epi64((__m128i *)(s + 0 * p), q0);
_mm_storel_epi64((__m128i *)(s + 1 * p), q1);
_mm_storel_epi64((__m128i *)(s + 2 * p), q2);
+#endif
}
}
+void aom_lpf_horizontal_edge_16_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh) {
+#if CONFIG_PARALLEL_DEBLOCKING
+ lpf_horz_edge_16_internal(FOUR_PIXELS, s, p, _blimit, _limit, _thresh);
+#else
+ lpf_horz_edge_16_internal(SIXTEEN_PIXELS, s, p, _blimit, _limit, _thresh);
+#endif
+}
+
void aom_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
diff --git a/third_party/aom/aom_dsp/x86/lpf_common_sse2.h b/third_party/aom/aom_dsp/x86/lpf_common_sse2.h
new file mode 100644
index 000000000..027c890dc
--- /dev/null
+++ b/third_party/aom/aom_dsp/x86/lpf_common_sse2.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef _AOM_DSP_X86_LPF_COMMON_X86_H
+#define _AOM_DSP_X86_LPF_COMMON_X86_H
+
+#include <emmintrin.h> // SSE2
+
+#include "./aom_config.h"
+
+static INLINE void highbd_transpose(uint16_t *src[], int in_p, uint16_t *dst[],
+ int out_p, int num_8x8_to_transpose) {
+ int idx8x8 = 0;
+ __m128i p0, p1, p2, p3, p4, p5, p6, p7, x0, x1, x2, x3, x4, x5, x6, x7;
+ do {
+ uint16_t *in = src[idx8x8];
+ uint16_t *out = dst[idx8x8];
+
+ p0 =
+ _mm_loadu_si128((__m128i *)(in + 0 * in_p)); // 00 01 02 03 04 05 06 07
+ p1 =
+ _mm_loadu_si128((__m128i *)(in + 1 * in_p)); // 10 11 12 13 14 15 16 17
+ p2 =
+ _mm_loadu_si128((__m128i *)(in + 2 * in_p)); // 20 21 22 23 24 25 26 27
+ p3 =
+ _mm_loadu_si128((__m128i *)(in + 3 * in_p)); // 30 31 32 33 34 35 36 37
+ p4 =
+ _mm_loadu_si128((__m128i *)(in + 4 * in_p)); // 40 41 42 43 44 45 46 47
+ p5 =
+ _mm_loadu_si128((__m128i *)(in + 5 * in_p)); // 50 51 52 53 54 55 56 57
+ p6 =
+ _mm_loadu_si128((__m128i *)(in + 6 * in_p)); // 60 61 62 63 64 65 66 67
+ p7 =
+ _mm_loadu_si128((__m128i *)(in + 7 * in_p)); // 70 71 72 73 74 75 76 77
+ // 00 10 01 11 02 12 03 13
+ x0 = _mm_unpacklo_epi16(p0, p1);
+ // 20 30 21 31 22 32 23 33
+ x1 = _mm_unpacklo_epi16(p2, p3);
+ // 40 50 41 51 42 52 43 53
+ x2 = _mm_unpacklo_epi16(p4, p5);
+ // 60 70 61 71 62 72 63 73
+ x3 = _mm_unpacklo_epi16(p6, p7);
+ // 00 10 20 30 01 11 21 31
+ x4 = _mm_unpacklo_epi32(x0, x1);
+ // 40 50 60 70 41 51 61 71
+ x5 = _mm_unpacklo_epi32(x2, x3);
+ // 00 10 20 30 40 50 60 70
+ x6 = _mm_unpacklo_epi64(x4, x5);
+ // 01 11 21 31 41 51 61 71
+ x7 = _mm_unpackhi_epi64(x4, x5);
+
+ _mm_storeu_si128((__m128i *)(out + 0 * out_p), x6);
+ // 00 10 20 30 40 50 60 70
+ _mm_storeu_si128((__m128i *)(out + 1 * out_p), x7);
+ // 01 11 21 31 41 51 61 71
+
+ // 02 12 22 32 03 13 23 33
+ x4 = _mm_unpackhi_epi32(x0, x1);
+ // 42 52 62 72 43 53 63 73
+ x5 = _mm_unpackhi_epi32(x2, x3);
+ // 02 12 22 32 42 52 62 72
+ x6 = _mm_unpacklo_epi64(x4, x5);
+ // 03 13 23 33 43 53 63 73
+ x7 = _mm_unpackhi_epi64(x4, x5);
+
+ _mm_storeu_si128((__m128i *)(out + 2 * out_p), x6);
+ // 02 12 22 32 42 52 62 72
+ _mm_storeu_si128((__m128i *)(out + 3 * out_p), x7);
+ // 03 13 23 33 43 53 63 73
+
+ // 04 14 05 15 06 16 07 17
+ x0 = _mm_unpackhi_epi16(p0, p1);
+ // 24 34 25 35 26 36 27 37
+ x1 = _mm_unpackhi_epi16(p2, p3);
+ // 44 54 45 55 46 56 47 57
+ x2 = _mm_unpackhi_epi16(p4, p5);
+ // 64 74 65 75 66 76 67 77
+ x3 = _mm_unpackhi_epi16(p6, p7);
+ // 04 14 24 34 05 15 25 35
+ x4 = _mm_unpacklo_epi32(x0, x1);
+ // 44 54 64 74 45 55 65 75
+ x5 = _mm_unpacklo_epi32(x2, x3);
+ // 04 14 24 34 44 54 64 74
+ x6 = _mm_unpacklo_epi64(x4, x5);
+ // 05 15 25 35 45 55 65 75
+ x7 = _mm_unpackhi_epi64(x4, x5);
+
+ _mm_storeu_si128((__m128i *)(out + 4 * out_p), x6);
+ // 04 14 24 34 44 54 64 74
+ _mm_storeu_si128((__m128i *)(out + 5 * out_p), x7);
+ // 05 15 25 35 45 55 65 75
+
+ // 06 16 26 36 07 17 27 37
+ x4 = _mm_unpackhi_epi32(x0, x1);
+ // 46 56 66 76 47 57 67 77
+ x5 = _mm_unpackhi_epi32(x2, x3);
+ // 06 16 26 36 46 56 66 76
+ x6 = _mm_unpacklo_epi64(x4, x5);
+ // 07 17 27 37 47 57 67 77
+ x7 = _mm_unpackhi_epi64(x4, x5);
+
+ _mm_storeu_si128((__m128i *)(out + 6 * out_p), x6);
+ // 06 16 26 36 46 56 66 76
+ _mm_storeu_si128((__m128i *)(out + 7 * out_p), x7);
+ // 07 17 27 37 47 57 67 77
+ } while (++idx8x8 < num_8x8_to_transpose);
+}
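+
+// The transpose works in three interleave stages per 8x8 tile: 16-bit
+// unpacks pair adjacent rows, 32-bit unpacks merge row pairs into quads, and
+// 64-bit unpacks emit the final transposed rows, as traced in the comments
+// above.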
+
+static INLINE void highbd_transpose8x16(uint16_t *in0, uint16_t *in1, int in_p,
+ uint16_t *out, int out_p) {
+ uint16_t *src0[1];
+ uint16_t *src1[1];
+ uint16_t *dest0[1];
+ uint16_t *dest1[1];
+ src0[0] = in0;
+ src1[0] = in1;
+ dest0[0] = out;
+ dest1[0] = out + 8;
+ highbd_transpose(src0, in_p, dest0, out_p, 1);
+ highbd_transpose(src1, in_p, dest1, out_p, 1);
+}
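+
+// highbd_transpose8x16() transposes two 8x8 tiles in one call: the tile at
+// in1 is written 8 columns to the right of the tile at in0, producing an
+// 8x16 output block.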
+#endif // _AOM_DSP_X86_LPF_COMMON_X86_H
diff --git a/third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.c b/third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.c
index 6a73ac460..2536f91d2 100644
--- a/third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.c
+++ b/third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.c
@@ -98,7 +98,13 @@ MASKSAD4XN_SSSE3(16)
MASKSADMXN_SSSE3(16, 4)
MASKSAD8XN_SSSE3(32)
MASKSADMXN_SSSE3(32, 8)
-#endif
+MASKSADMXN_SSSE3(16, 64)
+MASKSADMXN_SSSE3(64, 16)
+#if CONFIG_EXT_PARTITION
+MASKSADMXN_SSSE3(32, 128)
+MASKSADMXN_SSSE3(128, 32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
int src_stride,
@@ -294,7 +300,13 @@ HIGHBD_MASKSAD4XN_SSSE3(16)
HIGHBD_MASKSADMXN_SSSE3(16, 4)
HIGHBD_MASKSADMXN_SSSE3(8, 32)
HIGHBD_MASKSADMXN_SSSE3(32, 8)
-#endif
+HIGHBD_MASKSADMXN_SSSE3(16, 64)
+HIGHBD_MASKSADMXN_SSSE3(64, 16)
+#if CONFIG_EXT_PARTITION
+HIGHBD_MASKSADMXN_SSSE3(32, 128)
+HIGHBD_MASKSADMXN_SSSE3(128, 32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
static INLINE unsigned int highbd_masked_sad_ssse3(
const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
diff --git a/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c b/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
index 24e7ed1c6..3ffe132be 100644
--- a/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/third_party/aom/aom_dsp/x86/masked_variance_intrin_ssse3.c
@@ -131,7 +131,13 @@ MASK_SUBPIX_VAR4XH_SSSE3(16)
MASK_SUBPIX_VAR_SSSE3(16, 4)
MASK_SUBPIX_VAR8XH_SSSE3(32)
MASK_SUBPIX_VAR_SSSE3(32, 8)
-#endif
+MASK_SUBPIX_VAR_SSSE3(64, 16)
+MASK_SUBPIX_VAR_SSSE3(16, 64)
+#if CONFIG_EXT_PARTITION
+MASK_SUBPIX_VAR_SSSE3(128, 32)
+MASK_SUBPIX_VAR_SSSE3(32, 128)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
static INLINE __m128i filter_block(const __m128i a, const __m128i b,
const __m128i filter) {
@@ -712,6 +718,12 @@ HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(16)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 4)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 32)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(32, 8)
+HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 64)
+HIGHBD_MASK_SUBPIX_VAR_SSSE3(64, 16)
+#if CONFIG_EXT_PARTITION
+HIGHBD_MASK_SUBPIX_VAR_SSSE3(32, 128)
+HIGHBD_MASK_SUBPIX_VAR_SSSE3(128, 32)
+#endif
#endif
static INLINE __m128i highbd_filter_block(const __m128i a, const __m128i b,
diff --git a/third_party/aom/aom_dsp/x86/obmc_sad_sse4.c b/third_party/aom/aom_dsp/x86/obmc_sad_sse4.c
index 3fd6f71e5..52dd508ec 100644
--- a/third_party/aom/aom_dsp/x86/obmc_sad_sse4.c
+++ b/third_party/aom/aom_dsp/x86/obmc_sad_sse4.c
@@ -142,6 +142,8 @@ OBMCSADWXH(4, 16)
OBMCSADWXH(16, 4)
OBMCSADWXH(8, 32)
OBMCSADWXH(32, 8)
+OBMCSADWXH(16, 64)
+OBMCSADWXH(64, 16)
#endif
////////////////////////////////////////////////////////////////////////////////
@@ -271,5 +273,7 @@ HBD_OBMCSADWXH(4, 16)
HBD_OBMCSADWXH(16, 4)
HBD_OBMCSADWXH(8, 32)
HBD_OBMCSADWXH(32, 8)
+HBD_OBMCSADWXH(16, 64)
+HBD_OBMCSADWXH(64, 16)
#endif
#endif // CONFIG_HIGHBITDEPTH
diff --git a/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c b/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c
index 44cfa8e28..392616af3 100644
--- a/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c
+++ b/third_party/aom/aom_dsp/x86/obmc_variance_sse4.c
@@ -151,7 +151,13 @@ OBMCVARWXH(4, 16)
OBMCVARWXH(16, 4)
OBMCVARWXH(8, 32)
OBMCVARWXH(32, 8)
-#endif
+OBMCVARWXH(16, 64)
+OBMCVARWXH(64, 16)
+#if CONFIG_EXT_PARTITION
+OBMCVARWXH(32, 128)
+OBMCVARWXH(128, 32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
////////////////////////////////////////////////////////////////////////////////
// High bit-depth
@@ -364,5 +370,11 @@ HBD_OBMCVARWXH(4, 16)
HBD_OBMCVARWXH(16, 4)
HBD_OBMCVARWXH(8, 32)
HBD_OBMCVARWXH(32, 8)
-#endif
+HBD_OBMCVARWXH(16, 64)
+HBD_OBMCVARWXH(64, 16)
+#if CONFIG_EXT_PARTITION
+HBD_OBMCVARWXH(32, 128)
+HBD_OBMCVARWXH(128, 32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
#endif // CONFIG_HIGHBITDEPTH
diff --git a/third_party/aom/aom_dsp/x86/quantize_sse2.c b/third_party/aom/aom_dsp/x86/quantize_sse2.c
index 890c1f01e..0e7f679d0 100644
--- a/third_party/aom/aom_dsp/x86/quantize_sse2.c
+++ b/third_party/aom/aom_dsp/x86/quantize_sse2.c
@@ -16,29 +16,29 @@
#include "aom/aom_integer.h"
static INLINE __m128i load_coefficients(const tran_low_t *coeff_ptr) {
-#if CONFIG_HIGHBITDEPTH
- return _mm_setr_epi16((int16_t)coeff_ptr[0], (int16_t)coeff_ptr[1],
- (int16_t)coeff_ptr[2], (int16_t)coeff_ptr[3],
- (int16_t)coeff_ptr[4], (int16_t)coeff_ptr[5],
- (int16_t)coeff_ptr[6], (int16_t)coeff_ptr[7]);
-#else
- return _mm_load_si128((const __m128i *)coeff_ptr);
-#endif
+ if (sizeof(tran_low_t) == 4) {
+ return _mm_setr_epi16((int16_t)coeff_ptr[0], (int16_t)coeff_ptr[1],
+ (int16_t)coeff_ptr[2], (int16_t)coeff_ptr[3],
+ (int16_t)coeff_ptr[4], (int16_t)coeff_ptr[5],
+ (int16_t)coeff_ptr[6], (int16_t)coeff_ptr[7]);
+ } else {
+ return _mm_load_si128((const __m128i *)coeff_ptr);
+ }
}
static INLINE void store_coefficients(__m128i coeff_vals,
tran_low_t *coeff_ptr) {
-#if CONFIG_HIGHBITDEPTH
- __m128i one = _mm_set1_epi16(1);
- __m128i coeff_vals_hi = _mm_mulhi_epi16(coeff_vals, one);
- __m128i coeff_vals_lo = _mm_mullo_epi16(coeff_vals, one);
- __m128i coeff_vals_1 = _mm_unpacklo_epi16(coeff_vals_lo, coeff_vals_hi);
- __m128i coeff_vals_2 = _mm_unpackhi_epi16(coeff_vals_lo, coeff_vals_hi);
- _mm_store_si128((__m128i *)(coeff_ptr), coeff_vals_1);
- _mm_store_si128((__m128i *)(coeff_ptr + 4), coeff_vals_2);
-#else
- _mm_store_si128((__m128i *)(coeff_ptr), coeff_vals);
-#endif
+ if (sizeof(tran_low_t) == 4) {
+ __m128i one = _mm_set1_epi16(1);
+ __m128i coeff_vals_hi = _mm_mulhi_epi16(coeff_vals, one);
+ __m128i coeff_vals_lo = _mm_mullo_epi16(coeff_vals, one);
+ __m128i coeff_vals_1 = _mm_unpacklo_epi16(coeff_vals_lo, coeff_vals_hi);
+ __m128i coeff_vals_2 = _mm_unpackhi_epi16(coeff_vals_lo, coeff_vals_hi);
+ _mm_store_si128((__m128i *)(coeff_ptr), coeff_vals_1);
+ _mm_store_si128((__m128i *)(coeff_ptr + 4), coeff_vals_2);
+ } else {
+ _mm_store_si128((__m128i *)(coeff_ptr), coeff_vals);
+ }
}
void aom_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
diff --git a/third_party/aom/aom_dsp/x86/sad4d_sse2.asm b/third_party/aom/aom_dsp/x86/sad4d_sse2.asm
index 4570e2ce6..2c67f450f 100644
--- a/third_party/aom/aom_dsp/x86/sad4d_sse2.asm
+++ b/third_party/aom/aom_dsp/x86/sad4d_sse2.asm
@@ -256,4 +256,6 @@ SADNXN4D 4, 16
SADNXN4D 16, 4
SADNXN4D 8, 32
SADNXN4D 32, 8
+SADNXN4D 16, 64
+SADNXN4D 64, 16
%endif
diff --git a/third_party/aom/aom_dsp/x86/sad_sse2.asm b/third_party/aom/aom_dsp/x86/sad_sse2.asm
index 88d427077..b4cc6abf1 100644
--- a/third_party/aom/aom_dsp/x86/sad_sse2.asm
+++ b/third_party/aom/aom_dsp/x86/sad_sse2.asm
@@ -163,6 +163,10 @@ SAD64XN 64 ; sad64x64_sse2
SAD64XN 32 ; sad64x32_sse2
SAD64XN 64, 1 ; sad64x64_avg_sse2
SAD64XN 32, 1 ; sad64x32_avg_sse2
+%if CONFIG_EXT_PARTITION_TYPES
+SAD64XN 16 ; sad64x16_sse2
+SAD64XN 16, 1 ; sad64x16_avg_sse2
+%endif
; unsigned int aom_sad32x32_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
@@ -261,6 +265,8 @@ SAD16XN 8, 1 ; sad16x8_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
SAD16XN 4 ; sad_16x4_sse2
SAD16XN 4, 1 ; sad_16x4_avg_sse2
+SAD16XN 64 ; sad_16x64_sse2
+SAD16XN 64, 1 ; sad_16x64_avg_sse2
%endif
; unsigned int aom_sad8x{8,16}_sse2(uint8_t *src, int src_stride,
diff --git a/third_party/aom/aom_dsp/x86/txfm_common_avx2.h b/third_party/aom/aom_dsp/x86/txfm_common_avx2.h
index 4f7a60c22..1a8fed710 100644
--- a/third_party/aom/aom_dsp/x86/txfm_common_avx2.h
+++ b/third_party/aom/aom_dsp/x86/txfm_common_avx2.h
@@ -15,6 +15,7 @@
#include <immintrin.h>
#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/x86/common_avx2.h"
#define pair256_set_epi16(a, b) \
_mm256_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \
@@ -34,135 +35,6 @@ static INLINE void mm256_reverse_epi16(__m256i *u) {
*u = _mm256_permute2x128_si256(v, v, 1);
}
-// Note: in and out could have the same value
-static INLINE void mm256_transpose_16x16(const __m256i *in, __m256i *out) {
- __m256i tr0_0 = _mm256_unpacklo_epi16(in[0], in[1]);
- __m256i tr0_1 = _mm256_unpackhi_epi16(in[0], in[1]);
- __m256i tr0_2 = _mm256_unpacklo_epi16(in[2], in[3]);
- __m256i tr0_3 = _mm256_unpackhi_epi16(in[2], in[3]);
- __m256i tr0_4 = _mm256_unpacklo_epi16(in[4], in[5]);
- __m256i tr0_5 = _mm256_unpackhi_epi16(in[4], in[5]);
- __m256i tr0_6 = _mm256_unpacklo_epi16(in[6], in[7]);
- __m256i tr0_7 = _mm256_unpackhi_epi16(in[6], in[7]);
-
- __m256i tr0_8 = _mm256_unpacklo_epi16(in[8], in[9]);
- __m256i tr0_9 = _mm256_unpackhi_epi16(in[8], in[9]);
- __m256i tr0_a = _mm256_unpacklo_epi16(in[10], in[11]);
- __m256i tr0_b = _mm256_unpackhi_epi16(in[10], in[11]);
- __m256i tr0_c = _mm256_unpacklo_epi16(in[12], in[13]);
- __m256i tr0_d = _mm256_unpackhi_epi16(in[12], in[13]);
- __m256i tr0_e = _mm256_unpacklo_epi16(in[14], in[15]);
- __m256i tr0_f = _mm256_unpackhi_epi16(in[14], in[15]);
-
- // 00 10 01 11 02 12 03 13 08 18 09 19 0a 1a 0b 1b
- // 04 14 05 15 06 16 07 17 0c 1c 0d 1d 0e 1e 0f 1f
- // 20 30 21 31 22 32 23 33 28 38 29 39 2a 3a 2b 3b
- // 24 34 25 35 26 36 27 37 2c 3c 2d 3d 2e 3e 2f 3f
- // 40 50 41 51 42 52 43 53 48 58 49 59 4a 5a 4b 5b
- // 44 54 45 55 46 56 47 57 4c 5c 4d 5d 4e 5e 4f 5f
- // 60 70 61 71 62 72 63 73 68 78 69 79 6a 7a 6b 7b
- // 64 74 65 75 66 76 67 77 6c 7c 6d 7d 6e 7e 6f 7f
-
- // 80 90 81 91 82 92 83 93 88 98 89 99 8a 9a 8b 9b
- // 84 94 85 95 86 96 87 97 8c 9c 8d 9d 8e 9e 8f 9f
- // a0 b0 a1 b1 a2 b2 a3 b3 a8 b8 a9 b9 aa ba ab bb
- // a4 b4 a5 b5 a6 b6 a7 b7 ac bc ad bd ae be af bf
- // c0 d0 c1 d1 c2 d2 c3 d3 c8 d8 c9 d9 ca da cb db
- // c4 d4 c5 d5 c6 d6 c7 d7 cc dc cd dd ce de cf df
- // e0 f0 e1 f1 e2 f2 e3 f3 e8 f8 e9 f9 ea fa eb fb
- // e4 f4 e5 f5 e6 f6 e7 f7 ec fc ed fd ee fe ef ff
-
- __m256i tr1_0 = _mm256_unpacklo_epi32(tr0_0, tr0_2);
- __m256i tr1_1 = _mm256_unpackhi_epi32(tr0_0, tr0_2);
- __m256i tr1_2 = _mm256_unpacklo_epi32(tr0_1, tr0_3);
- __m256i tr1_3 = _mm256_unpackhi_epi32(tr0_1, tr0_3);
- __m256i tr1_4 = _mm256_unpacklo_epi32(tr0_4, tr0_6);
- __m256i tr1_5 = _mm256_unpackhi_epi32(tr0_4, tr0_6);
- __m256i tr1_6 = _mm256_unpacklo_epi32(tr0_5, tr0_7);
- __m256i tr1_7 = _mm256_unpackhi_epi32(tr0_5, tr0_7);
-
- __m256i tr1_8 = _mm256_unpacklo_epi32(tr0_8, tr0_a);
- __m256i tr1_9 = _mm256_unpackhi_epi32(tr0_8, tr0_a);
- __m256i tr1_a = _mm256_unpacklo_epi32(tr0_9, tr0_b);
- __m256i tr1_b = _mm256_unpackhi_epi32(tr0_9, tr0_b);
- __m256i tr1_c = _mm256_unpacklo_epi32(tr0_c, tr0_e);
- __m256i tr1_d = _mm256_unpackhi_epi32(tr0_c, tr0_e);
- __m256i tr1_e = _mm256_unpacklo_epi32(tr0_d, tr0_f);
- __m256i tr1_f = _mm256_unpackhi_epi32(tr0_d, tr0_f);
-
- // 00 10 20 30 01 11 21 31 08 18 28 38 09 19 29 39
- // 02 12 22 32 03 13 23 33 0a 1a 2a 3a 0b 1b 2b 3b
- // 04 14 24 34 05 15 25 35 0c 1c 2c 3c 0d 1d 2d 3d
- // 06 16 26 36 07 17 27 37 0e 1e 2e 3e 0f 1f 2f 3f
- // 40 50 60 70 41 51 61 71 48 58 68 78 49 59 69 79
- // 42 52 62 72 43 53 63 73 4a 5a 6a 7a 4b 5b 6b 7b
- // 44 54 64 74 45 55 65 75 4c 5c 6c 7c 4d 5d 6d 7d
- // 46 56 66 76 47 57 67 77 4e 5e 6e 7e 4f 5f 6f 7f
-
- // 80 90 a0 b0 81 91 a1 b1 88 98 a8 b8 89 99 a9 b9
- // 82 92 a2 b2 83 93 a3 b3 8a 9a aa ba 8b 9b ab bb
- // 84 94 a4 b4 85 95 a5 b5 8c 9c ac bc 8d 9d ad bd
- // 86 96 a6 b6 87 97 a7 b7 8e ae 9e be 8f 9f af bf
- // c0 d0 e0 f0 c1 d1 e1 f1 c8 d8 e8 f8 c9 d9 e9 f9
- // c2 d2 e2 f2 c3 d3 e3 f3 ca da ea fa cb db eb fb
- // c4 d4 e4 f4 c5 d5 e5 f5 cc dc ef fc cd dd ed fd
- // c6 d6 e6 f6 c7 d7 e7 f7 ce de ee fe cf df ef ff
-
- tr0_0 = _mm256_unpacklo_epi64(tr1_0, tr1_4);
- tr0_1 = _mm256_unpackhi_epi64(tr1_0, tr1_4);
- tr0_2 = _mm256_unpacklo_epi64(tr1_1, tr1_5);
- tr0_3 = _mm256_unpackhi_epi64(tr1_1, tr1_5);
- tr0_4 = _mm256_unpacklo_epi64(tr1_2, tr1_6);
- tr0_5 = _mm256_unpackhi_epi64(tr1_2, tr1_6);
- tr0_6 = _mm256_unpacklo_epi64(tr1_3, tr1_7);
- tr0_7 = _mm256_unpackhi_epi64(tr1_3, tr1_7);
-
- tr0_8 = _mm256_unpacklo_epi64(tr1_8, tr1_c);
- tr0_9 = _mm256_unpackhi_epi64(tr1_8, tr1_c);
- tr0_a = _mm256_unpacklo_epi64(tr1_9, tr1_d);
- tr0_b = _mm256_unpackhi_epi64(tr1_9, tr1_d);
- tr0_c = _mm256_unpacklo_epi64(tr1_a, tr1_e);
- tr0_d = _mm256_unpackhi_epi64(tr1_a, tr1_e);
- tr0_e = _mm256_unpacklo_epi64(tr1_b, tr1_f);
- tr0_f = _mm256_unpackhi_epi64(tr1_b, tr1_f);
-
- // 00 10 20 30 40 50 60 70 08 18 28 38 48 58 68 78
- // 01 11 21 31 41 51 61 71 09 19 29 39 49 59 69 79
- // 02 12 22 32 42 52 62 72 0a 1a 2a 3a 4a 5a 6a 7a
- // 03 13 23 33 43 53 63 73 0b 1b 2b 3b 4b 5b 6b 7b
- // 04 14 24 34 44 54 64 74 0c 1c 2c 3c 4c 5c 6c 7c
- // 05 15 25 35 45 55 65 75 0d 1d 2d 3d 4d 5d 6d 7d
- // 06 16 26 36 46 56 66 76 0e 1e 2e 3e 4e 5e 6e 7e
- // 07 17 27 37 47 57 67 77 0f 1f 2f 3f 4f 5f 6f 7f
-
- // 80 90 a0 b0 c0 d0 e0 f0 88 98 a8 b8 c8 d8 e8 f8
- // 81 91 a1 b1 c1 d1 e1 f1 89 99 a9 b9 c9 d9 e9 f9
- // 82 92 a2 b2 c2 d2 e2 f2 8a 9a aa ba ca da ea fa
- // 83 93 a3 b3 c3 d3 e3 f3 8b 9b ab bb cb db eb fb
- // 84 94 a4 b4 c4 d4 e4 f4 8c 9c ac bc cc dc ef fc
- // 85 95 a5 b5 c5 d5 e5 f5 8d 9d ad bd cd dd ed fd
- // 86 96 a6 b6 c6 d6 e6 f6 8e ae 9e be ce de ee fe
- // 87 97 a7 b7 c7 d7 e7 f7 8f 9f af bf cf df ef ff
-
- out[0] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x20); // 0010 0000
- out[8] = _mm256_permute2x128_si256(tr0_0, tr0_8, 0x31); // 0011 0001
- out[1] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x20);
- out[9] = _mm256_permute2x128_si256(tr0_1, tr0_9, 0x31);
- out[2] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x20);
- out[10] = _mm256_permute2x128_si256(tr0_2, tr0_a, 0x31);
- out[3] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x20);
- out[11] = _mm256_permute2x128_si256(tr0_3, tr0_b, 0x31);
-
- out[4] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x20);
- out[12] = _mm256_permute2x128_si256(tr0_4, tr0_c, 0x31);
- out[5] = _mm256_permute2x128_si256(tr0_5, tr0_d, 0x20);
- out[13] = _mm256_permute2x128_si256(tr0_5, tr0_d, 0x31);
- out[6] = _mm256_permute2x128_si256(tr0_6, tr0_e, 0x20);
- out[14] = _mm256_permute2x128_si256(tr0_6, tr0_e, 0x31);
- out[7] = _mm256_permute2x128_si256(tr0_7, tr0_f, 0x20);
- out[15] = _mm256_permute2x128_si256(tr0_7, tr0_f, 0x31);
-}
-
static INLINE __m256i butter_fly(const __m256i *a0, const __m256i *a1,
const __m256i *cospi) {
const __m256i dct_rounding = _mm256_set1_epi32(DCT_CONST_ROUNDING);
diff --git a/third_party/aom/aom_dsp/x86/txfm_common_intrin.h b/third_party/aom/aom_dsp/x86/txfm_common_intrin.h
index e4ac56339..4e6eecd32 100644
--- a/third_party/aom/aom_dsp/x86/txfm_common_intrin.h
+++ b/third_party/aom/aom_dsp/x86/txfm_common_intrin.h
@@ -16,16 +16,16 @@
// This header file should be included after any x86 intrinsics header file
static INLINE void storeu_output(const __m128i *poutput, tran_low_t *dst_ptr) {
-#if CONFIG_HIGHBITDEPTH
- const __m128i zero = _mm_setzero_si128();
- const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero);
- __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits);
- __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits);
- _mm_storeu_si128((__m128i *)(dst_ptr), out0);
- _mm_storeu_si128((__m128i *)(dst_ptr + 4), out1);
-#else
- _mm_storeu_si128((__m128i *)(dst_ptr), *poutput);
-#endif // CONFIG_HIGHBITDEPTH
+ if (sizeof(tran_low_t) == 4) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero);
+ __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits);
+ __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits);
+ _mm_storeu_si128((__m128i *)(dst_ptr), out0);
+ _mm_storeu_si128((__m128i *)(dst_ptr + 4), out1);
+ } else {
+ _mm_storeu_si128((__m128i *)(dst_ptr), *poutput);
+ }
}
#endif // _AOM_DSP_X86_TXFM_COMMON_INTRIN_H_
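Note: storeu_output sign-extends eight 16-bit lanes to 32 bits using only
SSE2: _mm_cmplt_epi16(x, 0) produces 0xFFFF in every negative lane, and
interleaving the values with that mask yields the little-endian byte layout of
the corresponding int32_t (the pre-SSE4.1 substitute for _mm_cvtepi16_epi32).
The same trick in isolation:

  #include <emmintrin.h>
  #include <stdint.h>

  /* Widen eight int16_t values to int32_t with SSE2 alone. */
  static void widen_epi16_to_epi32(const int16_t *src, int32_t *dst) {
    const __m128i v = _mm_loadu_si128((const __m128i *)src);
    const __m128i sign = _mm_cmplt_epi16(v, _mm_setzero_si128());
    _mm_storeu_si128((__m128i *)(dst + 0), _mm_unpacklo_epi16(v, sign));
    _mm_storeu_si128((__m128i *)(dst + 4), _mm_unpackhi_epi16(v, sign));
  }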
diff --git a/third_party/aom/aom_dsp/x86/variance_sse2.c b/third_party/aom/aom_dsp/x86/variance_sse2.c
index 918844185..211fad3f8 100644
--- a/third_party/aom/aom_dsp/x86/variance_sse2.c
+++ b/third_party/aom/aom_dsp/x86/variance_sse2.c
@@ -382,6 +382,28 @@ unsigned int aom_variance32x8_sse2(const uint8_t *src, int src_stride,
assert(sum >= -255 * 32 * 8);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 8);
}
+
+unsigned int aom_variance16x64_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 16, 64, sse, &sum,
+ aom_get16x16var_sse2, 16);
+ assert(sum <= 255 * 16 * 64);
+ assert(sum >= -255 * 16 * 64);
+ return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
+}
+
+unsigned int aom_variance64x16_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 64, 16, sse, &sum,
+ aom_get16x16var_sse2, 16);
+ assert(sum <= 255 * 64 * 16);
+ assert(sum >= -255 * 64 * 16);
+ return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
+}
#endif
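Note: in the new aom_variance16x64_sse2 and aom_variance64x16_sse2 above, the
final shift is log2 of the pixel count: both block shapes cover 16 * 64 =
1024 = 2^10 pixels, so sum * sum >> 10 is sum^2 / N and the return value is
the usual SSE - sum^2 / N. Restated as a sketch:

  #include <stdint.h>

  /* Variance-style return value for an N-pixel block, N = 1 << log2_n;
   * for 16x64 and 64x16 blocks, log2_n == 10. */
  static unsigned int scaled_variance(unsigned int sse, int sum, int log2_n) {
    return sse - (unsigned int)(((int64_t)sum * sum) >> log2_n);
  }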
// The 2 unused parameters are placeholders for PIC-enabled builds.
@@ -451,7 +473,9 @@ DECLS(ssse3);
FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t)); \
FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t)); \
FN(8, 32, 8, 3, 5, opt, (int32_t), (int32_t)); \
- FN(32, 8, 16, 5, 3, opt, (int32_t), (int32_t))
+ FN(32, 8, 16, 5, 3, opt, (int32_t), (int32_t)); \
+ FN(16, 64, 16, 4, 6, opt, (int32_t), (int32_t)); \
+ FN(64, 16, 16, 6, 4, opt, (int32_t), (int32_t))
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)); \
@@ -543,7 +567,9 @@ DECLS(ssse3);
FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t)); \
FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t)); \
FN(8, 32, 8, 3, 5, opt, (int32_t), (int32_t)); \
- FN(32, 8, 16, 5, 3, opt, (int32_t), (int32_t))
+ FN(32, 8, 16, 5, 3, opt, (int32_t), (int32_t)); \
+ FN(16, 64, 16, 4, 6, opt, (int32_t), (int32_t)); \
+ FN(64, 16, 16, 6, 4, opt, (int32_t), (int32_t))
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)); \
diff --git a/third_party/aom/aom_ports/aom_ports.cmake b/third_party/aom/aom_ports/aom_ports.cmake
index 922691ed1..e1ffb56f5 100644
--- a/third_party/aom/aom_ports/aom_ports.cmake
+++ b/third_party/aom/aom_ports/aom_ports.cmake
@@ -22,16 +22,16 @@ set(AOM_PORTS_INCLUDES
"${AOM_ROOT}/aom_ports/msvc.h"
"${AOM_ROOT}/aom_ports/system_state.h")
+set(AOM_PORTS_ASM_X86 "${AOM_ROOT}/aom_ports/emms.asm")
+
set(AOM_PORTS_INCLUDES_X86
"${AOM_ROOT}/aom_ports/x86_abi_support.asm")
-set(AOM_PORTS_ASM_MMX "${AOM_ROOT}/aom_ports/emms.asm")
-
set(AOM_PORTS_SOURCES_ARM
"${AOM_ROOT}/aom_ports/arm.h"
"${AOM_ROOT}/aom_ports/arm_cpudetect.c")
-# For arm targets and targets where HAVE_MMX is true:
+# For arm and x86 targets:
# Creates the aom_ports build target, adds the includes in aom_ports to the
# target, and makes libaom depend on it.
# Otherwise:
@@ -39,8 +39,8 @@ set(AOM_PORTS_SOURCES_ARM
# For all target platforms:
# The libaom target must exist before this function is called.
function (setup_aom_ports_targets)
- if (HAVE_MMX)
- add_asm_library("aom_ports" "AOM_PORTS_ASM_MMX" "aom")
+ if ("${AOM_TARGET_CPU}" MATCHES "^x86")
+ add_asm_library("aom_ports" "AOM_PORTS_ASM_X86" "aom")
set(aom_ports_has_symbols 1)
elseif ("${AOM_TARGET_CPU}" MATCHES "arm")
add_library(aom_ports OBJECT ${AOM_PORTS_SOURCES_ARM})
diff --git a/third_party/aom/aom_ports/mem.h b/third_party/aom/aom_ports/mem.h
index bd881cdc8..500e397c6 100644
--- a/third_party/aom/aom_ports/mem.h
+++ b/third_party/aom/aom_ports/mem.h
@@ -54,12 +54,14 @@
(((value) < 0) ? -ROUND_POWER_OF_TWO_64(-(value), (n)) \
: ROUND_POWER_OF_TWO_64((value), (n)))
+/* shift right or left depending on sign of n */
+#define RIGHT_SIGNED_SHIFT(value, n) \
+ ((n) < 0 ? ((value) << (-(n))) : ((value) >> (n)))
+
#define ALIGN_POWER_OF_TWO(value, n) \
(((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
-#if CONFIG_HIGHBITDEPTH
#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
-#endif // CONFIG_HIGHBITDEPTH
#endif // AOM_PORTS_MEM_H_
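Note: RIGHT_SIGNED_SHIFT lets callers scale by 2^n or 2^-n with a single
signed shift amount; a negative count flips the direction. For example:

  #include <assert.h>
  #include "aom_ports/mem.h"

  static void right_signed_shift_example(void) {
    assert(RIGHT_SIGNED_SHIFT(16, 2) == 4);   /* n > 0: shift right */
    assert(RIGHT_SIGNED_SHIFT(16, -2) == 64); /* n < 0: shift left */
  }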
diff --git a/third_party/aom/aom_scale/aom_scale.cmake b/third_party/aom/aom_scale/aom_scale.cmake
index b52fd83dc..1c00b590e 100644
--- a/third_party/aom/aom_scale/aom_scale.cmake
+++ b/third_party/aom/aom_scale/aom_scale.cmake
@@ -30,7 +30,7 @@ function (setup_aom_scale_targets)
if (HAVE_DSPR2)
add_intrinsics_object_library("" "dspr2" "aom_scale"
- "AOM_SCALE_INTRIN_DSPR2" "aom_scale")
+ "AOM_SCALE_INTRIN_DSPR2" "aom")
endif ()
set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} aom_scale PARENT_SCOPE)
diff --git a/third_party/aom/aom_scale/aom_scale_rtcd.pl b/third_party/aom/aom_scale/aom_scale_rtcd.pl
index 9d9a1a29a..c44966f96 100644
--- a/third_party/aom/aom_scale/aom_scale_rtcd.pl
+++ b/third_party/aom/aom_scale/aom_scale_rtcd.pl
@@ -7,13 +7,13 @@ forward_decls qw/aom_scale_forward_decls/;
# Scaler functions
if (aom_config("CONFIG_SPATIAL_RESAMPLING") eq "yes") {
- add_proto qw/void aom_horizontal_line_5_4_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
- add_proto qw/void aom_vertical_band_5_4_scale/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
- add_proto qw/void aom_horizontal_line_5_3_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
- add_proto qw/void aom_vertical_band_5_3_scale/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
- add_proto qw/void aom_horizontal_line_2_1_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
- add_proto qw/void aom_vertical_band_2_1_scale/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
- add_proto qw/void aom_vertical_band_2_1_scale_i/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
+ add_proto qw/void aom_horizontal_line_5_4_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
+ add_proto qw/void aom_vertical_band_5_4_scale/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
+ add_proto qw/void aom_horizontal_line_5_3_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
+ add_proto qw/void aom_vertical_band_5_3_scale/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
+ add_proto qw/void aom_horizontal_line_2_1_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width";
+ add_proto qw/void aom_vertical_band_2_1_scale/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
+ add_proto qw/void aom_vertical_band_2_1_scale_i/, "unsigned char *source, int src_pitch, unsigned char *dest, int dest_pitch, unsigned int dest_width";
}
add_proto qw/void aom_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf";
@@ -27,12 +27,12 @@ add_proto qw/void aom_yv12_copy_u/, "const struct yv12_buffer_config *src_bc, st
add_proto qw/void aom_yv12_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc";
if (aom_config("CONFIG_AV1") eq "yes") {
- add_proto qw/void aom_extend_frame_borders/, "struct yv12_buffer_config *ybf";
- specialize qw/aom_extend_frame_borders dspr2/;
+ add_proto qw/void aom_extend_frame_borders/, "struct yv12_buffer_config *ybf";
+ specialize qw/aom_extend_frame_borders dspr2/;
- add_proto qw/void aom_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf";
- specialize qw/aom_extend_frame_inner_borders dspr2/;
+ add_proto qw/void aom_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf";
+ specialize qw/aom_extend_frame_inner_borders dspr2/;
- add_proto qw/void aom_extend_frame_borders_y/, "struct yv12_buffer_config *ybf";
+ add_proto qw/void aom_extend_frame_borders_y/, "struct yv12_buffer_config *ybf";
}
1;
diff --git a/third_party/aom/aom_scale/generic/aom_scale.c b/third_party/aom/aom_scale/generic/aom_scale.c
index 14f3ae0da..d124832b7 100644
--- a/third_party/aom/aom_scale/generic/aom_scale.c
+++ b/third_party/aom/aom_scale/generic/aom_scale.c
@@ -476,54 +476,30 @@ void aom_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
unsigned int hscale, unsigned int hratio,
unsigned int vscale, unsigned int vratio,
unsigned int interlaced) {
- int i;
- int dw = (hscale - 1 + src->y_width * hratio) / hscale;
- int dh = (vscale - 1 + src->y_height * vratio) / vscale;
-
- /* call our internal scaling routines!! */
- Scale2D((unsigned char *)src->y_buffer, src->y_stride, src->y_width,
- src->y_height, (unsigned char *)dst->y_buffer, dst->y_stride, dw, dh,
- temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced);
-
- if (dw < (int)dst->y_width)
- for (i = 0; i < dh; ++i)
- memset(dst->y_buffer + i * dst->y_stride + dw - 1,
- dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1);
-
- if (dh < (int)dst->y_height)
- for (i = dh - 1; i < (int)dst->y_height; ++i)
- memcpy(dst->y_buffer + i * dst->y_stride,
- dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1);
-
- Scale2D((unsigned char *)src->u_buffer, src->uv_stride, src->uv_width,
- src->uv_height, (unsigned char *)dst->u_buffer, dst->uv_stride,
- dw / 2, dh / 2, temp_area, temp_height, hscale, hratio, vscale,
- vratio, interlaced);
-
- if (dw / 2 < (int)dst->uv_width)
- for (i = 0; i < dst->uv_height; ++i)
- memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1,
- dst->u_buffer[i * dst->uv_stride + dw / 2 - 2],
- dst->uv_width - dw / 2 + 1);
-
- if (dh / 2 < (int)dst->uv_height)
- for (i = dh / 2 - 1; i < (int)dst->y_height / 2; ++i)
- memcpy(dst->u_buffer + i * dst->uv_stride,
- dst->u_buffer + (dh / 2 - 2) * dst->uv_stride, dst->uv_width);
-
- Scale2D((unsigned char *)src->v_buffer, src->uv_stride, src->uv_width,
- src->uv_height, (unsigned char *)dst->v_buffer, dst->uv_stride,
- dw / 2, dh / 2, temp_area, temp_height, hscale, hratio, vscale,
- vratio, interlaced);
-
- if (dw / 2 < (int)dst->uv_width)
- for (i = 0; i < dst->uv_height; ++i)
- memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1,
- dst->v_buffer[i * dst->uv_stride + dw / 2 - 2],
- dst->uv_width - dw / 2 + 1);
-
- if (dh / 2 < (int)dst->uv_height)
- for (i = dh / 2 - 1; i < (int)dst->y_height / 2; ++i)
- memcpy(dst->v_buffer + i * dst->uv_stride,
- dst->v_buffer + (dh / 2 - 2) * dst->uv_stride, dst->uv_width);
+ const int dw = (hscale - 1 + src->y_width * hratio) / hscale;
+ const int dh = (vscale - 1 + src->y_height * vratio) / vscale;
+
+ for (int plane = 0; plane < 3; ++plane) {
+ const int is_uv = plane > 0;
+ const int plane_dw = dw >> is_uv;
+ const int plane_dh = dh >> is_uv;
+
+ Scale2D((unsigned char *)src->buffers[plane], src->strides[is_uv],
+ src->widths[is_uv], src->heights[is_uv],
+ (unsigned char *)dst->buffers[plane], dst->strides[is_uv], plane_dw,
+ plane_dh, temp_area, temp_height, hscale, hratio, vscale, vratio,
+ interlaced);
+
+ if (plane_dw < dst->widths[is_uv])
+ for (int i = 0; i < plane_dh; ++i)
+ memset(dst->buffers[plane] + i * dst->strides[is_uv] + plane_dw - 1,
+ dst->buffers[plane][i * dst->strides[is_uv] + plane_dw - 2],
+ dst->widths[is_uv] - plane_dw + 1);
+
+ if (plane_dh < dst->heights[is_uv])
+ for (int i = plane_dh - 1; i < dst->heights[is_uv]; ++i)
+ memcpy(dst->buffers[plane] + i * dst->strides[is_uv],
+ dst->buffers[plane] + (plane_dh - 2) * dst->strides[is_uv],
+ dst->widths[is_uv] + 1);
+ }
}
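Note: the rewrite collapses three near-identical Y/U/V blocks into one loop.
Plane 0 is luma and planes 1-2 share chroma geometry, so buffers are indexed
by plane while strides and sizes are indexed by is_uv. The same pattern is
used in yv12extend.c below, and both rely on the buffers[] / strides[] /
widths[] / heights[] union aliases added to yv12_buffer_config later in this
patch. The shape of the idiom, with a hypothetical process_plane helper:

  for (int plane = 0; plane < 3; ++plane) {
    const int is_uv = plane > 0; /* U and V share sizes and strides */
    process_plane(buf->buffers[plane], buf->strides[is_uv],
                  buf->widths[is_uv], buf->heights[is_uv]);
  }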
diff --git a/third_party/aom/aom_scale/generic/yv12config.c b/third_party/aom/aom_scale/generic/yv12config.c
index ee15ae103..fce719273 100644
--- a/third_party/aom/aom_scale/generic/yv12config.c
+++ b/third_party/aom/aom_scale/generic/yv12config.c
@@ -11,9 +11,9 @@
#include <assert.h>
-#include "aom_scale/yv12config.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
+#include "aom_scale/yv12config.h"
/****************************************************************************
* Exports
@@ -35,7 +35,7 @@ int aom_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) {
}
#if CONFIG_HIGHBITDEPTH && CONFIG_GLOBAL_MOTION
- if (ybf->y_buffer_8bit) free(ybf->y_buffer_8bit);
+ if (ybf->y_buffer_8bit) aom_free(ybf->y_buffer_8bit);
#endif
/* buffer_alloc isn't accessed by most functions. Rather y_buffer,
@@ -168,9 +168,12 @@ int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
aom_byte_align);
#if CONFIG_HIGHBITDEPTH && CONFIG_GLOBAL_MOTION
- if (ybf->y_buffer_8bit) {
- free(ybf->y_buffer_8bit);
- ybf->y_buffer_8bit = NULL;
+ if (use_highbitdepth) {
+ if (ybf->y_buffer_8bit) aom_free(ybf->y_buffer_8bit);
+ ybf->y_buffer_8bit = (uint8_t *)aom_memalign(32, (size_t)yplane_size);
+ if (!ybf->y_buffer_8bit) return -1;
+ } else {
+ assert(!ybf->y_buffer_8bit);
}
#endif
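Note: this hunk makes two fixes at once: y_buffer_8bit comes from
aom_memalign(), so it must be released with aom_free() rather than plain
free(), and the 8-bit shadow buffer is now allocated eagerly whenever the
frame itself is high bit depth. The allocator-pairing rule, sketched:

  #include <stddef.h>
  #include <stdint.h>
  #include "aom_mem/aom_mem.h"

  static int alloc_shadow(uint8_t **out, size_t size) {
    uint8_t *buf = (uint8_t *)aom_memalign(32, size);
    if (!buf) return -1;
    *out = buf; /* caller must release with aom_free(), not free() */
    return 0;
  }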
diff --git a/third_party/aom/aom_scale/generic/yv12extend.c b/third_party/aom/aom_scale/generic/yv12extend.c
index 05e463362..8266743cf 100644
--- a/third_party/aom/aom_scale/generic/yv12extend.c
+++ b/third_party/aom/aom_scale/generic/yv12extend.c
@@ -101,8 +101,6 @@ static void extend_plane_high(uint8_t *const src8, int src_stride, int width,
#endif
void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) {
- const int uv_border = ybf->border / 2;
-
assert(ybf->border % 2 == 0);
assert(ybf->y_height - ybf->y_crop_height < 16);
assert(ybf->y_width - ybf->y_crop_width < 16);
@@ -111,49 +109,33 @@ void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) {
#if CONFIG_HIGHBITDEPTH
if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
- extend_plane_high(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
- ybf->y_crop_height, ybf->border, ybf->border,
- ybf->border + ybf->y_height - ybf->y_crop_height,
- ybf->border + ybf->y_width - ybf->y_crop_width);
-
- extend_plane_high(ybf->u_buffer, ybf->uv_stride, ybf->uv_crop_width,
- ybf->uv_crop_height, uv_border, uv_border,
- uv_border + ybf->uv_height - ybf->uv_crop_height,
- uv_border + ybf->uv_width - ybf->uv_crop_width);
-
- extend_plane_high(ybf->v_buffer, ybf->uv_stride, ybf->uv_crop_width,
- ybf->uv_crop_height, uv_border, uv_border,
- uv_border + ybf->uv_height - ybf->uv_crop_height,
- uv_border + ybf->uv_width - ybf->uv_crop_width);
+ for (int plane = 0; plane < 3; ++plane) {
+ const int is_uv = plane > 0;
+ const int plane_border = ybf->border >> is_uv;
+ extend_plane_high(
+ ybf->buffers[plane], ybf->strides[is_uv], ybf->crop_widths[is_uv],
+ ybf->crop_heights[is_uv], plane_border, plane_border,
+ plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
+ plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
+ }
return;
}
#endif
- extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
- ybf->y_crop_height, ybf->border, ybf->border,
- ybf->border + ybf->y_height - ybf->y_crop_height,
- ybf->border + ybf->y_width - ybf->y_crop_width);
-
- extend_plane(ybf->u_buffer, ybf->uv_stride, ybf->uv_crop_width,
- ybf->uv_crop_height, uv_border, uv_border,
- uv_border + ybf->uv_height - ybf->uv_crop_height,
- uv_border + ybf->uv_width - ybf->uv_crop_width);
-
- extend_plane(ybf->v_buffer, ybf->uv_stride, ybf->uv_crop_width,
- ybf->uv_crop_height, uv_border, uv_border,
- uv_border + ybf->uv_height - ybf->uv_crop_height,
- uv_border + ybf->uv_width - ybf->uv_crop_width);
+ for (int plane = 0; plane < 3; ++plane) {
+ const int is_uv = plane > 0;
+ const int plane_border = ybf->border >> is_uv;
+ extend_plane(ybf->buffers[plane], ybf->strides[is_uv],
+ ybf->crop_widths[is_uv], ybf->crop_heights[is_uv],
+ plane_border, plane_border,
+ plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
+ plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
+ }
}
#if CONFIG_AV1
static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) {
- const int c_w = ybf->uv_crop_width;
- const int c_h = ybf->uv_crop_height;
const int ss_x = ybf->uv_width < ybf->y_width;
const int ss_y = ybf->uv_height < ybf->y_height;
- const int c_et = ext_size >> ss_y;
- const int c_el = ext_size >> ss_x;
- const int c_eb = c_et + ybf->uv_height - ybf->uv_crop_height;
- const int c_er = c_el + ybf->uv_width - ybf->uv_crop_width;
assert(ybf->y_height - ybf->y_crop_height < 16);
assert(ybf->y_width - ybf->y_crop_width < 16);
@@ -162,25 +144,29 @@ static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) {
#if CONFIG_HIGHBITDEPTH
if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
- extend_plane_high(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
- ybf->y_crop_height, ext_size, ext_size,
- ext_size + ybf->y_height - ybf->y_crop_height,
- ext_size + ybf->y_width - ybf->y_crop_width);
- extend_plane_high(ybf->u_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb,
- c_er);
- extend_plane_high(ybf->v_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb,
- c_er);
+ for (int plane = 0; plane < 3; ++plane) {
+ const int is_uv = plane > 0;
+ const int top = ext_size >> (is_uv ? ss_y : 0);
+ const int left = ext_size >> (is_uv ? ss_x : 0);
+ const int bottom = top + ybf->heights[is_uv] - ybf->crop_heights[is_uv];
+ const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
+ extend_plane_high(ybf->buffers[plane], ybf->strides[is_uv],
+ ybf->crop_widths[is_uv], ybf->crop_heights[is_uv], top,
+ left, bottom, right);
+ }
return;
}
#endif
- extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
- ybf->y_crop_height, ext_size, ext_size,
- ext_size + ybf->y_height - ybf->y_crop_height,
- ext_size + ybf->y_width - ybf->y_crop_width);
-
- extend_plane(ybf->u_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er);
-
- extend_plane(ybf->v_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er);
+ for (int plane = 0; plane < 3; ++plane) {
+ const int is_uv = plane > 0;
+ const int top = ext_size >> (is_uv ? ss_y : 0);
+ const int left = ext_size >> (is_uv ? ss_x : 0);
+ const int bottom = top + ybf->heights[is_uv] - ybf->crop_heights[is_uv];
+ const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
+ extend_plane(ybf->buffers[plane], ybf->strides[is_uv],
+ ybf->crop_widths[is_uv], ybf->crop_heights[is_uv], top, left,
+ bottom, right);
+ }
}
void aom_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) {
@@ -230,10 +216,6 @@ static void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) {
// Note: The frames are assumed to be identical in size.
void aom_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_bc,
YV12_BUFFER_CONFIG *dst_bc) {
- int row;
- const uint8_t *src = src_bc->y_buffer;
- uint8_t *dst = dst_bc->y_buffer;
-
#if 0
/* These assertions are valid in the codec, but the libaom-tester uses
* this code slightly differently.
@@ -243,63 +225,36 @@ void aom_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_bc,
#endif
#if CONFIG_HIGHBITDEPTH
- if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
- assert(dst_bc->flags & YV12_FLAG_HIGHBITDEPTH);
- for (row = 0; row < src_bc->y_height; ++row) {
- memcpy_short_addr(dst, src, src_bc->y_width);
- src += src_bc->y_stride;
- dst += dst_bc->y_stride;
- }
-
- src = src_bc->u_buffer;
- dst = dst_bc->u_buffer;
-
- for (row = 0; row < src_bc->uv_height; ++row) {
- memcpy_short_addr(dst, src, src_bc->uv_width);
- src += src_bc->uv_stride;
- dst += dst_bc->uv_stride;
- }
-
- src = src_bc->v_buffer;
- dst = dst_bc->v_buffer;
+ assert((src_bc->flags & YV12_FLAG_HIGHBITDEPTH) ==
+ (dst_bc->flags & YV12_FLAG_HIGHBITDEPTH));
- for (row = 0; row < src_bc->uv_height; ++row) {
- memcpy_short_addr(dst, src, src_bc->uv_width);
- src += src_bc->uv_stride;
- dst += dst_bc->uv_stride;
+ if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (int plane = 0; plane < 3; ++plane) {
+ const uint8_t *plane_src = src_bc->buffers[plane];
+ uint8_t *plane_dst = dst_bc->buffers[plane];
+ const int is_uv = plane > 0;
+
+ for (int row = 0; row < src_bc->heights[is_uv]; ++row) {
+ memcpy_short_addr(plane_dst, plane_src, src_bc->widths[is_uv]);
+ plane_src += src_bc->strides[is_uv];
+ plane_dst += dst_bc->strides[is_uv];
+ }
}
-
aom_yv12_extend_frame_borders_c(dst_bc);
return;
- } else {
- assert(!(dst_bc->flags & YV12_FLAG_HIGHBITDEPTH));
}
#endif
-
- for (row = 0; row < src_bc->y_height; ++row) {
- memcpy(dst, src, src_bc->y_width);
- src += src_bc->y_stride;
- dst += dst_bc->y_stride;
- }
-
- src = src_bc->u_buffer;
- dst = dst_bc->u_buffer;
-
- for (row = 0; row < src_bc->uv_height; ++row) {
- memcpy(dst, src, src_bc->uv_width);
- src += src_bc->uv_stride;
- dst += dst_bc->uv_stride;
- }
-
- src = src_bc->v_buffer;
- dst = dst_bc->v_buffer;
-
- for (row = 0; row < src_bc->uv_height; ++row) {
- memcpy(dst, src, src_bc->uv_width);
- src += src_bc->uv_stride;
- dst += dst_bc->uv_stride;
+ for (int plane = 0; plane < 3; ++plane) {
+ const uint8_t *plane_src = src_bc->buffers[plane];
+ uint8_t *plane_dst = dst_bc->buffers[plane];
+ const int is_uv = plane > 0;
+
+ for (int row = 0; row < src_bc->heights[is_uv]; ++row) {
+ memcpy(plane_dst, plane_src, src_bc->widths[is_uv]);
+ plane_src += src_bc->strides[is_uv];
+ plane_dst += dst_bc->strides[is_uv];
+ }
}
-
aom_yv12_extend_frame_borders_c(dst_bc);
}
diff --git a/third_party/aom/aom_scale/yv12config.h b/third_party/aom/aom_scale/yv12config.h
index 76bfe9d7b..0c47f650d 100644
--- a/third_party/aom/aom_scale/yv12config.h
+++ b/third_party/aom/aom_scale/yv12config.h
@@ -34,31 +34,59 @@ extern "C" {
#define AOM_BORDER_IN_PIXELS 160
typedef struct yv12_buffer_config {
- int y_width;
- int y_height;
- int y_crop_width;
- int y_crop_height;
- int y_stride;
-
- int uv_width;
- int uv_height;
- int uv_crop_width;
- int uv_crop_height;
- int uv_stride;
-
- int alpha_width;
- int alpha_height;
- int alpha_stride;
-
- uint8_t *y_buffer;
- uint8_t *u_buffer;
- uint8_t *v_buffer;
- uint8_t *alpha_buffer;
+ union {
+ struct {
+ int y_width;
+ int uv_width;
+ int alpha_width;
+ };
+ int widths[3];
+ };
+ union {
+ struct {
+ int y_height;
+ int uv_height;
+ int alpha_height;
+ };
+ int heights[3];
+ };
+ union {
+ struct {
+ int y_crop_width;
+ int uv_crop_width;
+ };
+ int crop_widths[2];
+ };
+ union {
+ struct {
+ int y_crop_height;
+ int uv_crop_height;
+ };
+ int crop_heights[2];
+ };
+ union {
+ struct {
+ int y_stride;
+ int uv_stride;
+ int alpha_stride;
+ };
+ int strides[3];
+ };
+ union {
+ struct {
+ uint8_t *y_buffer;
+ uint8_t *u_buffer;
+ uint8_t *v_buffer;
+ uint8_t *alpha_buffer;
+ };
+ uint8_t *buffers[4];
+ };
#if CONFIG_HIGHBITDEPTH && CONFIG_GLOBAL_MOTION
// If the frame is stored in a 16-bit buffer, this stores an 8-bit version
// for use in global motion detection. It is allocated on-demand.
uint8_t *y_buffer_8bit;
+ int buf_8bit_valid;
#endif
uint8_t *buffer_alloc;
@@ -69,10 +97,8 @@ typedef struct yv12_buffer_config {
int subsampling_y;
unsigned int bit_depth;
aom_color_space_t color_space;
-#if CONFIG_COLORSPACE_HEADERS
aom_transfer_function_t transfer_function;
aom_chroma_sample_position_t chroma_sample_position;
-#endif
aom_color_range_t color_range;
int render_width;
int render_height;
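Note: each group of per-plane fields is wrapped in an anonymous union so a
plane can be addressed by name (u_buffer) or by index (buffers[1]) over the
same storage; the loop-based rewrites elsewhere in this patch depend on it.
A cut-down sketch of the technique (anonymous struct members, C11 or a common
compiler extension):

  #include <stdint.h>

  typedef struct {
    union {
      struct { uint8_t *y_buffer, *u_buffer, *v_buffer, *alpha_buffer; };
      uint8_t *buffers[4];
    };
  } frame_planes_t; /* hypothetical reduction of yv12_buffer_config */

  /* buffers[1] and u_buffer alias the same pointer, so indexed and
   * name-based code interoperate. */
  static uint8_t *u_plane(frame_planes_t *f) { return f->buffers[1]; }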
diff --git a/third_party/aom/aomdec.c b/third_party/aom/aomdec.c
index 035572c70..e07af353a 100644
--- a/third_party/aom/aomdec.c
+++ b/third_party/aom/aomdec.c
@@ -87,8 +87,6 @@ static const arg_def_t frameparallelarg =
ARG_DEF(NULL, "frame-parallel", 0, "Frame parallel decode");
static const arg_def_t verbosearg =
ARG_DEF("v", "verbose", 0, "Show version string");
-static const arg_def_t error_concealment =
- ARG_DEF(NULL, "error-concealment", 0, "Enable decoder error-concealment");
static const arg_def_t scalearg =
ARG_DEF("S", "scale", 0, "Scale output frames uniformly");
static const arg_def_t continuearg =
@@ -131,7 +129,6 @@ static const arg_def_t *all_args[] = { &codecarg,
&fb_arg,
&md5arg,
&framestatsarg,
- &error_concealment,
&continuearg,
#if CONFIG_HIGHBITDEPTH
&outbitdeptharg,
@@ -507,7 +504,6 @@ static int main_loop(int argc, const char **argv_) {
int do_md5 = 0, progress = 0, frame_parallel = 0;
int stop_after = 0, postproc = 0, summary = 0, quiet = 1;
int arg_skip = 0;
- int ec_enabled = 0;
int keep_going = 0;
const AvxInterface *interface = NULL;
const AvxInterface *fourcc_interface = NULL;
@@ -719,7 +715,6 @@ static int main_loop(int argc, const char **argv_) {
if (!interface) interface = get_aom_decoder_by_index(0);
dec_flags = (postproc ? AOM_CODEC_USE_POSTPROC : 0) |
- (ec_enabled ? AOM_CODEC_USE_ERROR_CONCEALMENT : 0) |
(frame_parallel ? AOM_CODEC_USE_FRAME_THREADING : 0);
if (aom_codec_dec_init(&decoder, interface->codec_interface(), &cfg,
dec_flags)) {
diff --git a/third_party/aom/aomenc.c b/third_party/aom/aomenc.c
index 27372712e..43ad4bf03 100644
--- a/third_party/aom/aomenc.c
+++ b/third_party/aom/aomenc.c
@@ -290,18 +290,23 @@ static const arg_def_t dropframe_thresh =
ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
static const arg_def_t resize_mode =
ARG_DEF(NULL, "resize-mode", 1, "Frame resize mode");
-static const arg_def_t resize_numerator =
- ARG_DEF(NULL, "resize-numerator", 1, "Frame resize numerator");
-static const arg_def_t resize_kf_numerator =
- ARG_DEF(NULL, "resize-kf-numerator", 1, "Frame resize keyframe numerator");
+static const arg_def_t resize_denominator =
+ ARG_DEF(NULL, "resize-denominator", 1, "Frame resize denominator");
+static const arg_def_t resize_kf_denominator = ARG_DEF(
+ NULL, "resize-kf-denominator", 1, "Frame resize keyframe denominator");
#if CONFIG_FRAME_SUPERRES
static const arg_def_t superres_mode =
ARG_DEF(NULL, "superres-mode", 1, "Frame super-resolution mode");
-static const arg_def_t superres_numerator =
- ARG_DEF(NULL, "superres-numerator", 1, "Frame super-resolution numerator");
-static const arg_def_t superres_kf_numerator =
- ARG_DEF(NULL, "superres-kf-numerator", 1,
- "Frame super-resolution keyframe numerator");
+static const arg_def_t superres_denominator = ARG_DEF(
+ NULL, "superres-denominator", 1, "Frame super-resolution denominator");
+static const arg_def_t superres_kf_denominator =
+ ARG_DEF(NULL, "superres-kf-denominator", 1,
+ "Frame super-resolution keyframe denominator");
+static const arg_def_t superres_qthresh = ARG_DEF(
+ NULL, "superres-qthresh", 1, "Frame super-resolution qindex threshold");
+static const arg_def_t superres_kf_qthresh =
+ ARG_DEF(NULL, "superres-kf-qthresh", 1,
+ "Frame super-resolution keyframe qindex threshold");
#endif // CONFIG_FRAME_SUPERRES
static const struct arg_enum_list end_usage_enum[] = { { "vbr", AOM_VBR },
{ "cbr", AOM_CBR },
@@ -328,12 +333,14 @@ static const arg_def_t buf_optimal_sz =
ARG_DEF(NULL, "buf-optimal-sz", 1, "Client optimal buffer size (ms)");
static const arg_def_t *rc_args[] = { &dropframe_thresh,
&resize_mode,
- &resize_numerator,
- &resize_kf_numerator,
+ &resize_denominator,
+ &resize_kf_denominator,
#if CONFIG_FRAME_SUPERRES
&superres_mode,
- &superres_numerator,
- &superres_kf_numerator,
+ &superres_denominator,
+ &superres_kf_denominator,
+ &superres_qthresh,
+ &superres_kf_qthresh,
#endif // CONFIG_FRAME_SUPERRES
&end_usage,
&target_bitrate,
@@ -377,10 +384,16 @@ static const arg_def_t arnr_maxframes =
static const arg_def_t arnr_strength =
ARG_DEF(NULL, "arnr-strength", 1, "AltRef filter strength (0..6)");
static const struct arg_enum_list tuning_enum[] = {
- { "psnr", AOM_TUNE_PSNR }, { "ssim", AOM_TUNE_SSIM }, { NULL, 0 }
+ { "psnr", AOM_TUNE_PSNR },
+ { "ssim", AOM_TUNE_SSIM },
+#if CONFIG_DIST_8X8
+ { "cdef-dist", AOM_TUNE_CDEF_DIST },
+ { "daala-dist", AOM_TUNE_DAALA_DIST },
+#endif
+ { NULL, 0 }
};
-static const arg_def_t tune_ssim =
- ARG_DEF_ENUM(NULL, "tune", 1, "Material to favor", tuning_enum);
+static const arg_def_t tune_metric =
+ ARG_DEF_ENUM(NULL, "tune", 1, "Distortion metric tuned with", tuning_enum);
static const arg_def_t cq_level =
ARG_DEF(NULL, "cq-level", 1, "Constant/Constrained Quality level");
static const arg_def_t max_intra_rate_pct =
@@ -399,6 +412,12 @@ static const arg_def_t tile_cols =
static const arg_def_t tile_rows =
ARG_DEF(NULL, "tile-rows", 1,
"Number of tile rows to use, log2 (set to 0 while threads > 1)");
+#if CONFIG_MAX_TILE
+static const arg_def_t tile_width =
+ ARG_DEF(NULL, "tile-width", 1, "Tile widths (comma separated)");
+static const arg_def_t tile_height =
+ ARG_DEF(NULL, "tile-height", 1, "Tile heights (command separated)");
+#endif
#if CONFIG_DEPENDENT_HORZTILES
static const arg_def_t tile_dependent_rows =
ARG_DEF(NULL, "tile-dependent-rows", 1, "Enable dependent Tile rows");
@@ -418,6 +437,11 @@ static const arg_def_t qm_min = ARG_DEF(
static const arg_def_t qm_max = ARG_DEF(
NULL, "qm-max", 1, "Max quant matrix flatness (0..15), default is 16");
#endif
+#if CONFIG_DIST_8X8
+static const arg_def_t enable_dist_8x8 =
+ ARG_DEF(NULL, "enable-dist-8x8", 1,
+ "Enable dist-8x8 (0: false (default), 1: true)");
+#endif // CONFIG_DIST_8X8
static const arg_def_t num_tg = ARG_DEF(
NULL, "num-tile-groups", 1, "Maximum number of tile groups, default is 1");
static const arg_def_t mtu_size =
@@ -432,7 +456,7 @@ static const arg_def_t frame_parallel_decoding =
ARG_DEF(NULL, "frame-parallel", 1,
"Enable frame parallel decodability features "
"(0: false (default), 1: true)");
-#if CONFIG_DELTA_Q && !CONFIG_EXT_DELTA_Q
+#if !CONFIG_EXT_DELTA_Q
static const arg_def_t aq_mode = ARG_DEF(
NULL, "aq-mode", 1,
"Adaptive quantization mode (0: off (default), 1: variance 2: complexity, "
@@ -463,29 +487,17 @@ static const arg_def_t max_gf_interval = ARG_DEF(
"max gf/arf frame interval (default 0, indicating in-built behavior)");
static const struct arg_enum_list color_space_enum[] = {
- { "unknown", AOM_CS_UNKNOWN },
- { "bt601", AOM_CS_BT_601 },
- { "bt709", AOM_CS_BT_709 },
- { "smpte170", AOM_CS_SMPTE_170 },
- { "smpte240", AOM_CS_SMPTE_240 },
-#if CONFIG_COLORSPACE_HEADERS
- { "bt2020ncl", AOM_CS_BT_2020_NCL },
- { "bt2020cl", AOM_CS_BT_2020_CL },
- { "sRGB", AOM_CS_SRGB },
- { "ICtCp", AOM_CS_ICTCP },
-#else
- { "bt2020", AOM_CS_BT_2020 },
- { "reserved", AOM_CS_RESERVED },
- { "sRGB", AOM_CS_SRGB },
-#endif
- { NULL, 0 }
+ { "unknown", AOM_CS_UNKNOWN }, { "bt601", AOM_CS_BT_601 },
+ { "bt709", AOM_CS_BT_709 }, { "smpte170", AOM_CS_SMPTE_170 },
+ { "smpte240", AOM_CS_SMPTE_240 }, { "bt2020ncl", AOM_CS_BT_2020_NCL },
+ { "bt2020cl", AOM_CS_BT_2020_CL }, { "sRGB", AOM_CS_SRGB },
+ { "ICtCp", AOM_CS_ICTCP }, { NULL, 0 }
};
static const arg_def_t input_color_space =
- ARG_DEF_ENUM(NULL, "color-space", 1, "The color space of input content:",
- color_space_enum);
+ ARG_DEF_ENUM(NULL, "color-space", 1,
+ "The color space of input content:", color_space_enum);
-#if CONFIG_COLORSPACE_HEADERS
static const struct arg_enum_list transfer_function_enum[] = {
{ "unknown", AOM_TF_UNKNOWN },
{ "bt709", AOM_TF_BT_709 },
@@ -495,8 +507,8 @@ static const struct arg_enum_list transfer_function_enum[] = {
};
static const arg_def_t input_transfer_function = ARG_DEF_ENUM(
- NULL, "transfer-function", 1, "The transfer function of input content:",
- transfer_function_enum);
+ NULL, "transfer-function", 1,
+ "The transfer function of input content:", transfer_function_enum);
static const struct arg_enum_list chroma_sample_position_enum[] = {
{ "unknown", AOM_CSP_UNKNOWN },
@@ -509,7 +521,6 @@ static const arg_def_t input_chroma_sample_position =
ARG_DEF_ENUM(NULL, "chroma-sample-position", 1,
"The chroma sample position when chroma 4:2:0 is signaled:",
chroma_sample_position_enum);
-#endif
static const struct arg_enum_list tune_content_enum[] = {
{ "default", AOM_CONTENT_DEFAULT },
@@ -550,7 +561,7 @@ static const arg_def_t *av1_args[] = { &cpu_used_av1,
#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
&arnr_maxframes,
&arnr_strength,
- &tune_ssim,
+ &tune_metric,
&cq_level,
&max_intra_rate_pct,
&max_inter_rate_pct,
@@ -561,6 +572,9 @@ static const arg_def_t *av1_args[] = { &cpu_used_av1,
&qm_min,
&qm_max,
#endif
+#if CONFIG_DIST_8X8
+ &enable_dist_8x8,
+#endif
&frame_parallel_decoding,
&aq_mode,
#if CONFIG_EXT_DELTA_Q
@@ -570,10 +584,8 @@ static const arg_def_t *av1_args[] = { &cpu_used_av1,
&noise_sens,
&tune_content,
&input_color_space,
-#if CONFIG_COLORSPACE_HEADERS
&input_transfer_function,
&input_chroma_sample_position,
-#endif
&min_gf_interval,
&max_gf_interval,
#if CONFIG_EXT_PARTITION
@@ -617,6 +629,9 @@ static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
AV1E_SET_QM_MIN,
AV1E_SET_QM_MAX,
#endif
+#if CONFIG_DIST_8X8
+ AV1E_SET_ENABLE_DIST_8X8,
+#endif
AV1E_SET_FRAME_PARALLEL_DECODING,
AV1E_SET_AQ_MODE,
#if CONFIG_EXT_DELTA_Q
@@ -626,10 +641,8 @@ static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
AV1E_SET_NOISE_SENSITIVITY,
AV1E_SET_TUNE_CONTENT,
AV1E_SET_COLOR_SPACE,
-#if CONFIG_COLORSPACE_HEADERS
AV1E_SET_TRANSFER_FUNCTION,
AV1E_SET_CHROMA_SAMPLE_POSITION,
-#endif
AV1E_SET_MIN_GF_INTERVAL,
AV1E_SET_MAX_GF_INTERVAL,
#if CONFIG_EXT_PARTITION
@@ -1044,17 +1057,21 @@ static int parse_stream_params(struct AvxEncoderConfig *global,
config->cfg.rc_dropframe_thresh = arg_parse_uint(&arg);
} else if (arg_match(&arg, &resize_mode, argi)) {
config->cfg.rc_resize_mode = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &resize_numerator, argi)) {
- config->cfg.rc_resize_numerator = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &resize_kf_numerator, argi)) {
- config->cfg.rc_resize_kf_numerator = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &resize_denominator, argi)) {
+ config->cfg.rc_resize_denominator = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &resize_kf_denominator, argi)) {
+ config->cfg.rc_resize_kf_denominator = arg_parse_uint(&arg);
#if CONFIG_FRAME_SUPERRES
} else if (arg_match(&arg, &superres_mode, argi)) {
config->cfg.rc_superres_mode = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &superres_numerator, argi)) {
- config->cfg.rc_superres_numerator = arg_parse_uint(&arg);
- } else if (arg_match(&arg, &superres_kf_numerator, argi)) {
- config->cfg.rc_superres_kf_numerator = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &superres_denominator, argi)) {
+ config->cfg.rc_superres_denominator = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &superres_kf_denominator, argi)) {
+ config->cfg.rc_superres_kf_denominator = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &superres_qthresh, argi)) {
+ config->cfg.rc_superres_qthresh = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &superres_kf_qthresh, argi)) {
+ config->cfg.rc_superres_kf_qthresh = arg_parse_uint(&arg);
#endif // CONFIG_FRAME_SUPERRES
} else if (arg_match(&arg, &end_usage, argi)) {
config->cfg.rc_end_usage = arg_parse_enum_or_int(&arg);
@@ -1094,6 +1111,14 @@ static int parse_stream_params(struct AvxEncoderConfig *global,
config->cfg.kf_max_dist = arg_parse_uint(&arg);
} else if (arg_match(&arg, &kf_disabled, argi)) {
config->cfg.kf_mode = AOM_KF_DISABLED;
+#if CONFIG_MAX_TILE
+ } else if (arg_match(&arg, &tile_width, argi)) {
+ config->cfg.tile_width_count =
+ arg_parse_list(&arg, config->cfg.tile_widths, MAX_TILE_WIDTHS);
+ } else if (arg_match(&arg, &tile_height, argi)) {
+ config->cfg.tile_height_count =
+ arg_parse_list(&arg, config->cfg.tile_heights, MAX_TILE_HEIGHTS);
+#endif
} else {
int i, match = 0;
for (i = 0; ctrl_args[i]; i++) {
@@ -1261,12 +1286,14 @@ static void show_stream_config(struct stream_state *stream,
#endif // CONFIG_EXT_TILE
SHOW(rc_dropframe_thresh);
SHOW(rc_resize_mode);
- SHOW(rc_resize_numerator);
- SHOW(rc_resize_kf_numerator);
+ SHOW(rc_resize_denominator);
+ SHOW(rc_resize_kf_denominator);
#if CONFIG_FRAME_SUPERRES
SHOW(rc_superres_mode);
- SHOW(rc_superres_numerator);
- SHOW(rc_superres_kf_numerator);
+ SHOW(rc_superres_denominator);
+ SHOW(rc_superres_kf_denominator);
+ SHOW(rc_superres_qthresh);
+ SHOW(rc_superres_kf_qthresh);
#endif // CONFIG_FRAME_SUPERRES
SHOW(rc_end_usage);
SHOW(rc_target_bitrate);
@@ -1638,50 +1665,35 @@ static float usec_to_fps(uint64_t usec, unsigned int frames) {
}
static void test_decode(struct stream_state *stream,
- enum TestDecodeFatality fatal,
- const AvxInterface *codec) {
+ enum TestDecodeFatality fatal) {
aom_image_t enc_img, dec_img;
if (stream->mismatch_seen) return;
/* Get the internal reference frame */
- if (strcmp(codec->name, "vp8") == 0) {
- struct aom_ref_frame ref_enc, ref_dec;
- const unsigned int frame_width = (stream->config.cfg.g_w + 15) & ~15;
- const unsigned int frame_height = (stream->config.cfg.g_h + 15) & ~15;
- aom_img_alloc(&ref_enc.img, AOM_IMG_FMT_I420, frame_width, frame_height, 1);
- enc_img = ref_enc.img;
- aom_img_alloc(&ref_dec.img, AOM_IMG_FMT_I420, frame_width, frame_height, 1);
- dec_img = ref_dec.img;
-
- ref_enc.frame_type = AOM_LAST_FRAME;
- ref_dec.frame_type = AOM_LAST_FRAME;
- aom_codec_control(&stream->encoder, AOM_COPY_REFERENCE, &ref_enc);
- aom_codec_control(&stream->decoder, AOM_COPY_REFERENCE, &ref_dec);
- } else {
- aom_codec_control(&stream->encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img);
- aom_codec_control(&stream->decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img);
+ aom_codec_control(&stream->encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img);
+ aom_codec_control(&stream->decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img);
#if CONFIG_HIGHBITDEPTH
- if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
- (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
- if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- aom_image_t enc_hbd_img;
- aom_img_alloc(&enc_hbd_img, enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH,
- enc_img.d_w, enc_img.d_h, 16);
- aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
- enc_img = enc_hbd_img;
- }
- if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
- aom_image_t dec_hbd_img;
- aom_img_alloc(&dec_hbd_img, dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH,
- dec_img.d_w, dec_img.d_h, 16);
- aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
- dec_img = dec_hbd_img;
- }
+ if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
+ (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
+ if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
+ aom_image_t enc_hbd_img;
+ aom_img_alloc(&enc_hbd_img, enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH,
+ enc_img.d_w, enc_img.d_h, 16);
+ aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
+ enc_img = enc_hbd_img;
+ }
+ if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
+ aom_image_t dec_hbd_img;
+ aom_img_alloc(&dec_hbd_img, dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH,
+ dec_img.d_w, dec_img.d_h, 16);
+ aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
+ dec_img = dec_hbd_img;
}
-#endif
}
+#endif
+
ctx_exit_on_error(&stream->encoder, "Failed to get encoder reference frame");
ctx_exit_on_error(&stream->decoder, "Failed to get decoder reference frame");
@@ -2103,7 +2115,7 @@ int main(int argc, const char **argv_) {
if (got_data && global.test_decode != TEST_DECODE_OFF) {
FOREACH_STREAM(stream, streams) {
- test_decode(stream, global.test_decode, global.codec);
+ test_decode(stream, global.test_decode);
}
}
}
diff --git a/third_party/aom/args.c b/third_party/aom/args.c
index 571103595..b9384de70 100644
--- a/third_party/aom/args.c
+++ b/third_party/aom/args.c
@@ -210,3 +210,28 @@ int arg_parse_enum_or_int(const struct arg *arg) {
if (arg->def->enums) return arg_parse_enum(arg);
return arg_parse_int(arg);
}
+
+// parse a comma separated list of at most n integers
+// return the number of elements in the list
+int arg_parse_list(const struct arg *arg, int *list, int n) {
+ const char *ptr = arg->val;
+ char *endptr;
+ int i = 0;
+
+ while (ptr[0] != '\0') {
+ const long rawval = strtol(ptr, &endptr, 10);
+ if (rawval < INT_MIN || rawval > INT_MAX) {
+ die("Option %s: Value %ld out of range for signed int\n", arg->name,
+ rawval);
+ } else if (i >= n) {
+ die("Option %s: List has more than %d entries\n", arg->name, n);
+ } else if (*endptr == ',') {
+ endptr++;
+ } else if (*endptr != '\0') {
+ die("Option %s: Bad list separator '%c'\n", arg->name, *endptr);
+ }
+ list[i++] = (int)rawval;
+ ptr = endptr;
+ }
+ return i;
+}
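Note: arg_parse_list() scans the comma-separated value left to right,
validating each element and each separator as it goes; it serves the
--tile-width and --tile-height options added earlier in this patch.
Hypothetical usage, assuming arg.val holds "4,8,16":

  int widths[8];
  const int n = arg_parse_list(&arg, widths, 8);
  /* n == 3; widths[0..2] == {4, 8, 16} */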
diff --git a/third_party/aom/args.h b/third_party/aom/args.h
index e7841fc64..c3427bcfa 100644
--- a/third_party/aom/args.h
+++ b/third_party/aom/args.h
@@ -57,6 +57,7 @@ int arg_parse_int(const struct arg *arg);
struct aom_rational arg_parse_rational(const struct arg *arg);
int arg_parse_enum(const struct arg *arg);
int arg_parse_enum_or_int(const struct arg *arg);
+int arg_parse_list(const struct arg *arg, int *list, int n);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/av1.cmake b/third_party/aom/av1/av1.cmake
index 945166b2d..140eec815 100644
--- a/third_party/aom/av1/av1.cmake
+++ b/third_party/aom/av1/av1.cmake
@@ -89,7 +89,8 @@ set(AOM_AV1_DECODER_SOURCES
"${AOM_ROOT}/av1/decoder/dsubexp.c"
"${AOM_ROOT}/av1/decoder/dsubexp.h"
"${AOM_ROOT}/av1/decoder/dthread.c"
- "${AOM_ROOT}/av1/decoder/dthread.h")
+ "${AOM_ROOT}/av1/decoder/dthread.h"
+ "${AOM_ROOT}/av1/decoder/symbolrate.h")
set(AOM_AV1_ENCODER_SOURCES
"${AOM_ROOT}/av1/av1_cx_iface.c"
@@ -123,6 +124,8 @@ set(AOM_AV1_ENCODER_SOURCES
"${AOM_ROOT}/av1/encoder/extend.h"
"${AOM_ROOT}/av1/encoder/firstpass.c"
"${AOM_ROOT}/av1/encoder/firstpass.h"
+ "${AOM_ROOT}/av1/encoder/hash.c"
+ "${AOM_ROOT}/av1/encoder/hash.h"
"${AOM_ROOT}/av1/encoder/hybrid_fwd_txfm.c"
"${AOM_ROOT}/av1/encoder/hybrid_fwd_txfm.h"
"${AOM_ROOT}/av1/encoder/lookahead.c"
@@ -131,6 +134,8 @@ set(AOM_AV1_ENCODER_SOURCES
"${AOM_ROOT}/av1/encoder/mbgraph.h"
"${AOM_ROOT}/av1/encoder/mcomp.c"
"${AOM_ROOT}/av1/encoder/mcomp.h"
+ "${AOM_ROOT}/av1/encoder/palette.c"
+ "${AOM_ROOT}/av1/encoder/palette.h"
"${AOM_ROOT}/av1/encoder/picklpf.c"
"${AOM_ROOT}/av1/encoder/picklpf.h"
"${AOM_ROOT}/av1/encoder/ratectrl.c"
@@ -167,11 +172,6 @@ set(AOM_AV1_COMMON_INTRIN_AVX2
"${AOM_ROOT}/av1/common/x86/highbd_inv_txfm_avx2.c"
"${AOM_ROOT}/av1/common/x86/hybrid_inv_txfm_avx2.c")
-set(AOM_AV1_COMMON_INTRIN_DSPR2
- "${AOM_ROOT}/av1/common/mips/dspr2/av1_itrans16_dspr2.c"
- "${AOM_ROOT}/av1/common/mips/dspr2/av1_itrans4_dspr2.c"
- "${AOM_ROOT}/av1/common/mips/dspr2/av1_itrans8_dspr2.c")
-
set(AOM_AV1_COMMON_INTRIN_MSA
"${AOM_ROOT}/av1/common/mips/msa/av1_idct16x16_msa.c"
"${AOM_ROOT}/av1/common/mips/msa/av1_idct4x4_msa.c"
@@ -190,9 +190,6 @@ set(AOM_AV1_ENCODER_INTRIN_SSE2
set(AOM_AV1_ENCODER_ASM_SSSE3_X86_64
"${AOM_ROOT}/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm")
-set(AOM_AV1_ENCODER_INTRIN_SSSE3
- "${AOM_ROOT}/av1/encoder/x86/dct_ssse3.c")
-
set(AOM_AV1_ENCODER_INTRIN_SSE4_1
${AOM_AV1_ENCODER_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/encoder/x86/av1_highbd_quantize_sse4.c"
@@ -222,7 +219,6 @@ if (CONFIG_HIGHBITDEPTH)
else ()
set(AOM_AV1_COMMON_INTRIN_NEON
${AOM_AV1_COMMON_INTRIN_NEON}
- "${AOM_ROOT}/av1/encoder/arm/neon/dct_neon.c"
"${AOM_ROOT}/av1/common/arm/neon/iht4x4_add_neon.c"
"${AOM_ROOT}/av1/common/arm/neon/iht8x8_add_neon.c")
@@ -234,14 +230,10 @@ endif ()
if (CONFIG_CDEF)
set(AOM_AV1_COMMON_SOURCES
${AOM_AV1_COMMON_SOURCES}
- "${AOM_ROOT}/av1/common/clpf.c"
- "${AOM_ROOT}/av1/common/clpf_simd.h"
- "${AOM_ROOT}/av1/common/cdef_simd.h"
"${AOM_ROOT}/av1/common/cdef.c"
"${AOM_ROOT}/av1/common/cdef.h"
- "${AOM_ROOT}/av1/common/od_dering.c"
- "${AOM_ROOT}/av1/common/od_dering.h"
- "${AOM_ROOT}/av1/common/od_dering_simd.h")
+ "${AOM_ROOT}/av1/common/cdef_block.c"
+ "${AOM_ROOT}/av1/common/cdef_block.h")
set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES}
@@ -249,32 +241,70 @@ if (CONFIG_CDEF)
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
- "${AOM_ROOT}/av1/common/clpf_sse2.c"
- "${AOM_ROOT}/av1/common/od_dering_sse2.c")
+ "${AOM_ROOT}/av1/common/cdef_block_sse2.c")
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
- "${AOM_ROOT}/av1/common/clpf_ssse3.c"
- "${AOM_ROOT}/av1/common/od_dering_ssse3.c")
+ "${AOM_ROOT}/av1/common/cdef_block_ssse3.c")
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
- "${AOM_ROOT}/av1/common/clpf_sse4.c"
- "${AOM_ROOT}/av1/common/od_dering_sse4.c")
+ "${AOM_ROOT}/av1/common/cdef_block_sse4.c")
+
+ set(AOM_AV1_COMMON_INTRIN_AVX2
+ ${AOM_AV1_COMMON_INTRIN_AVX2}
+ "${AOM_ROOT}/av1/common/cdef_block_avx2.c")
set(AOM_AV1_COMMON_INTRIN_NEON
${AOM_AV1_COMMON_INTRIN_NEON}
- "${AOM_ROOT}/av1/common/clpf_neon.c"
- "${AOM_ROOT}/av1/common/od_dering_neon.c")
+ "${AOM_ROOT}/av1/common/cdef_block_neon.c")
+
+ if (NOT CONFIG_CDEF_SINGLEPASS)
+ set(AOM_AV1_COMMON_SOURCES
+ ${AOM_AV1_COMMON_SOURCES}
+ "${AOM_ROOT}/av1/common/clpf.c"
+ "${AOM_ROOT}/av1/common/clpf_simd.h"
+ "${AOM_ROOT}/av1/common/cdef_block_simd.h")
+
+ set(AOM_AV1_COMMON_INTRIN_SSE2
+ ${AOM_AV1_COMMON_INTRIN_SSE2}
+ "${AOM_ROOT}/av1/common/clpf_sse2.c")
+
+ set(AOM_AV1_COMMON_INTRIN_SSSE3
+ ${AOM_AV1_COMMON_INTRIN_SSSE3}
+ "${AOM_ROOT}/av1/common/clpf_ssse3.c")
+
+ set(AOM_AV1_COMMON_INTRIN_SSE4_1
+ ${AOM_AV1_COMMON_INTRIN_SSE4_1}
+ "${AOM_ROOT}/av1/common/clpf_sse4.c")
+
+ set(AOM_AV1_COMMON_INTRIN_NEON
+ ${AOM_AV1_COMMON_INTRIN_NEON}
+ "${AOM_ROOT}/av1/common/clpf_neon.c")
+ endif ()
endif ()
if (CONFIG_CONVOLVE_ROUND)
+ set(AOM_AV1_COMMON_INTRIN_SSE2
+ ${AOM_AV1_COMMON_INTRIN_SSE2}
+ "${AOM_ROOT}/av1/common/x86/convolve_2d_sse2.c")
+ if (CONFIG_HIGHBITDEPTH)
+ set(AOM_AV1_COMMON_INTRIN_SSSE3
+ ${AOM_AV1_COMMON_INTRIN_SSSE3}
+ "${AOM_ROOT}/av1/common/x86/highbd_convolve_2d_ssse3.c")
+ endif ()
+
+ if (NOT CONFIG_COMPOUND_ROUND)
+ set(AOM_AV1_COMMON_INTRIN_SSE4_1
+ ${AOM_AV1_COMMON_INTRIN_SSE4_1}
+ "${AOM_ROOT}/av1/common/x86/av1_convolve_scale_sse4.c")
+ endif ()
+
set(AOM_AV1_COMMON_INTRIN_AVX2
${AOM_AV1_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/av1/common/x86/convolve_avx2.c")
endif ()
-if (CONFIG_EXT_INTER)
set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES}
"${AOM_ROOT}/av1/encoder/wedge_utils.c")
@@ -282,7 +312,6 @@ if (CONFIG_EXT_INTER)
set(AOM_AV1_ENCODER_INTRIN_SSE2
${AOM_AV1_ENCODER_INTRIN_SSE2}
"${AOM_ROOT}/av1/encoder/x86/wedge_utils_sse2.c")
-endif ()
if (CONFIG_FILTER_INTRA)
set(AOM_AV1_COMMON_INTRIN_SSE4_1
@@ -297,6 +326,13 @@ if (CONFIG_ACCOUNTING)
"${AOM_ROOT}/av1/decoder/accounting.h")
endif ()
+if (CONFIG_BGSPRITE)
+ set(AOM_AV1_ENCODER_SOURCES
+ ${AOM_AV1_ENCODER_SOURCES}
+ "${AOM_ROOT}/av1/encoder/bgsprite.c"
+ "${AOM_ROOT}/av1/encoder/bgsprite.h")
+endif ()
+
if (CONFIG_GLOBAL_MOTION)
set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES}
@@ -331,11 +367,21 @@ if (CONFIG_INTERNAL_STATS)
"${AOM_ROOT}/av1/encoder/blockiness.c")
endif ()
-if (CONFIG_PALETTE)
+if (CONFIG_LV_MAP)
+ set(AOM_AV1_COMMON_SOURCES
+ ${AOM_AV1_COMMON_SOURCES}
+ "${AOM_ROOT}/av1/common/txb_common.c"
+ "${AOM_ROOT}/av1/common/txb_common.h")
+
+ set(AOM_AV1_DECODER_SOURCES
+ ${AOM_AV1_DECODER_SOURCES}
+ "${AOM_ROOT}/av1/decoder/decodetxb.c"
+ "${AOM_ROOT}/av1/decoder/decodetxb.h")
+
set(AOM_AV1_ENCODER_SOURCES
${AOM_AV1_ENCODER_SOURCES}
- "${AOM_ROOT}/av1/encoder/palette.c"
- "${AOM_ROOT}/av1/encoder/palette.h")
+ "${AOM_ROOT}/av1/encoder/encodetxb.c"
+ "${AOM_ROOT}/av1/encoder/encodetxb.h")
endif ()
if (CONFIG_CFL)
@@ -361,6 +407,19 @@ if (CONFIG_LOOP_RESTORATION)
"${AOM_ROOT}/av1/encoder/pickrst.h")
endif ()
+if (CONFIG_INTRA_EDGE)
+ set(AOM_AV1_COMMON_INTRIN_SSE4_1
+ ${AOM_AV1_COMMON_INTRIN_SSE4_1}
+ "${AOM_ROOT}/av1/common/x86/intra_edge_sse4.c")
+endif ()
+
+if (CONFIG_NCOBMC_ADAPT_WEIGHT)
+ set(AOM_AV1_COMMON_SOURCES
+ ${AOM_AV1_COMMON_SOURCES}
+ "${AOM_ROOT}/av1/common/ncobmc_kernels.c"
+ "${AOM_ROOT}/av1/common/ncobmc_kernels.h")
+endif ()
+
if (CONFIG_PVQ)
set(AOM_AV1_COMMON_SOURCES
${AOM_AV1_COMMON_SOURCES}
@@ -417,9 +476,6 @@ if (CONFIG_PVQ)
${AOM_AV1_DECODER_INTRIN_SSE2}
"${AOM_ROOT}/av1/encoder/x86/dct_intrin_sse2.c")
- set(AOM_AV1_DECODER_INTRIN_SSSE3
- ${AOM_AV1_DECODER_INTRIN_SSSE3}
- "${AOM_ROOT}/av1/encoder/x86/dct_ssse3.c")
endif ()
endif ()
@@ -444,6 +500,28 @@ if (CONFIG_WARPED_MOTION OR CONFIG_GLOBAL_MOTION)
endif ()
endif ()
+if (CONFIG_HASH_ME)
+ set(AOM_AV1_ENCODER_SOURCES
+ ${AOM_AV1_ENCODER_SOURCES}
+ "${AOM_ROOT}/av1/encoder/hash_motion.h"
+ "${AOM_ROOT}/av1/encoder/hash_motion.c"
+ "${AOM_ROOT}/third_party/vector/vector.h"
+ "${AOM_ROOT}/third_party/vector/vector.c")
+endif ()
+
+if (CONFIG_Q_ADAPT_PROBS)
+ set(AOM_AV1_COMMON_SOURCES
+ ${AOM_AV1_COMMON_SOURCES}
+ "${AOM_ROOT}/av1/common/token_cdfs.h")
+endif ()
+
+if (CONFIG_XIPHRC)
+ set(AOM_AV1_ENCODER_SOURCES
+ ${AOM_AV1_ENCODER_SOURCES}
+ "${AOM_ROOT}/av1/encoder/ratectrl_xiph.c"
+ "${AOM_ROOT}/av1/encoder/ratectrl_xiph.h")
+endif ()
+
# Setup AV1 common/decoder/encoder targets. The libaom target must exist before
# this function is called.
function (setup_av1_targets)
@@ -472,7 +550,7 @@ function (setup_av1_targets)
endif ()
if (HAVE_SSE2)
- require_flag_nomsvc("-msse2" NO)
+ require_compiler_flag_nomsvc("-msse2" NO)
add_intrinsics_object_library("-msse2" "sse2" "aom_av1_common"
"AOM_AV1_COMMON_INTRIN_SSE2" "aom")
if (CONFIG_AV1_DECODER)
@@ -494,7 +572,7 @@ function (setup_av1_targets)
endif ()
if (HAVE_SSSE3)
- require_flag_nomsvc("-mssse3" NO)
+ require_compiler_flag_nomsvc("-mssse3" NO)
add_intrinsics_object_library("-mssse3" "ssse3" "aom_av1_common"
"AOM_AV1_COMMON_INTRIN_SSSE3" "aom")
@@ -504,15 +582,10 @@ function (setup_av1_targets)
"AOM_AV1_DECODER_INTRIN_SSSE3" "aom")
endif ()
endif ()
-
- if (CONFIG_AV1_ENCODER)
- add_intrinsics_object_library("-mssse3" "ssse3" "aom_av1_encoder"
- "AOM_AV1_ENCODER_INTRIN_SSSE3" "aom")
- endif ()
endif ()
if (HAVE_SSE4_1)
- require_flag_nomsvc("-msse4.1" NO)
+ require_compiler_flag_nomsvc("-msse4.1" NO)
add_intrinsics_object_library("-msse4.1" "sse4" "aom_av1_common"
"AOM_AV1_COMMON_INTRIN_SSE4_1" "aom")
@@ -530,7 +603,7 @@ function (setup_av1_targets)
endif ()
if (HAVE_AVX2)
- require_flag_nomsvc("-mavx2" NO)
+ require_compiler_flag_nomsvc("-mavx2" NO)
add_intrinsics_object_library("-mavx2" "avx2" "aom_av1_common"
"AOM_AV1_COMMON_INTRIN_AVX2" "aom")
@@ -556,11 +629,6 @@ function (setup_av1_targets)
endif ()
endif ()
- if (HAVE_DSPR2)
- add_intrinsics_object_library("" "dspr2" "aom_av1_common"
- "AOM_AV1_COMMON_INTRIN_DSPR2" "aom")
- endif ()
-
if (HAVE_MSA)
add_intrinsics_object_library("" "msa" "aom_av1_common"
"AOM_AV1_COMMON_INTRIN_MSA" "aom")
diff --git a/third_party/aom/av1/av1_common.mk b/third_party/aom/av1/av1_common.mk
index a8ba72016..35466ac88 100644
--- a/third_party/aom/av1/av1_common.mk
+++ b/third_party/aom/av1/av1_common.mk
@@ -75,6 +75,9 @@ AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d.c
AV1_COMMON_SRCS-yes += common/av1_inv_txfm1d_cfg.h
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/convolve_avx2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c
+ifeq ($(CONFIG_CONVOLVE_ROUND)x$(CONFIG_COMPOUND_ROUND),yesx)
+AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_convolve_scale_sse4.c
+endif
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c
endif
@@ -85,25 +88,31 @@ AV1_COMMON_SRCS-yes += common/restoration.h
AV1_COMMON_SRCS-yes += common/restoration.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/selfguided_sse4.c
endif
+ifeq ($(CONFIG_INTRA_EDGE),yes)
+AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/intra_edge_sse4.c
+endif
ifeq (yes,$(filter $(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION),yes))
AV1_COMMON_SRCS-yes += common/warped_motion.h
AV1_COMMON_SRCS-yes += common/warped_motion.c
endif
ifeq ($(CONFIG_CDEF),yes)
+ifeq ($(CONFIG_CDEF_SINGLEPASS),yes)
+AV1_COMMON_SRCS-$(HAVE_AVX2) += common/cdef_block_avx2.c
+else
AV1_COMMON_SRCS-yes += common/clpf.c
AV1_COMMON_SRCS-yes += common/clpf_simd.h
-AV1_COMMON_SRCS-yes += common/cdef_simd.h
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/clpf_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/clpf_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/clpf_sse4.c
AV1_COMMON_SRCS-$(HAVE_NEON) += common/clpf_neon.c
-AV1_COMMON_SRCS-$(HAVE_SSE2) += common/od_dering_sse2.c
-AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/od_dering_ssse3.c
-AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/od_dering_sse4.c
-AV1_COMMON_SRCS-$(HAVE_NEON) += common/od_dering_neon.c
-AV1_COMMON_SRCS-yes += common/od_dering.c
-AV1_COMMON_SRCS-yes += common/od_dering.h
-AV1_COMMON_SRCS-yes += common/od_dering_simd.h
+endif
+AV1_COMMON_SRCS-$(HAVE_SSE2) += common/cdef_block_sse2.c
+AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/cdef_block_ssse3.c
+AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/cdef_block_sse4.c
+AV1_COMMON_SRCS-$(HAVE_NEON) += common/cdef_block_neon.c
+AV1_COMMON_SRCS-yes += common/cdef_block.c
+AV1_COMMON_SRCS-yes += common/cdef_block.h
+AV1_COMMON_SRCS-yes += common/cdef_block_simd.h
AV1_COMMON_SRCS-yes += common/cdef.c
AV1_COMMON_SRCS-yes += common/cdef.h
endif
@@ -115,6 +124,10 @@ AV1_COMMON_SRCS-yes += common/cfl.h
AV1_COMMON_SRCS-yes += common/cfl.c
endif
+ifeq ($(CONFIG_MOTION_VAR),yes)
+AV1_COMMON_SRCS-yes += common/obmc.h
+endif
+
ifeq ($(CONFIG_PVQ),yes)
# PVQ from daala
AV1_COMMON_SRCS-yes += common/pvq.c
@@ -137,12 +150,6 @@ AV1_COMMON_SRCS-yes += common/pvq_state.h
AV1_COMMON_SRCS-yes += common/generic_code.h
endif
-ifneq ($(CONFIG_HIGHBITDEPTH),yes)
-AV1_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/av1_itrans4_dspr2.c
-AV1_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/av1_itrans8_dspr2.c
-AV1_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/av1_itrans16_dspr2.c
-endif
-
# common (msa)
AV1_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/av1_idct4x4_msa.c
AV1_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/av1_idct8x8_msa.c
@@ -185,4 +192,14 @@ AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/highbd_convolve_2d_ssse3.c
endif
endif
+
+ifeq ($(CONFIG_Q_ADAPT_PROBS),yes)
+AV1_COMMON_SRCS-yes += common/token_cdfs.h
+endif
+
+ifeq ($(CONFIG_NCOBMC_ADAPT_WEIGHT),yes)
+AV1_COMMON_SRCS-yes += common/ncobmc_kernels.h
+AV1_COMMON_SRCS-yes += common/ncobmc_kernels.c
+endif
+
$(eval $(call rtcd_h_template,av1_rtcd,av1/common/av1_rtcd_defs.pl))
diff --git a/third_party/aom/av1/av1_cx.mk b/third_party/aom/av1/av1_cx.mk
index 5c8c9b8f4..13f297403 100644
--- a/third_party/aom/av1/av1_cx.mk
+++ b/third_party/aom/av1/av1_cx.mk
@@ -63,6 +63,7 @@ AV1_CX_SRCS-yes += encoder/lookahead.c
AV1_CX_SRCS-yes += encoder/lookahead.h
AV1_CX_SRCS-yes += encoder/mcomp.h
AV1_CX_SRCS-yes += encoder/encoder.h
+AV1_CX_SRCS-yes += encoder/random.h
AV1_CX_SRCS-yes += encoder/ratectrl.h
ifeq ($(CONFIG_XIPHRC),yes)
AV1_CX_SRCS-yes += encoder/ratectrl_xiph.h
@@ -73,10 +74,9 @@ AV1_CX_SRCS-yes += encoder/tokenize.h
AV1_CX_SRCS-yes += encoder/treewriter.h
AV1_CX_SRCS-yes += encoder/mcomp.c
AV1_CX_SRCS-yes += encoder/encoder.c
-ifeq ($(CONFIG_PALETTE),yes)
+AV1_CX_SRCS-yes += encoder/k_means_template.h
AV1_CX_SRCS-yes += encoder/palette.h
AV1_CX_SRCS-yes += encoder/palette.c
-endif
AV1_CX_SRCS-yes += encoder/picklpf.c
AV1_CX_SRCS-yes += encoder/picklpf.h
AV1_CX_SRCS-$(CONFIG_LOOP_RESTORATION) += encoder/pickrst.c
@@ -107,6 +107,14 @@ AV1_CX_SRCS-yes += encoder/temporal_filter.c
AV1_CX_SRCS-yes += encoder/temporal_filter.h
AV1_CX_SRCS-yes += encoder/mbgraph.c
AV1_CX_SRCS-yes += encoder/mbgraph.h
+AV1_CX_SRCS-yes += encoder/hash.c
+AV1_CX_SRCS-yes += encoder/hash.h
+ifeq ($(CONFIG_HASH_ME),yes)
+AV1_CX_SRCS-yes += ../third_party/vector/vector.h
+AV1_CX_SRCS-yes += ../third_party/vector/vector.c
+AV1_CX_SRCS-yes += encoder/hash_motion.c
+AV1_CX_SRCS-yes += encoder/hash_motion.h
+endif
ifeq ($(CONFIG_CDEF),yes)
AV1_CX_SRCS-yes += encoder/pickcdef.c
endif
@@ -138,22 +146,18 @@ AV1_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/av1_quantize_ssse3_x86_64.asm
endif
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c
-AV1_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/hybrid_fwd_txfm_avx2.c
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/av1_highbd_quantize_sse4.c
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c
-ifeq ($(CONFIG_EXT_INTER),yes)
AV1_CX_SRCS-yes += encoder/wedge_utils.c
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/wedge_utils_sse2.c
-endif
AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/error_intrin_avx2.c
ifneq ($(CONFIG_HIGHBITDEPTH),yes)
-AV1_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/dct_neon.c
AV1_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/error_neon.c
endif
AV1_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/quantize_neon.c
diff --git a/third_party/aom/av1/av1_cx_iface.c b/third_party/aom/av1/av1_cx_iface.c
index 81fffd1f2..0f6c1c4d7 100644
--- a/third_party/aom/av1/av1_cx_iface.c
+++ b/third_party/aom/av1/av1_cx_iface.c
@@ -8,7 +8,6 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-
#include <stdlib.h>
#include <string.h>
@@ -23,6 +22,9 @@
#include "av1/encoder/firstpass.h"
#include "av1/av1_iface_common.h"
+#define MAG_SIZE (4)
+#define MAX_INDEX_SIZE (256)
+
struct av1_extracfg {
int cpu_used; // available cpu percentage in 1/16
unsigned int enable_auto_alt_ref;
@@ -32,8 +34,8 @@ struct av1_extracfg {
unsigned int noise_sensitivity;
unsigned int sharpness;
unsigned int static_thresh;
- unsigned int tile_columns;
- unsigned int tile_rows;
+ unsigned int tile_columns; // log2 number of tile columns
+ unsigned int tile_rows; // log2 number of tile rows
#if CONFIG_DEPENDENT_HORZTILES
unsigned int dependent_horz_tiles;
#endif
@@ -55,6 +57,9 @@ struct av1_extracfg {
unsigned int qm_min;
unsigned int qm_max;
#endif
+#if CONFIG_DIST_8X8
+ unsigned int enable_dist_8x8;
+#endif
unsigned int num_tg;
unsigned int mtu_size;
#if CONFIG_TEMPMV_SIGNALING
@@ -69,10 +74,8 @@ struct av1_extracfg {
aom_bit_depth_t bit_depth;
aom_tune_content content;
aom_color_space_t color_space;
-#if CONFIG_COLORSPACE_HEADERS
aom_transfer_function_t transfer_function;
aom_chroma_sample_position_t chroma_sample_position;
-#endif
int color_range;
int render_width;
int render_height;
@@ -119,6 +122,9 @@ static struct av1_extracfg default_extra_cfg = {
DEFAULT_QM_FIRST, // qm_min
DEFAULT_QM_LAST, // qm_max
#endif
+#if CONFIG_DIST_8X8
+ 0,
+#endif
1, // max number of tile groups
0, // mtu_size
#if CONFIG_TEMPMV_SIGNALING
@@ -129,14 +135,12 @@ static struct av1_extracfg default_extra_cfg = {
#if CONFIG_EXT_DELTA_Q
NO_DELTA_Q, // deltaq_mode
#endif
- CONFIG_XIPHRC, // frame_periodic_delta_q
- AOM_BITS_8, // Bit depth
- AOM_CONTENT_DEFAULT, // content
- AOM_CS_UNKNOWN, // color space
-#if CONFIG_COLORSPACE_HEADERS
- AOM_TF_UNKNOWN, // transfer function
- AOM_CSP_UNKNOWN, // chroma sample position
-#endif
+ CONFIG_XIPHRC, // frame_periodic_delta_q
+ AOM_BITS_8, // Bit depth
+ AOM_CONTENT_DEFAULT, // content
+ AOM_CS_UNKNOWN, // color space
+ AOM_TF_UNKNOWN, // transfer function
+ AOM_CSP_UNKNOWN, // chroma sample position
0, // color range
0, // render width
0, // render height
@@ -222,9 +226,9 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
RANGE_CHECK_BOOL(extra_cfg, lossless);
- RANGE_CHECK(extra_cfg, aq_mode, 0, AQ_MODE_COUNT - 1);
+ RANGE_CHECK_HI(extra_cfg, aq_mode, AQ_MODE_COUNT - 1);
#if CONFIG_EXT_DELTA_Q
- RANGE_CHECK(extra_cfg, deltaq_mode, 0, DELTAQ_MODE_COUNT - 1);
+ RANGE_CHECK_HI(extra_cfg, deltaq_mode, DELTAQ_MODE_COUNT - 1);
#endif
RANGE_CHECK_HI(extra_cfg, frame_periodic_boost, 1);
RANGE_CHECK_HI(cfg, g_threads, 64);
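
The switch from RANGE_CHECK(..., 0, hi) to RANGE_CHECK_HI above drops a lower-bound test that is vacuous for unsigned fields such as aq_mode (and can trigger tautological-compare warnings). A sketch of the two checks with hypothetical macro bodies; the real macros live in av1_cx_iface.c and report failures via ERROR():

/* Hypothetical stand-ins for the validate_config() macros: a full
 * range check versus an upper-bound-only check for unsigned fields. */
#define SK_RANGE_CHECK(val, lo, hi) ((val) >= (lo) && (val) <= (hi))
#define SK_RANGE_CHECK_HI(val, hi) ((val) <= (hi))
/* For an unsigned aq_mode, (aq_mode >= 0) is always true, so the
 * upper-bound-only form expresses the same constraint. */
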
@@ -246,17 +250,19 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
(MAX_LAG_BUFFERS - 1));
}
- RANGE_CHECK_HI(cfg, rc_resize_mode, RESIZE_DYNAMIC);
- RANGE_CHECK(cfg, rc_resize_numerator, SCALE_DENOMINATOR / 2,
- SCALE_DENOMINATOR);
- RANGE_CHECK(cfg, rc_resize_kf_numerator, SCALE_DENOMINATOR / 2,
- SCALE_DENOMINATOR);
+ RANGE_CHECK_HI(cfg, rc_resize_mode, RESIZE_MODES - 1);
+ RANGE_CHECK(cfg, rc_resize_denominator, SCALE_NUMERATOR,
+ SCALE_NUMERATOR << 1);
+ RANGE_CHECK(cfg, rc_resize_kf_denominator, SCALE_NUMERATOR,
+ SCALE_NUMERATOR << 1);
#if CONFIG_FRAME_SUPERRES
- RANGE_CHECK_HI(cfg, rc_superres_mode, SUPERRES_DYNAMIC);
- RANGE_CHECK(cfg, rc_superres_numerator, SCALE_DENOMINATOR / 2,
- SCALE_DENOMINATOR);
- RANGE_CHECK(cfg, rc_superres_kf_numerator, SCALE_DENOMINATOR / 2,
- SCALE_DENOMINATOR);
+ RANGE_CHECK_HI(cfg, rc_superres_mode, SUPERRES_MODES - 1);
+ RANGE_CHECK(cfg, rc_superres_denominator, SCALE_NUMERATOR,
+ SCALE_NUMERATOR << 1);
+ RANGE_CHECK(cfg, rc_superres_kf_denominator, SCALE_NUMERATOR,
+ SCALE_NUMERATOR << 1);
+ RANGE_CHECK(cfg, rc_superres_qthresh, 1, 63);
+ RANGE_CHECK(cfg, rc_superres_kf_qthresh, 1, 63);
#endif // CONFIG_FRAME_SUPERRES
// AV1 does not support a lower bound on the keyframe interval in
@@ -299,8 +305,13 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
#endif // CONFIG_EXT_PARTITION
} else {
#endif // CONFIG_EXT_TILE
+#if CONFIG_MAX_TILE
RANGE_CHECK_HI(extra_cfg, tile_columns, 6);
- RANGE_CHECK_HI(extra_cfg, tile_rows, 2);
+ RANGE_CHECK_HI(extra_cfg, tile_rows, 6);
+#else // CONFIG_MAX_TILE
+ RANGE_CHECK_HI(extra_cfg, tile_columns, 6);
+ RANGE_CHECK_HI(extra_cfg, tile_rows, 2);
+#endif // CONFIG_MAX_TILE
#if CONFIG_EXT_TILE
}
#endif // CONFIG_EXT_TILE
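
As the extra-cfg comments above note, tile_columns and tile_rows are log2 values, so the CONFIG_MAX_TILE bound of 6 permits up to 64 tile rows or columns, versus 4 rows without it. A minimal sketch of the conversion, under that log2 assumption:

/* tile_columns/tile_rows are log2 counts (see av1_extracfg above). */
static int log2_tiles_to_count(unsigned int log2_tiles) {
  return 1 << log2_tiles; /* log2 == 6 -> 64 tiles; log2 == 2 -> 4 tiles */
}
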
@@ -323,6 +334,14 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
if (extra_cfg->tuning == AOM_TUNE_SSIM)
ERROR("Option --tune=ssim is not currently supported in AV1.");
+// TODO(anybody): remove this flag when PVQ supports the palette coding tool
+#if CONFIG_PVQ
+  if (extra_cfg->content == AOM_CONTENT_SCREEN)
+    ERROR(
+        "Option --tune-content=screen is not currently supported when PVQ is "
+        "enabled.");
+#endif // CONFIG_PVQ
+
if (cfg->g_pass == AOM_RC_LAST_PASS) {
#if !CONFIG_XIPHRC
const size_t packet_sz = sizeof(FIRSTPASS_STATS);
@@ -477,7 +496,12 @@ static aom_codec_err_t set_encoder_config(
oxcf->qm_minlevel = extra_cfg->qm_min;
oxcf->qm_maxlevel = extra_cfg->qm_max;
#endif
-
+#if CONFIG_DIST_8X8
+ oxcf->using_dist_8x8 = extra_cfg->enable_dist_8x8;
+ if (extra_cfg->tuning == AOM_TUNE_CDEF_DIST ||
+ extra_cfg->tuning == AOM_TUNE_DAALA_DIST)
+ oxcf->using_dist_8x8 = 1;
+#endif
oxcf->num_tile_groups = extra_cfg->num_tg;
#if CONFIG_EXT_TILE
// In large-scale tile encoding mode, num_tile_groups is always 1.
@@ -492,20 +516,31 @@ static aom_codec_err_t set_encoder_config(
oxcf->over_shoot_pct = cfg->rc_overshoot_pct;
oxcf->resize_mode = (RESIZE_MODE)cfg->rc_resize_mode;
- oxcf->resize_scale_numerator = (uint8_t)cfg->rc_resize_numerator;
- oxcf->resize_kf_scale_numerator = (uint8_t)cfg->rc_resize_kf_numerator;
+ oxcf->resize_scale_denominator = (uint8_t)cfg->rc_resize_denominator;
+ oxcf->resize_kf_scale_denominator = (uint8_t)cfg->rc_resize_kf_denominator;
if (oxcf->resize_mode == RESIZE_FIXED &&
- oxcf->resize_scale_numerator == SCALE_DENOMINATOR &&
- oxcf->resize_kf_scale_numerator == SCALE_DENOMINATOR)
+ oxcf->resize_scale_denominator == SCALE_NUMERATOR &&
+ oxcf->resize_kf_scale_denominator == SCALE_NUMERATOR)
oxcf->resize_mode = RESIZE_NONE;
#if CONFIG_FRAME_SUPERRES
oxcf->superres_mode = (SUPERRES_MODE)cfg->rc_superres_mode;
- oxcf->superres_scale_numerator = (uint8_t)cfg->rc_superres_numerator;
- oxcf->superres_kf_scale_numerator = (uint8_t)cfg->rc_superres_kf_numerator;
+ oxcf->superres_scale_denominator = (uint8_t)cfg->rc_superres_denominator;
+ oxcf->superres_kf_scale_denominator =
+ (uint8_t)cfg->rc_superres_kf_denominator;
+ oxcf->superres_qthresh =
+ extra_cfg->lossless ? 255
+ : av1_quantizer_to_qindex(cfg->rc_superres_qthresh);
+ oxcf->superres_kf_qthresh =
+ extra_cfg->lossless
+ ? 255
+ : av1_quantizer_to_qindex(cfg->rc_superres_kf_qthresh);
if (oxcf->superres_mode == SUPERRES_FIXED &&
- oxcf->superres_scale_numerator == SCALE_DENOMINATOR &&
- oxcf->superres_kf_scale_numerator == SCALE_DENOMINATOR)
+ oxcf->superres_scale_denominator == SCALE_NUMERATOR &&
+ oxcf->superres_kf_scale_denominator == SCALE_NUMERATOR)
+ oxcf->superres_mode = SUPERRES_NONE;
+ if (oxcf->superres_mode == SUPERRES_QTHRESH &&
+ oxcf->superres_qthresh == 255 && oxcf->superres_kf_qthresh == 255)
oxcf->superres_mode = SUPERRES_NONE;
#endif // CONFIG_FRAME_SUPERRES
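
The qthresh values above are user-level quantizers (1..63) mapped to internal qindex values, with lossless forcing the threshold to the maximum qindex of 255 so SUPERRES_QTHRESH never triggers. A hedged sketch, assuming the usual qindex = 4 * quantizer scaling rather than the library's exact av1_quantizer_to_qindex():

/* Sketch of the mapping used above, assuming the common 4x scaling
 * (qindex range 0..255 for quantizer 0..63); not the library function. */
static int quantizer_to_qindex_sketch(int quantizer, int lossless) {
  return lossless ? 255 : quantizer * 4;
}
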
@@ -539,10 +574,17 @@ static aom_codec_err_t set_encoder_config(
#endif
oxcf->color_space = extra_cfg->color_space;
+
#if CONFIG_COLORSPACE_HEADERS
oxcf->transfer_function = extra_cfg->transfer_function;
oxcf->chroma_sample_position = extra_cfg->chroma_sample_position;
+#else
+ if (extra_cfg->transfer_function != AOM_TF_UNKNOWN)
+ return AOM_CODEC_UNSUP_FEATURE;
+ if (extra_cfg->chroma_sample_position != AOM_CSP_UNKNOWN)
+ return AOM_CODEC_UNSUP_FEATURE;
#endif
+
oxcf->color_range = extra_cfg->color_range;
oxcf->render_width = extra_cfg->render_width;
oxcf->render_height = extra_cfg->render_height;
@@ -588,6 +630,16 @@ static aom_codec_err_t set_encoder_config(
}
#endif // CONFIG_EXT_TILE
+#if CONFIG_MAX_TILE
+ oxcf->tile_width_count = AOMMIN(cfg->tile_width_count, MAX_TILE_COLS);
+ oxcf->tile_height_count = AOMMIN(cfg->tile_height_count, MAX_TILE_ROWS);
+ for (int i = 0; i < oxcf->tile_width_count; i++) {
+ oxcf->tile_widths[i] = AOMMAX(cfg->tile_widths[i], 1);
+ }
+ for (int i = 0; i < oxcf->tile_height_count; i++) {
+ oxcf->tile_heights[i] = AOMMAX(cfg->tile_heights[i], 1);
+ }
+#endif
#if CONFIG_DEPENDENT_HORZTILES
oxcf->dependent_horz_tiles =
#if CONFIG_EXT_TILE
@@ -608,39 +660,7 @@ static aom_codec_err_t set_encoder_config(
#endif
oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost;
-
oxcf->motion_vector_unit_test = extra_cfg->motion_vector_unit_test;
- /*
- printf("Current AV1 Settings: \n");
- printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
- printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity);
- printf("sharpness: %d\n", oxcf->sharpness);
- printf("cpu_used: %d\n", oxcf->cpu_used);
- printf("Mode: %d\n", oxcf->mode);
- printf("auto_key: %d\n", oxcf->auto_key);
- printf("key_freq: %d\n", oxcf->key_freq);
- printf("end_usage: %d\n", oxcf->end_usage);
- printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct);
- printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct);
- printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level);
- printf("optimal_buffer_level: %d\n", oxcf->optimal_buffer_level);
- printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size);
- printf("fixed_q: %d\n", oxcf->fixed_q);
- printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q);
- printf("best_allowed_q: %d\n", oxcf->best_allowed_q);
- printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling);
- printf("scaled_frame_width: %d\n", oxcf->scaled_frame_width);
- printf("scaled_frame_height: %d\n", oxcf->scaled_frame_height);
- printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias);
- printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
- printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
- printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
- printf("enable_auto_arf: %d\n", oxcf->enable_auto_arf);
- printf("Version: %d\n", oxcf->Version);
- printf("error resilient: %d\n", oxcf->error_resilient_mode);
- printf("frame parallel detokenization: %d\n",
- oxcf->frame_parallel_decoding_mode);
- */
return AOM_CODEC_OK;
}
@@ -764,6 +784,7 @@ static aom_codec_err_t ctrl_set_tile_rows(aom_codec_alg_priv_t *ctx,
extra_cfg.tile_rows = CAST(AV1E_SET_TILE_ROWS, args);
return update_extra_cfg(ctx, &extra_cfg);
}
+
#if CONFIG_DEPENDENT_HORZTILES
static aom_codec_err_t ctrl_set_tile_dependent_rows(aom_codec_alg_priv_t *ctx,
va_list args) {
@@ -862,7 +883,14 @@ static aom_codec_err_t ctrl_set_qm_max(aom_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
#endif
-
+#if CONFIG_DIST_8X8
+static aom_codec_err_t ctrl_set_enable_dist_8x8(aom_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct av1_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.enable_dist_8x8 = CAST(AV1E_SET_ENABLE_DIST_8X8, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+#endif
static aom_codec_err_t ctrl_set_num_tg(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
@@ -1044,7 +1072,7 @@ static int write_superframe_index(aom_codec_alg_priv_t *ctx) {
// Choose the magnitude
int mag;
unsigned int mask;
- for (mag = 0, mask = 0xff; mag < 4; mag++) {
+ for (mag = 0, mask = 0xff; mag < MAG_SIZE; mag++) {
if (max_frame_sz <= mask) break;
mask <<= 8;
mask |= 0xff;
@@ -1052,7 +1080,7 @@ static int write_superframe_index(aom_codec_alg_priv_t *ctx) {
marker |= mag << 3;
// Write the index
- uint8_t buffer[256];
+ uint8_t buffer[MAX_INDEX_SIZE];
uint8_t *x = buffer;
if (TEST_SUPPLEMENTAL_SUPERFRAME_DATA) {
@@ -1080,6 +1108,7 @@ static int write_superframe_index(aom_codec_alg_priv_t *ctx) {
*x++ = marker;
const size_t index_sz = x - buffer;
+ assert(index_sz < MAX_INDEX_SIZE);
assert(ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz);
// move the frame to make room for the index
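
For reference, the magnitude chosen in the loop above selects how many bytes encode each frame size in the superframe index; MAG_SIZE caps it at 4, matching the 2-bit mag field packed into the marker byte via mag << 3. A standalone sketch of the same arithmetic; the helper name is hypothetical:

#include <stddef.h>
#define MAG_SIZE 4 /* mirrors the definition in av1_cx_iface.c */
/* Bytes needed to store one frame size, per the magnitude loop above. */
static int superframe_size_bytes(size_t max_frame_sz) {
  int mag;
  unsigned int mask;
  for (mag = 0, mask = 0xff; mag < MAG_SIZE; mag++) {
    if (max_frame_sz <= mask) break;
    mask <<= 8;
    mask |= 0xff;
  }
  return mag + 1; /* e.g. a 70000-byte frame: mag == 2, so 3 bytes */
}
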
@@ -1229,36 +1258,46 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
}
}
- size_t frame_size;
+ size_t frame_size = 0;
unsigned int lib_flags = 0;
- while (cx_data_sz >= ctx->cx_data_sz / 2 &&
+ int is_frame_visible = 0;
+ int index_size = 0;
+ // invisible frames get packed with the next visible frame
+ while (cx_data_sz - index_size >= ctx->cx_data_sz / 2 &&
+ !is_frame_visible &&
-1 != av1_get_compressed_data(cpi, &lib_flags, &frame_size, cx_data,
&dst_time_stamp, &dst_end_time_stamp,
!img)) {
#if CONFIG_REFERENCE_BUFFER
- if (cpi->common.invalid_delta_frame_id_minus1) {
- ctx->base.err_detail = "Invalid delta_frame_id_minus1";
- return AOM_CODEC_ERROR;
+ if (cpi->common.seq_params.frame_id_numbers_present_flag) {
+ if (cpi->common.invalid_delta_frame_id_minus1) {
+ ctx->base.err_detail = "Invalid delta_frame_id_minus1";
+ return AOM_CODEC_ERROR;
+ }
}
-#endif
- if (!frame_size) continue;
+#endif // CONFIG_REFERENCE_BUFFER
+ if (frame_size) {
+ if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
- if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
+ ctx->pending_frame_sizes[ctx->pending_frame_count++] = frame_size;
+ ctx->pending_cx_data_sz += frame_size;
- ctx->pending_frame_sizes[ctx->pending_frame_count++] = frame_size;
- ctx->pending_cx_data_sz += frame_size;
+ cx_data += frame_size;
+ cx_data_sz -= frame_size;
- cx_data += frame_size;
- cx_data_sz -= frame_size;
-
- // invisible frames get packed with the next visible frame
- if (!cpi->common.show_frame) continue;
+ index_size = MAG_SIZE * (ctx->pending_frame_count - 1) + 2;
+ is_frame_visible = cpi->common.show_frame;
+ }
+ }
+ if (is_frame_visible) {
// insert superframe index if needed
if (ctx->pending_frame_count > 1) {
- const size_t index_size = write_superframe_index(ctx);
- cx_data += index_size;
- cx_data_sz -= index_size;
+#if CONFIG_DEBUG
+ assert(index_size >= write_superframe_index(ctx));
+#else
+ write_superframe_index(ctx);
+#endif
}
// Add the frame packet to the list of returned packets.
@@ -1294,14 +1333,13 @@ static const aom_codec_cx_pkt_t *encoder_get_cxdata(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_set_reference(aom_codec_alg_priv_t *ctx,
va_list args) {
- aom_ref_frame_t *const frame = va_arg(args, aom_ref_frame_t *);
+ av1_ref_frame_t *const frame = va_arg(args, av1_ref_frame_t *);
if (frame != NULL) {
YV12_BUFFER_CONFIG sd;
image2yuvconfig(&frame->img, &sd);
- av1_set_reference_enc(ctx->cpi, ref_frame_to_av1_reframe(frame->frame_type),
- &sd);
+ av1_set_reference_enc(ctx->cpi, frame->idx, &sd);
return AOM_CODEC_OK;
} else {
return AOM_CODEC_INVALID_PARAM;
@@ -1310,14 +1348,13 @@ static aom_codec_err_t ctrl_set_reference(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_copy_reference(aom_codec_alg_priv_t *ctx,
va_list args) {
- aom_ref_frame_t *const frame = va_arg(args, aom_ref_frame_t *);
+ av1_ref_frame_t *const frame = va_arg(args, av1_ref_frame_t *);
if (frame != NULL) {
YV12_BUFFER_CONFIG sd;
image2yuvconfig(&frame->img, &sd);
- av1_copy_reference_enc(ctx->cpi,
- ref_frame_to_av1_reframe(frame->frame_type), &sd);
+ av1_copy_reference_enc(ctx->cpi, frame->idx, &sd);
return AOM_CODEC_OK;
} else {
return AOM_CODEC_INVALID_PARAM;
@@ -1450,22 +1487,32 @@ static aom_codec_err_t ctrl_set_color_space(aom_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
-#if CONFIG_COLORSPACE_HEADERS
static aom_codec_err_t ctrl_set_transfer_function(aom_codec_alg_priv_t *ctx,
va_list args) {
+#if CONFIG_COLORSPACE_HEADERS
struct av1_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.transfer_function = CAST(AV1E_SET_TRANSFER_FUNCTION, args);
return update_extra_cfg(ctx, &extra_cfg);
+#else
+ (void)ctx;
+ (void)args;
+ return AOM_CODEC_UNSUP_FEATURE;
+#endif
}
static aom_codec_err_t ctrl_set_chroma_sample_position(
aom_codec_alg_priv_t *ctx, va_list args) {
+#if CONFIG_COLORSPACE_HEADERS
struct av1_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.chroma_sample_position =
CAST(AV1E_SET_CHROMA_SAMPLE_POSITION, args);
return update_extra_cfg(ctx, &extra_cfg);
-}
+#else
+ (void)ctx;
+ (void)args;
+ return AOM_CODEC_UNSUP_FEATURE;
#endif
+}
static aom_codec_err_t ctrl_set_color_range(aom_codec_alg_priv_t *ctx,
va_list args) {
@@ -1500,11 +1547,11 @@ static aom_codec_err_t ctrl_set_ans_window_size_log2(aom_codec_alg_priv_t *ctx,
#endif
static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
- { AOM_COPY_REFERENCE, ctrl_copy_reference },
+ { AV1_COPY_REFERENCE, ctrl_copy_reference },
{ AOME_USE_REFERENCE, ctrl_use_reference },
// Setters
- { AOM_SET_REFERENCE, ctrl_set_reference },
+ { AV1_SET_REFERENCE, ctrl_set_reference },
{ AOM_SET_POSTPROC, ctrl_set_previewpp },
{ AOME_SET_ROI_MAP, ctrl_set_roi_map },
{ AOME_SET_ACTIVEMAP, ctrl_set_active_map },
@@ -1537,6 +1584,9 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ AV1E_SET_QM_MIN, ctrl_set_qm_min },
{ AV1E_SET_QM_MAX, ctrl_set_qm_max },
#endif
+#if CONFIG_DIST_8X8
+ { AV1E_SET_ENABLE_DIST_8X8, ctrl_set_enable_dist_8x8 },
+#endif
{ AV1E_SET_NUM_TG, ctrl_set_num_tg },
{ AV1E_SET_MTU, ctrl_set_mtu },
#if CONFIG_TEMPMV_SIGNALING
@@ -1550,10 +1600,8 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ AV1E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost },
{ AV1E_SET_TUNE_CONTENT, ctrl_set_tune_content },
{ AV1E_SET_COLOR_SPACE, ctrl_set_color_space },
-#if CONFIG_COLORSPACE_HEADERS
{ AV1E_SET_TRANSFER_FUNCTION, ctrl_set_transfer_function },
{ AV1E_SET_CHROMA_SAMPLE_POSITION, ctrl_set_chroma_sample_position },
-#endif
{ AV1E_SET_COLOR_RANGE, ctrl_set_color_range },
{ AV1E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity },
{ AV1E_SET_MIN_GF_INTERVAL, ctrl_set_min_gf_interval },
@@ -1597,16 +1645,18 @@ static aom_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
AOM_RC_ONE_PASS, // g_pass
- 25, // g_lag_in_frames
+ 17, // g_lag_in_frames
- 0, // rc_dropframe_thresh
- RESIZE_NONE, // rc_resize_mode
- SCALE_DENOMINATOR, // rc_resize_numerator
- SCALE_DENOMINATOR, // rc_resize_kf_numerator
+ 0, // rc_dropframe_thresh
+ RESIZE_NONE, // rc_resize_mode
+ SCALE_NUMERATOR, // rc_resize_denominator
+ SCALE_NUMERATOR, // rc_resize_kf_denominator
- 0, // rc_superres_mode
- SCALE_DENOMINATOR, // rc_superres_numerator
- SCALE_DENOMINATOR, // rc_superres_kf_numerator
+ 0, // rc_superres_mode
+ SCALE_NUMERATOR, // rc_superres_denominator
+ SCALE_NUMERATOR, // rc_superres_kf_denominator
+ 63, // rc_superres_qthresh
+ 63, // rc_superres_kf_qthresh
AOM_VBR, // rc_end_usage
{ NULL, 0 }, // rc_twopass_stats_in
@@ -1630,6 +1680,10 @@ static aom_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
0, // kf_min_dist
9999, // kf_max_dist
0, // large_scale_tile
+ 0, // tile_width_count
+ 0, // tile_height_count
+ { 0 }, // tile_widths
+ { 0 }, // tile_heights
} },
};
diff --git a/third_party/aom/av1/av1_dx.mk b/third_party/aom/av1/av1_dx.mk
index 1a54ea22a..6f113c3c6 100644
--- a/third_party/aom/av1/av1_dx.mk
+++ b/third_party/aom/av1/av1_dx.mk
@@ -32,6 +32,7 @@ AV1_DX_SRCS-yes += decoder/decoder.c
AV1_DX_SRCS-yes += decoder/decoder.h
AV1_DX_SRCS-yes += decoder/dsubexp.c
AV1_DX_SRCS-yes += decoder/dsubexp.h
+AV1_DX_SRCS-yes += decoder/symbolrate.h
ifeq ($(CONFIG_ACCOUNTING),yes)
AV1_DX_SRCS-yes += decoder/accounting.h
@@ -56,11 +57,6 @@ AV1_DX_SRCS-yes += encoder/hybrid_fwd_txfm.h
AV1_DX_SRCS-yes += encoder/dct.c
AV1_DX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
AV1_DX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c
-AV1_DX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
-
-ifneq ($(CONFIG_HIGHBITDEPTH),yes)
-AV1_DX_SRCS-$(HAVE_NEON) += encoder/arm/neon/dct_neon.c
-endif
AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct4x4_msa.c
AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct8x8_msa.c
diff --git a/third_party/aom/av1/av1_dx_iface.c b/third_party/aom/av1/av1_dx_iface.c
index df2c81dc9..c2f433d38 100644
--- a/third_party/aom/av1/av1_dx_iface.c
+++ b/third_party/aom/av1/av1_dx_iface.c
@@ -153,6 +153,7 @@ static aom_codec_err_t decoder_destroy(aom_codec_alg_priv_t *ctx) {
return AOM_CODEC_OK;
}
+#if !CONFIG_OBU
static int parse_bitdepth_colorspace_sampling(BITSTREAM_PROFILE profile,
struct aom_read_bit_buffer *rb) {
aom_color_space_t color_space;
@@ -200,6 +201,7 @@ static int parse_bitdepth_colorspace_sampling(BITSTREAM_PROFILE profile,
}
return 1;
}
+#endif
static aom_codec_err_t decoder_peek_si_internal(
const uint8_t *data, unsigned int data_sz, aom_codec_stream_info_t *si,
@@ -229,9 +231,18 @@ static aom_codec_err_t decoder_peek_si_internal(
data += index_size;
data_sz -= index_size;
+#if CONFIG_OBU
+ if (data + data_sz <= data) return AOM_CODEC_INVALID_PARAM;
+#endif
}
{
+#if CONFIG_OBU
+    // TODO: A proper fix is needed; for now assume an intra-only keyframe.
+ si->is_kf = 1;
+ intra_only_flag = 1;
+ si->h = 1;
+#else
int show_frame;
int error_resilient;
struct aom_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL };
@@ -261,35 +272,35 @@ static aom_codec_err_t decoder_peek_si_internal(
si->is_kf = !aom_rb_read_bit(&rb);
show_frame = aom_rb_read_bit(&rb);
+ if (!si->is_kf) {
+ if (!show_frame) intra_only_flag = show_frame ? 0 : aom_rb_read_bit(&rb);
+ }
error_resilient = aom_rb_read_bit(&rb);
#if CONFIG_REFERENCE_BUFFER
- {
+ SequenceHeader seq_params = { 0, 0, 0 };
+ if (si->is_kf) {
/* TODO: Move outside frame loop or inside key-frame branch */
- int frame_id_len;
- SequenceHeader seq_params;
- read_sequence_header(&seq_params);
+ read_sequence_header(&seq_params, &rb);
#if CONFIG_EXT_TILE
if (large_scale_tile) seq_params.frame_id_numbers_present_flag = 0;
#endif // CONFIG_EXT_TILE
- if (seq_params.frame_id_numbers_present_flag) {
- frame_id_len = seq_params.frame_id_length_minus7 + 7;
- aom_rb_read_literal(&rb, frame_id_len);
- }
}
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
+#if CONFIG_REFERENCE_BUFFER
+ if (seq_params.frame_id_numbers_present_flag) {
+ int frame_id_len;
+ frame_id_len = seq_params.frame_id_length_minus7 + 7;
+ aom_rb_read_literal(&rb, frame_id_len);
+ }
+#endif // CONFIG_REFERENCE_BUFFER
if (si->is_kf) {
- if (!av1_read_sync_code(&rb)) return AOM_CODEC_UNSUP_BITSTREAM;
-
if (!parse_bitdepth_colorspace_sampling(profile, &rb))
return AOM_CODEC_UNSUP_BITSTREAM;
av1_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
} else {
- intra_only_flag = show_frame ? 0 : aom_rb_read_bit(&rb);
-
rb.bit_offset += error_resilient ? 0 : 2; // reset_frame_context
if (intra_only_flag) {
- if (!av1_read_sync_code(&rb)) return AOM_CODEC_UNSUP_BITSTREAM;
if (profile > PROFILE_0) {
if (!parse_bitdepth_colorspace_sampling(profile, &rb))
return AOM_CODEC_UNSUP_BITSTREAM;
@@ -298,6 +309,7 @@ static aom_codec_err_t decoder_peek_si_internal(
av1_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
}
}
+#endif // CONFIG_OBU
}
if (is_intra_only != NULL) *is_intra_only = intra_only_flag;
return AOM_CODEC_OK;
@@ -876,7 +888,7 @@ static aom_codec_err_t decoder_set_fb_fn(
static aom_codec_err_t ctrl_set_reference(aom_codec_alg_priv_t *ctx,
va_list args) {
- aom_ref_frame_t *const data = va_arg(args, aom_ref_frame_t *);
+ av1_ref_frame_t *const data = va_arg(args, av1_ref_frame_t *);
// Only support this function in serial decode.
if (ctx->frame_parallel_decode) {
@@ -885,13 +897,12 @@ static aom_codec_err_t ctrl_set_reference(aom_codec_alg_priv_t *ctx,
}
if (data) {
- aom_ref_frame_t *const frame = (aom_ref_frame_t *)data;
+ av1_ref_frame_t *const frame = data;
YV12_BUFFER_CONFIG sd;
AVxWorker *const worker = ctx->frame_workers;
FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
image2yuvconfig(&frame->img, &sd);
- return av1_set_reference_dec(&frame_worker_data->pbi->common,
- ref_frame_to_av1_reframe(frame->frame_type),
+ return av1_set_reference_dec(&frame_worker_data->pbi->common, frame->idx,
&sd);
} else {
return AOM_CODEC_INVALID_PARAM;
@@ -900,7 +911,7 @@ static aom_codec_err_t ctrl_set_reference(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_copy_reference(aom_codec_alg_priv_t *ctx,
va_list args) {
- const aom_ref_frame_t *const frame = va_arg(args, aom_ref_frame_t *);
+ const av1_ref_frame_t *const frame = va_arg(args, av1_ref_frame_t *);
// Only support this function in serial decode.
if (ctx->frame_parallel_decode) {
@@ -913,8 +924,7 @@ static aom_codec_err_t ctrl_copy_reference(aom_codec_alg_priv_t *ctx,
AVxWorker *const worker = ctx->frame_workers;
FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
image2yuvconfig(&frame->img, &sd);
- return av1_copy_reference_dec(frame_worker_data->pbi,
- (AOM_REFFRAME)frame->frame_type, &sd);
+ return av1_copy_reference_dec(frame_worker_data->pbi, frame->idx, &sd);
} else {
return AOM_CODEC_INVALID_PARAM;
}
@@ -1209,10 +1219,10 @@ static aom_codec_err_t ctrl_set_inspection_callback(aom_codec_alg_priv_t *ctx,
}
static aom_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
- { AOM_COPY_REFERENCE, ctrl_copy_reference },
+ { AV1_COPY_REFERENCE, ctrl_copy_reference },
// Setters
- { AOM_SET_REFERENCE, ctrl_set_reference },
+ { AV1_SET_REFERENCE, ctrl_set_reference },
{ AOM_SET_POSTPROC, ctrl_set_postproc },
{ AOM_SET_DBG_COLOR_REF_FRAME, ctrl_set_dbg_options },
{ AOM_SET_DBG_COLOR_MB_MODES, ctrl_set_dbg_options },
diff --git a/third_party/aom/av1/av1_iface_common.h b/third_party/aom/av1/av1_iface_common.h
index f0260cafe..6c9a2a6cb 100644
--- a/third_party/aom/av1/av1_iface_common.h
+++ b/third_party/aom/av1/av1_iface_common.h
@@ -142,13 +142,4 @@ static aom_codec_err_t image2yuvconfig(const aom_image_t *img,
return AOM_CODEC_OK;
}
-static AOM_REFFRAME ref_frame_to_av1_reframe(aom_ref_frame_type_t frame) {
- switch (frame) {
- case AOM_LAST_FRAME: return AOM_LAST_FLAG;
- case AOM_GOLD_FRAME: return AOM_GOLD_FLAG;
- case AOM_ALTR_FRAME: return AOM_ALT_FLAG;
- }
- assert(0 && "Invalid Reference Frame");
- return AOM_LAST_FLAG;
-}
#endif // AV1_AV1_IFACE_COMMON_H_
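
With ref_frame_to_av1_reframe removed, the AV1_SET_REFERENCE / AV1_COPY_REFERENCE controls take an av1_ref_frame_t whose idx field names the reference buffer directly instead of mapping an aom_ref_frame_type_t flag. A hedged usage sketch, assuming the struct layout from aom/aomdx.h (an int idx plus an aom_image_t img that the caller prepares, e.g. via aom_img_alloc):

#include "aom/aom_decoder.h"
#include "aom/aomdx.h" /* av1_ref_frame_t, AV1_COPY_REFERENCE */

/* Sketch: copy reference buffer 0 out of an initialized decoder.
 * Assumes ref->img has already been allocated by the caller. */
static aom_codec_err_t copy_first_reference(aom_codec_ctx_t *codec,
                                            av1_ref_frame_t *ref) {
  ref->idx = 0; /* raw buffer index, replacing the old frame_type flag */
  return aom_codec_control(codec, AV1_COPY_REFERENCE, ref);
}
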
diff --git a/third_party/aom/av1/common/alloccommon.c b/third_party/aom/av1/common/alloccommon.c
index c37f1ea50..fd635686f 100644
--- a/third_party/aom/av1/common/alloccommon.c
+++ b/third_party/aom/av1/common/alloccommon.c
@@ -19,9 +19,28 @@
#include "av1/common/entropymv.h"
#include "av1/common/onyxc_int.h"
+int av1_get_MBs(int width, int height) {
+ const int aligned_width = ALIGN_POWER_OF_TWO(width, 3);
+ const int aligned_height = ALIGN_POWER_OF_TWO(height, 3);
+ const int mi_cols = aligned_width >> MI_SIZE_LOG2;
+ const int mi_rows = aligned_height >> MI_SIZE_LOG2;
+
+#if CONFIG_CB4X4
+ const int mb_cols = (mi_cols + 2) >> 2;
+ const int mb_rows = (mi_rows + 2) >> 2;
+#else
+ const int mb_cols = (mi_cols + 1) >> 1;
+ const int mb_rows = (mi_rows + 1) >> 1;
+#endif
+ return mb_rows * mb_cols;
+}
+
void av1_set_mb_mi(AV1_COMMON *cm, int width, int height) {
- // TODO(jingning): Fine tune the loop filter operations and bring this
- // back to integer multiple of 4 for cb4x4.
+ // Ensure that the decoded width and height are both multiples of
+ // 8 luma pixels (note: this may only be a multiple of 4 chroma pixels if
+ // subsampling is used).
+ // This simplifies the implementation of various experiments,
+  // e.g. cdef, which operates on units of 8x8 luma pixels.
const int aligned_width = ALIGN_POWER_OF_TWO(width, 3);
const int aligned_height = ALIGN_POWER_OF_TWO(height, 3);
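
ALIGN_POWER_OF_TWO(width, 3) rounds up to the next multiple of 8. A minimal sketch of that macro (it matches the definition in aom_dsp/aom_dsp_common.h, to the best of my reading), with a worked example:

/* Round value up to a multiple of 2^n, as ALIGN_POWER_OF_TWO does. */
#define SK_ALIGN_POWER_OF_TWO(value, n) \
  (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
/* SK_ALIGN_POWER_OF_TWO(1918, 3) == 1920: a 1918-pixel-wide frame is
 * treated as 1920 so cdef can operate on whole 8x8 luma units. */
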
@@ -72,6 +91,36 @@ static void free_seg_map(AV1_COMMON *cm) {
if (!cm->frame_parallel_decode) {
cm->last_frame_seg_map = NULL;
}
+ cm->seg_map_alloc_size = 0;
+}
+
+static void free_scratch_buffers(AV1_COMMON *cm) {
+ (void)cm;
+#if CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
+ for (int i = 0; i < 4; ++i) {
+ if (cm->ncobmcaw_buf[i]) {
+ aom_free(cm->ncobmcaw_buf[i]);
+ cm->ncobmcaw_buf[i] = NULL;
+ }
+ }
+#endif // CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
+}
+
+static int alloc_scratch_buffers(AV1_COMMON *cm) {
+ (void)cm;
+#if CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
+ // If not allocated already, allocate
+ if (!cm->ncobmcaw_buf[0] && !cm->ncobmcaw_buf[1] && !cm->ncobmcaw_buf[2] &&
+ !cm->ncobmcaw_buf[3]) {
+ for (int i = 0; i < 4; ++i) {
+ CHECK_MEM_ERROR(
+ cm, cm->ncobmcaw_buf[i],
+ (uint8_t *)aom_memalign(
+ 16, (1 + CONFIG_HIGHBITDEPTH) * MAX_MB_PLANE * MAX_SB_SQUARE));
+ }
+ }
+#endif // CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
+ return 0;
}
void av1_free_ref_frame_buffers(BufferPool *pool) {
@@ -85,7 +134,14 @@ void av1_free_ref_frame_buffers(BufferPool *pool) {
}
aom_free(pool->frame_bufs[i].mvs);
pool->frame_bufs[i].mvs = NULL;
+#if CONFIG_MFMV
+ aom_free(pool->frame_bufs[i].tpl_mvs);
+ pool->frame_bufs[i].tpl_mvs = NULL;
+#endif
aom_free_frame_buffer(&pool->frame_bufs[i].buf);
+#if CONFIG_HASH_ME
+ av1_hash_table_destroy(&pool->frame_bufs[i].hash_table);
+#endif
}
}
@@ -108,6 +164,33 @@ void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
aom_free(cm->rst_internal.tmpbuf);
CHECK_MEM_ERROR(cm, cm->rst_internal.tmpbuf,
(int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ // Allocate internal storage for the loop restoration stripe boundary lines
+ for (p = 0; p < MAX_MB_PLANE; ++p) {
+ int w = p == 0 ? width : ROUND_POWER_OF_TWO(width, cm->subsampling_x);
+ int align_bits = 5; // align for efficiency
+ int stride = ALIGN_POWER_OF_TWO(w, align_bits);
+ int num_stripes = (height + 63) / 64;
+ // for each processing stripe: 2 lines above, 2 below
+ int buf_size = num_stripes * 2 * stride;
+ uint8_t *above_buf, *below_buf;
+
+ aom_free(cm->rst_internal.stripe_boundary_above[p]);
+ aom_free(cm->rst_internal.stripe_boundary_below[p]);
+
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth) buf_size = buf_size * 2;
+#endif
+ CHECK_MEM_ERROR(cm, above_buf,
+ (uint8_t *)aom_memalign(1 << align_bits, buf_size));
+ CHECK_MEM_ERROR(cm, below_buf,
+ (uint8_t *)aom_memalign(1 << align_bits, buf_size));
+ cm->rst_internal.stripe_boundary_above[p] = above_buf;
+ cm->rst_internal.stripe_boundary_below[p] = below_buf;
+ cm->rst_internal.stripe_boundary_stride[p] = stride;
+ }
+#endif // CONFIG_STRIPED_LOOP_RESTORATION
}
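
For a concrete sense of the stripe-boundary sizing above: each 64-row processing stripe stores 2 boundary lines per buffer, at a stride rounded up to a multiple of 32, doubled for high bit depth. A worked sketch of the same arithmetic (plain computation, no library calls):

/* Sketch of the sizing above for one plane: stride aligned to 32,
 * (height + 63) / 64 stripes, 2 lines per stripe per buffer. */
static int stripe_boundary_bytes(int width, int height, int use_highbd) {
  const int align_bits = 5;
  const int stride =
      (width + (1 << align_bits) - 1) & ~((1 << align_bits) - 1);
  const int num_stripes = (height + 63) / 64;
  int buf_size = num_stripes * 2 * stride;
  if (use_highbd) buf_size *= 2; /* 16-bit samples */
  return buf_size; /* 1920x1080 luma, 8-bit: 17 * 2 * 1920 = 65280 bytes */
}
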
void av1_free_restoration_buffers(AV1_COMMON *cm) {
@@ -123,12 +206,14 @@ void av1_free_context_buffers(AV1_COMMON *cm) {
int i;
cm->free_mi(cm);
free_seg_map(cm);
+ free_scratch_buffers(cm);
for (i = 0; i < MAX_MB_PLANE; i++) {
aom_free(cm->above_context[i]);
cm->above_context[i] = NULL;
}
aom_free(cm->above_seg_context);
cm->above_seg_context = NULL;
+ cm->above_context_alloc_cols = 0;
#if CONFIG_VAR_TX
aom_free(cm->above_txfm_context);
cm->above_txfm_context = NULL;
@@ -155,6 +240,7 @@ int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) {
free_seg_map(cm);
if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail;
}
+ if (alloc_scratch_buffers(cm)) goto fail;
if (cm->above_context_alloc_cols < cm->mi_cols) {
// TODO(geza.lore): These are bigger than they need to be.
diff --git a/third_party/aom/av1/common/alloccommon.h b/third_party/aom/av1/common/alloccommon.h
index 51863cd04..0d420f825 100644
--- a/third_party/aom/av1/common/alloccommon.h
+++ b/third_party/aom/av1/common/alloccommon.h
@@ -37,6 +37,7 @@ int av1_alloc_state_buffers(struct AV1Common *cm, int width, int height);
void av1_free_state_buffers(struct AV1Common *cm);
void av1_set_mb_mi(struct AV1Common *cm, int width, int height);
+int av1_get_MBs(int width, int height);
void av1_swap_current_and_last_seg_map(struct AV1Common *cm);
diff --git a/third_party/aom/av1/common/arm/neon/iht4x4_add_neon.c b/third_party/aom/av1/common/arm/neon/iht4x4_add_neon.c
index 68184c510..b29228e43 100644
--- a/third_party/aom/av1/common/arm/neon/iht4x4_add_neon.c
+++ b/third_party/aom/av1/common/arm/neon/iht4x4_add_neon.c
@@ -148,13 +148,13 @@ void av1_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
TRANSPOSE4X4(&q8s16, &q9s16);
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
switch (tx_type) {
- case 0: // idct_idct is not supported. Fall back to C
+ case DCT_DCT: // idct_idct is not supported. Fall back to C
av1_iht4x4_16_add_c(input, dest, dest_stride, txfm_param);
return;
break;
- case 1: // iadst_idct
+ case ADST_DCT: // iadst_idct
// generate constants
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
@@ -168,7 +168,7 @@ void av1_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
// then transform columns
IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
break;
- case 2: // idct_iadst
+ case DCT_ADST: // idct_iadst
      // generate constants
GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
@@ -182,7 +182,7 @@ void av1_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
// then transform columns
IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
break;
- case 3: // iadst_iadst
+ case ADST_ADST: // iadst_iadst
// generate constants
GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
diff --git a/third_party/aom/av1/common/arm/neon/iht8x8_add_neon.c b/third_party/aom/av1/common/arm/neon/iht8x8_add_neon.c
index a98449589..4cd43a99d 100644
--- a/third_party/aom/av1/common/arm/neon/iht8x8_add_neon.c
+++ b/third_party/aom/av1/common/arm/neon/iht8x8_add_neon.c
@@ -478,13 +478,13 @@ void av1_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest,
TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
switch (tx_type) {
- case 0: // idct_idct is not supported. Fall back to C
+ case DCT_DCT: // idct_idct is not supported. Fall back to C
av1_iht8x8_64_add_c(input, dest, dest_stride, txfm_param);
return;
break;
- case 1: // iadst_idct
+ case ADST_DCT: // iadst_idct
// generate IDCT constants
// GENERATE_IDCT_CONSTANTS
@@ -503,7 +503,7 @@ void av1_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest,
IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
break;
- case 2: // idct_iadst
+ case DCT_ADST: // idct_iadst
// generate IADST constants
// GENERATE_IADST_CONSTANTS
@@ -522,7 +522,7 @@ void av1_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest,
IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
break;
- case 3: // iadst_iadst
+ case ADST_ADST: // iadst_iadst
// generate IADST constants
// GENERATE_IADST_CONSTANTS
diff --git a/third_party/aom/av1/common/av1_fwd_txfm1d.c b/third_party/aom/av1/common/av1_fwd_txfm1d.c
index cfe274185..c9c7f437e 100644
--- a/third_party/aom/av1/common/av1_fwd_txfm1d.c
+++ b/third_party/aom/av1/common/av1_fwd_txfm1d.c
@@ -1547,6 +1547,16 @@ void av1_fidentity32_c(const int32_t *input, int32_t *output,
for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
range_check(0, input, output, 32, stage_range[0]);
}
+
+#if CONFIG_TX64X64
+void av1_fidentity64_c(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ (void)cos_bit;
+ for (int i = 0; i < 64; ++i)
+ output[i] = (int32_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
+ range_check(0, input, output, 64, stage_range[0]);
+}
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
diff --git a/third_party/aom/av1/common/av1_fwd_txfm1d.h b/third_party/aom/av1/common/av1_fwd_txfm1d.h
index f6419303a..f880239f7 100644
--- a/third_party/aom/av1/common/av1_fwd_txfm1d.h
+++ b/third_party/aom/av1/common/av1_fwd_txfm1d.h
@@ -26,8 +26,10 @@ void av1_fdct16_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
void av1_fdct32_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#if CONFIG_TX64X64
void av1_fdct64_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#endif // CONFIG_TX64X64
void av1_fadst4_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
@@ -46,6 +48,10 @@ void av1_fidentity16_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
void av1_fidentity32_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#if CONFIG_TX64X64
+void av1_fidentity64_c(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#ifdef __cplusplus
diff --git a/third_party/aom/av1/common/av1_fwd_txfm1d_cfg.h b/third_party/aom/av1/common/av1_fwd_txfm1d_cfg.h
index 60026da21..f2ed93151 100644
--- a/third_party/aom/av1/common/av1_fwd_txfm1d_cfg.h
+++ b/third_party/aom/av1/common/av1_fwd_txfm1d_cfg.h
@@ -19,11 +19,11 @@
static const int8_t fwd_shift_4[3] = { 2, 0, 0 };
// stage range
-static const int8_t fwd_stage_range_col_dct_4[4] = { 15, 16, 17, 17 };
-static const int8_t fwd_stage_range_row_dct_4[4] = { 17, 18, 18, 18 };
-static const int8_t fwd_stage_range_col_adst_4[6] = { 15, 15, 16, 17, 17, 17 };
-static const int8_t fwd_stage_range_row_adst_4[6] = { 17, 17, 17, 18, 18, 18 };
-static const int8_t fwd_stage_range_idx_4[1] = { 18 };
+static const int8_t fwd_stage_range_col_dct_4[4] = { 0, 1, 2, 2 };
+static const int8_t fwd_stage_range_row_dct_4[4] = { 2, 3, 3, 3 };
+static const int8_t fwd_stage_range_col_adst_4[6] = { 0, 0, 1, 2, 2, 2 };
+static const int8_t fwd_stage_range_row_adst_4[6] = { 2, 2, 2, 3, 3, 3 };
+static const int8_t fwd_stage_range_idx_4[1] = { 0 };
// cos bit
static const int8_t fwd_cos_bit_col_dct_4[4] = { 13, 13, 13, 13 };
@@ -36,13 +36,11 @@ static const int8_t fwd_cos_bit_row_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_shift_8[3] = { 2, -1, 0 };
// stage range
-static const int8_t fwd_stage_range_col_dct_8[6] = { 15, 16, 17, 18, 18, 18 };
-static const int8_t fwd_stage_range_row_dct_8[6] = { 17, 18, 19, 19, 19, 19 };
-static const int8_t fwd_stage_range_col_adst_8[8] = { 15, 15, 16, 17,
- 17, 18, 18, 18 };
-static const int8_t fwd_stage_range_row_adst_8[8] = { 17, 17, 17, 18,
- 18, 19, 19, 19 };
-static const int8_t fwd_stage_range_idx_8[1] = { 19 };
+static const int8_t fwd_stage_range_col_dct_8[6] = { 0, 1, 2, 3, 3, 3 };
+static const int8_t fwd_stage_range_row_dct_8[6] = { 3, 4, 5, 5, 5, 5 };
+static const int8_t fwd_stage_range_col_adst_8[8] = { 0, 0, 1, 2, 2, 3, 3, 3 };
+static const int8_t fwd_stage_range_row_adst_8[8] = { 3, 3, 3, 4, 4, 5, 5, 5 };
+static const int8_t fwd_stage_range_idx_8[1] = { 0 };
// cos bit
static const int8_t fwd_cos_bit_col_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
@@ -59,15 +57,14 @@ static const int8_t fwd_cos_bit_row_adst_8[8] = {
static const int8_t fwd_shift_16[3] = { 2, -2, 0 };
// stage range
-static const int8_t fwd_stage_range_col_dct_16[8] = { 15, 16, 17, 18,
- 19, 19, 19, 19 };
-static const int8_t fwd_stage_range_row_dct_16[8] = { 17, 18, 19, 20,
- 20, 20, 20, 20 };
-static const int8_t fwd_stage_range_col_adst_16[10] = { 15, 15, 16, 17, 17,
- 18, 18, 19, 19, 19 };
-static const int8_t fwd_stage_range_row_adst_16[10] = { 17, 17, 17, 18, 18,
- 19, 19, 20, 20, 20 };
-static const int8_t fwd_stage_range_idx_16[1] = { 20 };
+static const int8_t fwd_stage_range_col_dct_16[8] = { 0, 1, 2, 3, 4, 4, 4, 4 };
+static const int8_t fwd_stage_range_row_dct_16[8] = { 4, 5, 6, 7, 7, 7, 7, 7 };
+static const int8_t fwd_stage_range_col_adst_16[10] = { 0, 0, 1, 2, 2,
+ 3, 3, 4, 4, 4 };
+static const int8_t fwd_stage_range_row_adst_16[10] = {
+ 4, 4, 4, 5, 5, 6, 6, 7, 7, 7,
+};
+static const int8_t fwd_stage_range_idx_16[1] = { 0 };
// cos bit
static const int8_t fwd_cos_bit_col_dct_16[8] = {
@@ -86,17 +83,15 @@ static const int8_t fwd_cos_bit_row_adst_16[10] = { 12, 12, 12, 12, 12,
static const int8_t fwd_shift_32[3] = { 2, -4, 0 };
// stage range
-static const int8_t fwd_stage_range_col_dct_32[10] = { 15, 16, 17, 18, 19,
- 20, 20, 20, 20, 20 };
-static const int8_t fwd_stage_range_row_dct_32[10] = { 16, 17, 18, 19, 20,
- 20, 20, 20, 20, 20 };
-static const int8_t fwd_stage_range_col_adst_32[12] = {
- 15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
-};
-static const int8_t fwd_stage_range_row_adst_32[12] = {
- 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
-};
-static const int8_t fwd_stage_range_idx_32[1] = { 20 };
+static const int8_t fwd_stage_range_col_dct_32[10] = { 0, 1, 2, 3, 4,
+ 5, 5, 5, 5, 5 };
+static const int8_t fwd_stage_range_row_dct_32[10] = { 5, 6, 7, 8, 9,
+ 9, 9, 9, 9, 9 };
+static const int8_t fwd_stage_range_col_adst_32[12] = { 0, 0, 1, 2, 2, 3,
+ 3, 4, 4, 5, 5, 5 };
+static const int8_t fwd_stage_range_row_adst_32[12] = { 5, 5, 5, 6, 6, 7,
+ 7, 8, 8, 9, 9, 9 };
+static const int8_t fwd_stage_range_idx_32[1] = { 0 };
// cos bit
static const int8_t fwd_cos_bit_col_dct_32[10] = { 12, 12, 12, 12, 12,
@@ -113,11 +108,11 @@ static const int8_t fwd_cos_bit_row_adst_32[12] = { 12, 12, 12, 12, 12, 12,
static const int8_t fwd_shift_64[3] = { 0, -2, -2 };
// stage range
-static const int8_t fwd_stage_range_col_dct_64[12] = { 13, 14, 15, 16, 17, 18,
- 19, 19, 19, 19, 19, 19 };
-static const int8_t fwd_stage_range_row_dct_64[12] = { 17, 18, 19, 20, 21, 22,
- 22, 22, 22, 22, 22, 22 };
-static const int8_t fwd_stage_range_idx_64[1] = { 22 };
+static const int8_t fwd_stage_range_col_dct_64[12] = { 0, 1, 2, 3, 4, 5,
+ 6, 6, 6, 6, 6, 6 };
+static const int8_t fwd_stage_range_row_dct_64[12] = { 6, 7, 8, 9, 10, 11,
+ 11, 11, 11, 11, 11, 11 };
+static const int8_t fwd_stage_range_idx_64[1] = { 0 };
// cos bit
static const int8_t fwd_cos_bit_col_dct_64[12] = { 15, 15, 15, 15, 15, 14,
diff --git a/third_party/aom/av1/common/av1_fwd_txfm2d.c b/third_party/aom/av1/common/av1_fwd_txfm2d.c
index c124f3af7..740c63322 100644
--- a/third_party/aom/av1/common/av1_fwd_txfm2d.c
+++ b/third_party/aom/av1/common/av1_fwd_txfm2d.c
@@ -24,6 +24,9 @@ static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
case TXFM_TYPE_DCT8: return av1_fdct8_new;
case TXFM_TYPE_DCT16: return av1_fdct16_new;
case TXFM_TYPE_DCT32: return av1_fdct32_new;
+#if CONFIG_TX64X64
+ case TXFM_TYPE_DCT64: return av1_fdct64_new;
+#endif // CONFIG_TX64X64
case TXFM_TYPE_ADST4: return av1_fadst4_new;
case TXFM_TYPE_ADST8: return av1_fadst8_new;
case TXFM_TYPE_ADST16: return av1_fadst16_new;
@@ -33,14 +36,42 @@ static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;
case TXFM_TYPE_IDENTITY32: return av1_fidentity32_c;
+#if CONFIG_TX64X64
+ case TXFM_TYPE_IDENTITY64: return av1_fidentity64_c;
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
default: assert(0); return NULL;
}
}
+void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
+ const TXFM_2D_FLIP_CFG *cfg, int bd) {
+ // Note when assigning txfm_size_col, we use the txfm_size from the
+ // row configuration and vice versa. This is intentionally done to
+ // accurately perform rectangular transforms. When the transform is
+ // rectangular, the number of columns will be the same as the
+ // txfm_size stored in the row cfg struct. It will make no difference
+ // for square transforms.
+ const int txfm_size_col = cfg->row_cfg->txfm_size;
+ const int txfm_size_row = cfg->col_cfg->txfm_size;
+ // Take the shift from the larger dimension in the rectangular case.
+ const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
+ : cfg->col_cfg->shift;
+  // The i < MAX_TXFM_STAGE_NUM bound silences a spurious array-bounds warning
+ for (int i = 0; i < cfg->col_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
+ stage_range_col[i] = cfg->col_cfg->stage_range[i] + shift[0] + bd + 1;
+ }
+
+  // The i < MAX_TXFM_STAGE_NUM bound silences a spurious array-bounds warning
+ for (int i = 0; i < cfg->row_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
+ stage_range_row[i] =
+ cfg->row_cfg->stage_range[i] + shift[0] + shift[1] + bd + 1;
+ }
+}
+
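
The generated ranges reproduce the old hard-coded tables at 12-bit depth: for the 4-point column DCT, the base ranges { 0, 1, 2, 2 } plus shift[0] = 2, bd = 12, and the +1 give { 15, 16, 17, 17 }, exactly the values the previous fwd_stage_range_col_dct_4 table held (see the av1_fwd_txfm1d_cfg.h hunks above). A minimal check of that arithmetic:

#include <assert.h>
#include <stdint.h>
/* Sketch: av1_gen_fwd_stage_range arithmetic for the 4-point column
 * DCT, with base ranges and shift taken from av1_fwd_txfm1d_cfg.h. */
static void check_stage_range_col_dct_4(void) {
  const int8_t base[4] = { 0, 1, 2, 2 };  /* fwd_stage_range_col_dct_4 */
  const int8_t shift0 = 2;                /* fwd_shift_4[0] */
  const int bd = 12;
  const int8_t expect[4] = { 15, 16, 17, 17 }; /* old hard-coded table */
  for (int i = 0; i < 4; ++i)
    assert(base[i] + shift0 + bd + 1 == expect[i]);
}
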
static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
const int stride, const TXFM_2D_FLIP_CFG *cfg,
- int32_t *buf) {
+ int32_t *buf, int bd) {
int c, r;
// Note when assigning txfm_size_col, we use the txfm_size from the
// row configuration and vice versa. This is intentionally done to
@@ -53,8 +84,12 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
// Take the shift from the larger dimension in the rectangular case.
const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
: cfg->col_cfg->shift;
- const int8_t *stage_range_col = cfg->col_cfg->stage_range;
- const int8_t *stage_range_row = cfg->row_cfg->stage_range;
+ int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
+ int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
+ assert(cfg->col_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
+ assert(cfg->row_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
+ av1_gen_fwd_stage_range(stage_range_col, stage_range_row, cfg, bd);
+
const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
@@ -108,93 +143,146 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
}
void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
+#if CONFIG_TXMG
+ int32_t txfm_buf[4 * 8];
+ int16_t rinput[4 * 8];
+ TX_SIZE tx_size = TX_4X8;
+ TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
+ TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ int rw = h;
+ int rh = w;
+ transpose_int16(rinput, rw, input, stride, w, h);
+ TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size);
+ fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
+ transpose_int32(output, w, txfm_buf, rw, rw, rh);
+#else
int32_t txfm_buf[4 * 8];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_4X8);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
+#endif
}
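
The CONFIG_TXMG path above computes an NxM forward transform by transposing the input, running the rotated MxN configuration, and transposing the result back. A sketch of the transpose helper with the signature the call sites imply, (dst, dst_stride, src, src_stride, w, h); treat it as an illustration, not the library's definition:

#include <stdint.h>
/* Sketch of transpose_int16 as used above: writes the h-by-w transpose
 * of a w-by-h block (signature inferred from the call sites). */
static void transpose_int16_sketch(int16_t *dst, int dst_stride,
                                   const int16_t *src, int src_stride,
                                   int w, int h) {
  for (int r = 0; r < h; ++r)
    for (int c = 0; c < w; ++c)
      dst[c * dst_stride + r] = src[r * src_stride + c];
}
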
void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
int32_t txfm_buf[8 * 4];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X4);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
+#if CONFIG_TXMG
+ int32_t txfm_buf[8 * 16];
+ int16_t rinput[8 * 16];
+ TX_SIZE tx_size = TX_8X16;
+ TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
+ TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ int rw = h;
+ int rh = w;
+ transpose_int16(rinput, rw, input, stride, w, h);
+ TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size);
+ fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
+ transpose_int32(output, w, txfm_buf, rw, rw, rh);
+#else
int32_t txfm_buf[8 * 16];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X16);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
+#endif
}
void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
int32_t txfm_buf[16 * 8];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X8);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
+#if CONFIG_TXMG
+ int32_t txfm_buf[16 * 32];
+ int16_t rinput[16 * 32];
+ TX_SIZE tx_size = TX_16X32;
+ TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
+ TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ int rw = h;
+ int rh = w;
+ transpose_int16(rinput, rw, input, stride, w, h);
+ TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size);
+ fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
+ transpose_int32(output, w, txfm_buf, rw, rw, rh);
+#else
int32_t txfm_buf[16 * 32];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X32);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
+#endif
}
void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
int32_t txfm_buf[32 * 16];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X16);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
int32_t txfm_buf[4 * 4];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_4X4);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
int32_t txfm_buf[8 * 8];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X8);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
int32_t txfm_buf[16 * 16];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X16);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
int32_t txfm_buf[32 * 32];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
+#if CONFIG_TX64X64
void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
int32_t txfm_buf[64 * 64];
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x64_cfg(tx_type);
- (void)bd;
- fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
+}
+
+void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
+ TX_TYPE tx_type, int bd) {
+ int32_t txfm_buf[32 * 64];
+ TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_32x64_cfg(tx_type);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
+void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride,
+ TX_TYPE tx_type, int bd) {
+ int32_t txfm_buf[64 * 32];
+ TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x32_cfg(tx_type);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
+}
+#endif // CONFIG_TX64X64
+
static const TXFM_1D_CFG *fwd_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
// DCT
{
@@ -261,19 +349,52 @@ static const TXFM_1D_CFG *fwd_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
#endif // CONFIG_EXT_TX
};
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(int tx_type, int tx_size) {
+TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size) {
TXFM_2D_FLIP_CFG cfg;
set_flip_cfg(tx_type, &cfg);
- const int tx_type_col = vtx_tab[tx_type];
- const int tx_type_row = htx_tab[tx_type];
- const int tx_size_col = txsize_vert_map[tx_size];
- const int tx_size_row = txsize_horz_map[tx_size];
+ const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
+ const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
+ const TX_SIZE tx_size_col = txsize_vert_map[tx_size];
+ const TX_SIZE tx_size_row = txsize_horz_map[tx_size];
cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size_col];
cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size_row];
return cfg;
}
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(int tx_type) {
+#if CONFIG_TX64X64
+TXFM_2D_FLIP_CFG av1_get_fwd_txfm_32x64_cfg(TX_TYPE tx_type) {
+ TXFM_2D_FLIP_CFG cfg;
+ const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
+ const TX_SIZE tx_size_row = txsize_horz_map[TX_32X64];
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg.col_cfg = &fwd_txfm_1d_col_cfg_dct_64;
+ cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size_row];
+ cfg.ud_flip = 0;
+ cfg.lr_flip = 0;
+ break;
+ default: assert(0);
+ }
+ return cfg;
+}
+
+TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x32_cfg(TX_TYPE tx_type) {
+ TXFM_2D_FLIP_CFG cfg;
+ const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
+ const TX_SIZE tx_size_col = txsize_vert_map[TX_64X32];
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size_col];
+ cfg.row_cfg = &fwd_txfm_1d_row_cfg_dct_64;
+ cfg.ud_flip = 0;
+ cfg.lr_flip = 0;
+ break;
+ default: assert(0);
+ }
+ return cfg;
+}
+
+TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(TX_TYPE tx_type) {
TXFM_2D_FLIP_CFG cfg;
switch (tx_type) {
case DCT_DCT:
@@ -289,3 +410,4 @@ TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(int tx_type) {
}
return cfg;
}
+#endif // CONFIG_TX64X64
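
The CONFIG_TXMG paths above implement the tall rectangular forward transforms by rotation: transpose the residual block, run the transform configured for the rotated size and type (av1_rotate_tx_size / av1_rotate_tx_type), then transpose the coefficients back. Note the argument order in the rotated call: fwd_txfm2d_c writes the rotated coefficients into txfm_buf and borrows the caller's output array as its scratch buffer, and the final transpose_int32 then lands the result in output. A minimal sketch of the transpose contract implied by the call sites (the helper name here is hypothetical; the real routines live with the other transform helpers):

    #include <stdint.h>

    /* Transpose a w-wide, h-high block from src (row stride src_stride)
     * into dst (row stride dst_stride); dst ends up h-wide and w-high. */
    static void transpose_int16_sketch(int16_t *dst, int dst_stride,
                                       const int16_t *src, int src_stride,
                                       int w, int h) {
      for (int r = 0; r < h; ++r)
        for (int c = 0; c < w; ++c)
          dst[c * dst_stride + r] = src[r * src_stride + c];
    }

The payoff is that one transform kernel per size pair serves both orientations, at the cost of two transposes per block.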
diff --git a/third_party/aom/av1/common/av1_inv_txfm1d.c b/third_party/aom/av1/common/av1_inv_txfm1d.c
index 3399b7cb9..51f4b6362 100644
--- a/third_party/aom/av1/common/av1_inv_txfm1d.c
+++ b/third_party/aom/av1/common/av1_inv_txfm1d.c
@@ -19,24 +19,40 @@ void range_check_func(int32_t stage, const int32_t *input, const int32_t *buf,
const int64_t maxValue = (1LL << (bit - 1)) - 1;
const int64_t minValue = -(1LL << (bit - 1));
+ int in_range = 1;
+
for (int i = 0; i < size; ++i) {
if (buf[i] < minValue || buf[i] > maxValue) {
- fprintf(stderr, "Error: coeffs contain out-of-range values\n");
- fprintf(stderr, "stage: %d\n", stage);
- fprintf(stderr, "node: %d\n", i);
- fprintf(stderr, "allowed range: [%" PRId64 ";%" PRId64 "]\n", minValue,
- maxValue);
- fprintf(stderr, "coeffs: ");
-
- fprintf(stderr, "[");
- for (int j = 0; j < size; j++) {
- if (j > 0) fprintf(stderr, ", ");
- fprintf(stderr, "%d", input[j]);
- }
- fprintf(stderr, "]\n");
- assert(0);
+ in_range = 0;
+ }
+ }
+
+ if (!in_range) {
+ fprintf(stderr, "Error: coeffs contain out-of-range values\n");
+ fprintf(stderr, "stage: %d\n", stage);
+ fprintf(stderr, "allowed range: [%" PRId64 ";%" PRId64 "]\n", minValue,
+ maxValue);
+
+ fprintf(stderr, "coeffs: ");
+
+ fprintf(stderr, "[");
+ for (int j = 0; j < size; j++) {
+ if (j > 0) fprintf(stderr, ", ");
+ fprintf(stderr, "%d", input[j]);
+ }
+ fprintf(stderr, "]\n");
+
+ fprintf(stderr, " buf: ");
+
+ fprintf(stderr, "[");
+ for (int j = 0; j < size; j++) {
+ if (j > 0) fprintf(stderr, ", ");
+ fprintf(stderr, "%d", buf[j]);
}
+ fprintf(stderr, "]\n\n");
}
+
+ assert(in_range);
}
#define range_check(stage, input, buf, size, bit) \
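
The rewritten range_check_func defers its reporting: it first scans the whole buffer for out-of-range values, then, on failure, prints the stage, the allowed range, the stage input, and the offending output buffer before asserting, so one failure dumps the complete context instead of stopping at the first bad coefficient. The bound itself is the ordinary two's-complement range for `bit` significant bits, restated here as a self-contained helper (hypothetical name, for illustration):

    #include <stdint.h>

    /* A value fits in `bit` signed bits iff it lies in
     * [-(2^(bit-1)), 2^(bit-1) - 1]; e.g. bit = 16 gives [-32768, 32767]. */
    static int fits_signed_bits(int32_t v, int bit) {
      const int64_t max_value = (1LL << (bit - 1)) - 1;
      return v >= -(max_value + 1) && v <= max_value;
    }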
@@ -1577,6 +1593,16 @@ void av1_iidentity32_c(const int32_t *input, int32_t *output,
for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
range_check(0, input, output, 32, stage_range[0]);
}
+
+#if CONFIG_TX64X64
+void av1_iidentity64_c(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ (void)cos_bit;
+ for (int i = 0; i < 64; ++i)
+ output[i] = (int32_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
+ range_check(0, input, output, 64, stage_range[0]);
+}
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
diff --git a/third_party/aom/av1/common/av1_inv_txfm1d.h b/third_party/aom/av1/common/av1_inv_txfm1d.h
index 037a3c6bc..8996f7c9d 100644
--- a/third_party/aom/av1/common/av1_inv_txfm1d.h
+++ b/third_party/aom/av1/common/av1_inv_txfm1d.h
@@ -26,8 +26,10 @@ void av1_idct16_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
void av1_idct32_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#if CONFIG_TX64X64
void av1_idct64_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#endif // CONFIG_TX64X64
void av1_iadst4_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
@@ -46,6 +48,10 @@ void av1_iidentity16_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
void av1_iidentity32_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#if CONFIG_TX64X64
+void av1_iidentity64_c(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#ifdef __cplusplus
diff --git a/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h b/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h
index f30f91576..8bcf84e05 100644
--- a/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h
+++ b/third_party/aom/av1/common/av1_inv_txfm1d_cfg.h
@@ -13,16 +13,31 @@
#define AV1_INV_TXFM2D_CFG_H_
#include "av1/common/av1_inv_txfm1d.h"
+// Sum of the entries of each fwd_shift_## table, indexed by TX_SIZE.
+#if CONFIG_CHROMA_2X2
+#if CONFIG_TX64X64
+static const int8_t fwd_shift_sum[TX_SIZES] = { 3, 2, 1, 0, -2, -4 };
+#else // CONFIG_TX64X64
+static const int8_t fwd_shift_sum[TX_SIZES] = { 3, 2, 1, 0, -2 };
+#endif // CONFIG_TX64X64
+#else // CONFIG_CHROMA_2X2
+#if CONFIG_TX64X64
+static const int8_t fwd_shift_sum[TX_SIZES] = { 2, 1, 0, -2, -4 };
+#else // CONFIG_TX64X64
+static const int8_t fwd_shift_sum[TX_SIZES] = { 2, 1, 0, -2 };
+#endif // CONFIG_TX64X64
+#endif // CONFIG_CHROMA_2X2
+
// ---------------- 4x4 1D config -----------------------
// shift
static const int8_t inv_shift_4[2] = { 0, -4 };
// stage range
-static const int8_t inv_stage_range_col_dct_4[4] = { 18, 18, 17, 17 };
-static const int8_t inv_stage_range_row_dct_4[4] = { 18, 18, 18, 18 };
-static const int8_t inv_stage_range_col_adst_4[6] = { 18, 18, 18, 18, 17, 17 };
-static const int8_t inv_stage_range_row_adst_4[6] = { 18, 18, 18, 18, 18, 18 };
-static const int8_t inv_stage_range_idx_4[1] = { 18 };
+static const int8_t inv_stage_range_col_dct_4[4] = { 3, 3, 2, 2 };
+static const int8_t inv_stage_range_row_dct_4[4] = { 3, 3, 3, 3 };
+static const int8_t inv_stage_range_col_adst_4[6] = { 3, 3, 3, 3, 2, 2 };
+static const int8_t inv_stage_range_row_adst_4[6] = { 3, 3, 3, 3, 3, 3 };
+static const int8_t inv_stage_range_idx_4[1] = { 0 };
// cos bit
static const int8_t inv_cos_bit_col_dct_4[4] = { 13, 13, 13, 13 };
@@ -35,13 +50,11 @@ static const int8_t inv_cos_bit_row_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t inv_shift_8[2] = { 0, -5 };
// stage range
-static const int8_t inv_stage_range_col_dct_8[6] = { 19, 19, 19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_dct_8[6] = { 19, 19, 19, 19, 19, 19 };
-static const int8_t inv_stage_range_col_adst_8[8] = { 19, 19, 19, 19,
- 19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_adst_8[8] = { 19, 19, 19, 19,
- 19, 19, 19, 19 };
-static const int8_t inv_stage_range_idx_8[1] = { 19 };
+static const int8_t inv_stage_range_col_dct_8[6] = { 5, 5, 5, 5, 4, 4 };
+static const int8_t inv_stage_range_row_dct_8[6] = { 5, 5, 5, 5, 5, 5 };
+static const int8_t inv_stage_range_col_adst_8[8] = { 5, 5, 5, 5, 5, 5, 4, 4 };
+static const int8_t inv_stage_range_row_adst_8[8] = { 5, 5, 5, 5, 5, 5, 5, 5 };
+static const int8_t inv_stage_range_idx_8[1] = { 0 };
// cos bit
static const int8_t inv_cos_bit_col_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
@@ -58,15 +71,13 @@ static const int8_t inv_cos_bit_row_adst_8[8] = {
static const int8_t inv_shift_16[2] = { -1, -5 };
// stage range
-static const int8_t inv_stage_range_col_dct_16[8] = { 19, 19, 19, 19,
- 19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_dct_16[8] = { 20, 20, 20, 20,
- 20, 20, 20, 20 };
-static const int8_t inv_stage_range_col_adst_16[10] = { 19, 19, 19, 19, 19,
- 19, 19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_adst_16[10] = { 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20 };
-static const int8_t inv_stage_range_idx_16[1] = { 20 };
+static const int8_t inv_stage_range_col_dct_16[8] = { 7, 7, 7, 7, 7, 7, 6, 6 };
+static const int8_t inv_stage_range_row_dct_16[8] = { 7, 7, 7, 7, 7, 7, 7, 7 };
+static const int8_t inv_stage_range_col_adst_16[10] = { 7, 7, 7, 7, 7,
+ 7, 7, 7, 6, 6 };
+static const int8_t inv_stage_range_row_adst_16[10] = { 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7 };
+static const int8_t inv_stage_range_idx_16[1] = { 0 };
// cos bit
static const int8_t inv_cos_bit_col_dct_16[8] = {
@@ -85,17 +96,15 @@ static const int8_t inv_cos_bit_row_adst_16[10] = { 12, 12, 12, 12, 12,
static const int8_t inv_shift_32[2] = { -1, -5 };
// stage range
-static const int8_t inv_stage_range_col_dct_32[10] = { 19, 19, 19, 19, 19,
- 19, 19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_dct_32[10] = { 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20 };
-static const int8_t inv_stage_range_col_adst_32[12] = {
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
-};
-static const int8_t inv_stage_range_row_adst_32[12] = {
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
-};
-static const int8_t inv_stage_range_idx_32[1] = { 20 };
+static const int8_t inv_stage_range_col_dct_32[10] = { 9, 9, 9, 9, 9,
+ 9, 9, 9, 8, 8 };
+static const int8_t inv_stage_range_row_dct_32[10] = { 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9 };
+static const int8_t inv_stage_range_col_adst_32[12] = { 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 8, 8 };
+static const int8_t inv_stage_range_row_adst_32[12] = { 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9 };
+static const int8_t inv_stage_range_idx_32[1] = { 0 };
// cos bit
static const int8_t inv_cos_bit_col_dct_32[10] = { 13, 13, 13, 13, 13,
@@ -109,14 +118,15 @@ static const int8_t inv_cos_bit_row_adst_32[12] = { 12, 12, 12, 12, 12, 12,
// ---------------- 64x64 1D constants -----------------------
// shift
-static const int8_t inv_shift_64[2] = { -1, -7 };
+static const int8_t inv_shift_64[2] = { -1, -5 };
// stage range
-static const int8_t inv_stage_range_col_dct_64[12] = { 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 18, 18 };
-static const int8_t inv_stage_range_row_dct_64[12] = { 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20 };
-static const int8_t inv_stage_range_idx_64[1] = { 20 };
+static const int8_t inv_stage_range_col_dct_64[12] = { 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 10, 10 };
+static const int8_t inv_stage_range_row_dct_64[12] = { 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11 };
+
+static const int8_t inv_stage_range_idx_64[1] = { 0 };
// cos bit
static const int8_t inv_cos_bit_col_dct_64[12] = { 13, 13, 13, 13, 13, 13,
@@ -126,9 +136,8 @@ static const int8_t inv_cos_bit_row_dct_64[12] = { 12, 12, 12, 12, 12, 12,
// ---------------- row config inv_dct_4 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_4 = {
- 4, // .txfm_size
- 4, // .stage_num
- // 0, // .log_scale
+ 4, // .txfm_size
+ 4, // .stage_num
inv_shift_4, // .shift
inv_stage_range_row_dct_4, // .stage_range
inv_cos_bit_row_dct_4, // .cos_bit
@@ -137,9 +146,8 @@ static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_4 = {
// ---------------- row config inv_dct_8 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_8 = {
- 8, // .txfm_size
- 6, // .stage_num
- // 0, // .log_scale
+ 8, // .txfm_size
+ 6, // .stage_num
inv_shift_8, // .shift
inv_stage_range_row_dct_8, // .stage_range
  inv_cos_bit_row_dct_8,      // .cos_bit
@@ -147,9 +155,8 @@ static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_8 = {
};
// ---------------- row config inv_dct_16 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_16 = {
- 16, // .txfm_size
- 8, // .stage_num
- // 0, // .log_scale
+ 16, // .txfm_size
+ 8, // .stage_num
inv_shift_16, // .shift
inv_stage_range_row_dct_16, // .stage_range
inv_cos_bit_row_dct_16, // .cos_bit
@@ -158,15 +165,15 @@ static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_16 = {
// ---------------- row config inv_dct_32 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_32 = {
- 32, // .txfm_size
- 10, // .stage_num
- // 1, // .log_scale
+ 32, // .txfm_size
+ 10, // .stage_num
inv_shift_32, // .shift
inv_stage_range_row_dct_32, // .stage_range
inv_cos_bit_row_dct_32, // .cos_bit_row
TXFM_TYPE_DCT32 // .txfm_type
};
+#if CONFIG_TX64X64
// ---------------- row config inv_dct_64 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_64 = {
64, // .txfm_size
@@ -176,12 +183,12 @@ static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_64 = {
inv_cos_bit_row_dct_64, // .cos_bit
  TXFM_TYPE_DCT64,            // .txfm_type
};
+#endif // CONFIG_TX64X64
// ---------------- row config inv_adst_4 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_4 = {
- 4, // .txfm_size
- 6, // .stage_num
- // 0, // .log_scale
+ 4, // .txfm_size
+ 6, // .stage_num
inv_shift_4, // .shift
inv_stage_range_row_adst_4, // .stage_range
inv_cos_bit_row_adst_4, // .cos_bit
@@ -190,9 +197,8 @@ static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_4 = {
// ---------------- row config inv_adst_8 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_8 = {
- 8, // .txfm_size
- 8, // .stage_num
- // 0, // .log_scale
+ 8, // .txfm_size
+ 8, // .stage_num
inv_shift_8, // .shift
inv_stage_range_row_adst_8, // .stage_range
inv_cos_bit_row_adst_8, // .cos_bit
@@ -201,9 +207,8 @@ static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_8 = {
// ---------------- row config inv_adst_16 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_16 = {
- 16, // .txfm_size
- 10, // .stage_num
- // 0, // .log_scale
+ 16, // .txfm_size
+ 10, // .stage_num
inv_shift_16, // .shift
inv_stage_range_row_adst_16, // .stage_range
inv_cos_bit_row_adst_16, // .cos_bit
@@ -212,9 +217,8 @@ static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_16 = {
// ---------------- row config inv_adst_32 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_32 = {
- 32, // .txfm_size
- 12, // .stage_num
- // 1, // .log_scale
+ 32, // .txfm_size
+ 12, // .stage_num
inv_shift_32, // .shift
inv_stage_range_row_adst_32, // .stage_range
inv_cos_bit_row_adst_32, // .cos_bit
@@ -223,9 +227,8 @@ static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_32 = {
// ---------------- col config inv_dct_4 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_4 = {
- 4, // .txfm_size
- 4, // .stage_num
- // 0, // .log_scale
+ 4, // .txfm_size
+ 4, // .stage_num
inv_shift_4, // .shift
inv_stage_range_col_dct_4, // .stage_range
inv_cos_bit_col_dct_4, // .cos_bit
@@ -234,9 +237,8 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_4 = {
// ---------------- col config inv_dct_8 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_8 = {
- 8, // .txfm_size
- 6, // .stage_num
- // 0, // .log_scale
+ 8, // .txfm_size
+ 6, // .stage_num
inv_shift_8, // .shift
inv_stage_range_col_dct_8, // .stage_range
  inv_cos_bit_col_dct_8,      // .cos_bit
@@ -244,9 +246,8 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_8 = {
};
// ---------------- col config inv_dct_16 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_16 = {
- 16, // .txfm_size
- 8, // .stage_num
- // 0, // .log_scale
+ 16, // .txfm_size
+ 8, // .stage_num
inv_shift_16, // .shift
inv_stage_range_col_dct_16, // .stage_range
inv_cos_bit_col_dct_16, // .cos_bit
@@ -255,9 +256,8 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_16 = {
// ---------------- col config inv_dct_32 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_32 = {
- 32, // .txfm_size
- 10, // .stage_num
- // 1, // .log_scale
+ 32, // .txfm_size
+ 10, // .stage_num
inv_shift_32, // .shift
inv_stage_range_col_dct_32, // .stage_range
inv_cos_bit_col_dct_32, // .cos_bit_col
@@ -276,9 +276,8 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_dct_64 = {
// ---------------- col config inv_adst_4 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_4 = {
- 4, // .txfm_size
- 6, // .stage_num
- // 0, // .log_scale
+ 4, // .txfm_size
+ 6, // .stage_num
inv_shift_4, // .shift
inv_stage_range_col_adst_4, // .stage_range
inv_cos_bit_col_adst_4, // .cos_bit
@@ -287,9 +286,8 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_4 = {
// ---------------- col config inv_adst_8 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_8 = {
- 8, // .txfm_size
- 8, // .stage_num
- // 0, // .log_scale
+ 8, // .txfm_size
+ 8, // .stage_num
inv_shift_8, // .shift
inv_stage_range_col_adst_8, // .stage_range
inv_cos_bit_col_adst_8, // .cos_bit
@@ -298,9 +296,8 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_8 = {
// ---------------- col config inv_adst_16 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_16 = {
- 16, // .txfm_size
- 10, // .stage_num
- // 0, // .log_scale
+ 16, // .txfm_size
+ 10, // .stage_num
inv_shift_16, // .shift
inv_stage_range_col_adst_16, // .stage_range
inv_cos_bit_col_adst_16, // .cos_bit
@@ -309,9 +306,8 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_16 = {
// ---------------- col config inv_adst_32 ----------------
static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_32 = {
- 32, // .txfm_size
- 12, // .stage_num
- // 1, // .log_scale
+ 32, // .txfm_size
+ 12, // .stage_num
inv_shift_32, // .shift
inv_stage_range_col_adst_32, // .stage_range
inv_cos_bit_col_adst_32, // .cos_bit
@@ -322,9 +318,8 @@ static const TXFM_1D_CFG inv_txfm_1d_col_cfg_adst_32 = {
// identity does not need to differentiate between row and col
// ---------------- row/col config inv_identity_4 ----------
static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_4 = {
- 4, // .txfm_size
- 1, // .stage_num
- // 0, // .log_scale
+ 4, // .txfm_size
+ 1, // .stage_num
inv_shift_4, // .shift
inv_stage_range_idx_4, // .stage_range
NULL, // .cos_bit
@@ -333,9 +328,8 @@ static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_4 = {
// ---------------- row/col config inv_identity_8 ----------------
static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_8 = {
- 8, // .txfm_size
- 1, // .stage_num
- // 0, // .log_scale
+ 8, // .txfm_size
+ 1, // .stage_num
inv_shift_8, // .shift
inv_stage_range_idx_8, // .stage_range
NULL, // .cos_bit
@@ -344,9 +338,8 @@ static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_8 = {
// ---------------- row/col config inv_identity_16 ----------------
static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_16 = {
- 16, // .txfm_size
- 1, // .stage_num
- // 0, // .log_scale
+ 16, // .txfm_size
+ 1, // .stage_num
inv_shift_16, // .shift
inv_stage_range_idx_16, // .stage_range
NULL, // .cos_bit
@@ -355,13 +348,24 @@ static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_16 = {
// ---------------- row/col config inv_identity_32 ----------------
static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_32 = {
- 32, // .txfm_size
- 1, // .stage_num
- // 1, // .log_scale
+ 32, // .txfm_size
+ 1, // .stage_num
inv_shift_32, // .shift
inv_stage_range_idx_32, // .stage_range
NULL, // .cos_bit
TXFM_TYPE_IDENTITY32, // .txfm_type
};
+
+#if CONFIG_TX64X64
+// ---------------- row/col config inv_identity_64 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_64 = {
+ 64, // .txfm_size
+ 1, // .stage_num
+ inv_shift_64, // .shift
+ inv_stage_range_idx_64, // .stage_range
+ NULL, // .cos_bit
+ TXFM_TYPE_IDENTITY64, // .txfm_type
+};
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#endif // AV1_INV_TXFM2D_CFG_H_
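
The constant rewrite in this header is a rebasing rather than a retuning. The old tables hard-coded absolute stage ranges for 12-bit input (the 18/19/20 values); the new tables store ranges relative to the transform input, and the absolute bound is reassembled per bit depth at run time by av1_gen_inv_stage_range (added to av1_inv_txfm2d.c later in this diff) as stage_range + fwd_shift + bd + 1, with fwd_shift taken from the fwd_shift_sum table introduced at the top of this header. A quick consistency check using only values visible in this diff:

    #include <assert.h>

    /* Rebased 4x4 row-DCT range (3) + fwd_shift_sum[TX_4X4] (2, without
     * CONFIG_CHROMA_2X2) + bd + 1 reproduces the old 12-bit constant 18. */
    static void check_rebased_stage_range(void) {
      const int relative_range = 3; /* new inv_stage_range_row_dct_4[0] */
      const int fwd_shift = 2;      /* fwd_shift_sum[TX_4X4] */
      const int bd = 12;            /* bit depth the old tables assumed */
      assert(relative_range + fwd_shift + bd + 1 == 18);
    }

The same arithmetic with bd = 8 or 10 yields correspondingly tighter ranges, which is the point of the change.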
diff --git a/third_party/aom/av1/common/av1_inv_txfm2d.c b/third_party/aom/av1/common/av1_inv_txfm2d.c
index 58845f19b..031d11b40 100644
--- a/third_party/aom/av1/common/av1_inv_txfm2d.c
+++ b/third_party/aom/av1/common/av1_inv_txfm2d.c
@@ -22,6 +22,9 @@ static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
case TXFM_TYPE_DCT8: return av1_idct8_new;
case TXFM_TYPE_DCT16: return av1_idct16_new;
case TXFM_TYPE_DCT32: return av1_idct32_new;
+#if CONFIG_TX64X64
+ case TXFM_TYPE_DCT64: return av1_idct64_new;
+#endif // CONFIG_TX64X64
case TXFM_TYPE_ADST4: return av1_iadst4_new;
case TXFM_TYPE_ADST8: return av1_iadst8_new;
case TXFM_TYPE_ADST16: return av1_iadst16_new;
@@ -31,6 +34,9 @@ static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c;
case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c;
case TXFM_TYPE_IDENTITY32: return av1_iidentity32_c;
+#if CONFIG_TX64X64
+ case TXFM_TYPE_IDENTITY64: return av1_iidentity64_c;
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
default: assert(0); return NULL;
}
@@ -43,14 +49,22 @@ static const TXFM_1D_CFG *inv_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
NULL,
#endif
&inv_txfm_1d_col_cfg_dct_4, &inv_txfm_1d_col_cfg_dct_8,
- &inv_txfm_1d_col_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_32 },
+ &inv_txfm_1d_col_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_32,
+#if CONFIG_TX64X64
+ &inv_txfm_1d_col_cfg_dct_64
+#endif // CONFIG_TX64X64
+ },
// ADST
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
- &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
+ &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32,
+#if CONFIG_TX64X64
+ NULL
+#endif // CONFIG_TX64X64
+ },
#if CONFIG_EXT_TX
// FLIPADST
{
@@ -58,14 +72,22 @@ static const TXFM_1D_CFG *inv_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
NULL,
#endif
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
- &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
+ &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32,
+#if CONFIG_TX64X64
+ NULL
+#endif // CONFIG_TX64X64
+ },
// IDENTITY
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_cfg_identity_4, &inv_txfm_1d_cfg_identity_8,
- &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32 },
+ &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32,
+#if CONFIG_TX64X64
+ &inv_txfm_1d_cfg_identity_64
+#endif // CONFIG_TX64X64
+ },
#endif // CONFIG_EXT_TX
};
@@ -76,14 +98,22 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
NULL,
#endif
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
- &inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32 },
+ &inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32,
+#if CONFIG_TX64X64
+ &inv_txfm_1d_row_cfg_dct_64,
+#endif // CONFIG_TX64X64
+ },
// ADST
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
- &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
+ &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32,
+#if CONFIG_TX64X64
+ NULL
+#endif // CONFIG_TX64X64
+ },
#if CONFIG_EXT_TX
// FLIPADST
{
@@ -91,30 +121,39 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
NULL,
#endif
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
- &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
+ &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32,
+#if CONFIG_TX64X64
+ NULL
+#endif // CONFIG_TX64X64
+ },
// IDENTITY
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_cfg_identity_4, &inv_txfm_1d_cfg_identity_8,
- &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32 },
+ &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32,
+#if CONFIG_TX64X64
+ &inv_txfm_1d_cfg_identity_64
+#endif // CONFIG_TX64X64
+ },
#endif // CONFIG_EXT_TX
};
-TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(int tx_type, int tx_size) {
+TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size) {
TXFM_2D_FLIP_CFG cfg;
set_flip_cfg(tx_type, &cfg);
- const int tx_type_col = vtx_tab[tx_type];
- const int tx_type_row = htx_tab[tx_type];
- const int tx_size_col = txsize_vert_map[tx_size];
- const int tx_size_row = txsize_horz_map[tx_size];
+ const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
+ const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
+ const TX_SIZE tx_size_col = txsize_vert_map[tx_size];
+ const TX_SIZE tx_size_row = txsize_horz_map[tx_size];
cfg.col_cfg = inv_txfm_col_cfg_ls[tx_type_col][tx_size_col];
cfg.row_cfg = inv_txfm_row_cfg_ls[tx_type_row][tx_size_row];
return cfg;
}
-TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
+#if CONFIG_TX64X64
+TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(TX_TYPE tx_type) {
TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
switch (tx_type) {
case DCT_DCT:
@@ -127,9 +166,62 @@ TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
return cfg;
}
+TXFM_2D_FLIP_CFG av1_get_inv_txfm_32x64_cfg(TX_TYPE tx_type) {
+ TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_64;
+ cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_32;
+ set_flip_cfg(tx_type, &cfg);
+ break;
+ default: assert(0);
+ }
+ return cfg;
+}
+
+TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x32_cfg(TX_TYPE tx_type) {
+ TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_32;
+ cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_64;
+ set_flip_cfg(tx_type, &cfg);
+ break;
+ default: assert(0);
+ }
+ return cfg;
+}
+#endif // CONFIG_TX64X64
+
+void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
+ const TXFM_2D_FLIP_CFG *cfg, int8_t fwd_shift,
+ int bd) {
+ // Note when assigning txfm_size_col, we use the txfm_size from the
+ // row configuration and vice versa. This is intentionally done to
+ // accurately perform rectangular transforms. When the transform is
+ // rectangular, the number of columns will be the same as the
+ // txfm_size stored in the row cfg struct. It will make no difference
+ // for square transforms.
+ const int txfm_size_col = cfg->row_cfg->txfm_size;
+ const int txfm_size_row = cfg->col_cfg->txfm_size;
+ // Take the shift from the larger dimension in the rectangular case.
+ const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
+ : cfg->col_cfg->shift;
+  // Bounding i by MAX_TXFM_STAGE_NUM quiets the array-bounds warning above.
+ for (int i = 0; i < cfg->row_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
+ stage_range_row[i] = cfg->row_cfg->stage_range[i] + fwd_shift + bd + 1;
+ }
+  // Bounding i by MAX_TXFM_STAGE_NUM quiets the array-bounds warning above.
+ for (int i = 0; i < cfg->col_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
+ stage_range_col[i] =
+ cfg->col_cfg->stage_range[i] + fwd_shift + shift[0] + bd + 1;
+ }
+}
+
static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
int stride, TXFM_2D_FLIP_CFG *cfg,
- int32_t *txfm_buf, int bd) {
+ int32_t *txfm_buf, int8_t fwd_shift,
+ int bd) {
// Note when assigning txfm_size_col, we use the txfm_size from the
// row configuration and vice versa. This is intentionally done to
// accurately perform rectangular transforms. When the transform is
@@ -141,8 +233,12 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
// Take the shift from the larger dimension in the rectangular case.
const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
: cfg->col_cfg->shift;
- const int8_t *stage_range_col = cfg->col_cfg->stage_range;
- const int8_t *stage_range_row = cfg->row_cfg->stage_range;
+ int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
+ int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
+ assert(cfg->row_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
+ assert(cfg->col_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
+ av1_gen_inv_stage_range(stage_range_col, stage_range_row, cfg, fwd_shift, bd);
+
const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->col_cfg->txfm_type);
@@ -198,74 +294,158 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
static INLINE void inv_txfm2d_add_facade(const int32_t *input, uint16_t *output,
int stride, int32_t *txfm_buf,
- int tx_type, int tx_size, int bd) {
+ TX_TYPE tx_type, TX_SIZE tx_size,
+ int bd) {
TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(tx_type, tx_size);
- inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, bd);
+ TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
+ inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf,
+ fwd_shift_sum[tx_size_sqr], bd);
}
void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
int txfm_buf[4 * 8 + 8 + 8];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X8, bd);
}
void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
+#if CONFIG_TXMG
+ int txfm_buf[8 * 4 + 8 + 8];
+ int32_t rinput[8 * 4];
+ uint16_t routput[8 * 4];
+ TX_SIZE tx_size = TX_8X4;
+ TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
+ TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ int rw = h;
+ int rh = w;
+ transpose_int32(rinput, rw, input, w, w, h);
+ transpose_uint16(routput, rw, output, stride, w, h);
+ inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
+ transpose_uint16(output, stride, routput, rw, rw, rh);
+#else
int txfm_buf[8 * 4 + 4 + 4];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X4, bd);
+#endif
}
void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
int txfm_buf[8 * 16 + 16 + 16];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X16, bd);
}
void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
+#if CONFIG_TXMG
+ int txfm_buf[16 * 8 + 16 + 16];
+ int32_t rinput[16 * 8];
+ uint16_t routput[16 * 8];
+ TX_SIZE tx_size = TX_16X8;
+ TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
+ TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ int rw = h;
+ int rh = w;
+ transpose_int32(rinput, rw, input, w, w, h);
+ transpose_uint16(routput, rw, output, stride, w, h);
+ inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
+ transpose_uint16(output, stride, routput, rw, rw, rh);
+#else
int txfm_buf[16 * 8 + 8 + 8];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X8, bd);
+#endif
}
void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
int txfm_buf[16 * 32 + 32 + 32];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X32, bd);
}
void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
+#if CONFIG_TXMG
+ int txfm_buf[32 * 16 + 32 + 32];
+ int32_t rinput[32 * 16];
+ uint16_t routput[32 * 16];
+ TX_SIZE tx_size = TX_32X16;
+ TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
+ TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ int rw = h;
+ int rh = w;
+ transpose_int32(rinput, rw, input, w, w, h);
+ transpose_uint16(routput, rw, output, stride, w, h);
+ inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
+ transpose_uint16(output, stride, routput, rw, rw, rh);
+#else
int txfm_buf[32 * 16 + 16 + 16];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X16, bd);
+#endif
}
void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
int txfm_buf[4 * 4 + 4 + 4];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_4X4, bd);
}
void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
int txfm_buf[8 * 8 + 8 + 8];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_8X8, bd);
}
void av1_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
int txfm_buf[16 * 16 + 16 + 16];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X16, bd);
}
void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
int txfm_buf[32 * 32 + 32 + 32];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X32, bd);
}
+#if CONFIG_TX64X64
void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
int txfm_buf[64 * 64 + 64 + 64];
- TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_64x64_cfg(tx_type);
- inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, bd);
+ inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_64X64, bd);
+}
+
+void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
+ int stride, TX_TYPE tx_type, int bd) {
+#if CONFIG_TXMG
+ int txfm_buf[64 * 32 + 64 + 64];
+ int32_t rinput[64 * 32];
+ uint16_t routput[64 * 32];
+ TX_SIZE tx_size = TX_64X32;
+ TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
+ TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ int rw = h;
+ int rh = w;
+ transpose_int32(rinput, rw, input, w, w, h);
+ transpose_uint16(routput, rw, output, stride, w, h);
+ inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
+ transpose_uint16(output, stride, routput, rw, rw, rh);
+#else
+ int txfm_buf[64 * 32 + 64 + 64];
+ inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_64X32, bd);
+#endif
+}
+
+void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
+ int stride, TX_TYPE tx_type, int bd) {
+ int txfm_buf[64 * 32 + 64 + 64];
+ inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X64, bd);
}
+#endif // CONFIG_TX64X64
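
The inverse CONFIG_TXMG paths mirror the forward ones with one extra step: because inv_txfm2d_add_facade adds the residual into the reconstruction in place, the destination pixels must also be transposed into a scratch buffer (routput) before the rotated call and transposed back afterwards, alongside the coefficient transpose. The pixel-transpose contract implied by the call sites (hypothetical name, same shape as the int16 version used in the forward path):

    #include <stdint.h>

    /* Copy a w-wide, h-high block of pixels from src into dst transposed;
     * dst ends up h-wide and w-high. */
    static void transpose_uint16_sketch(uint16_t *dst, int dst_stride,
                                        const uint16_t *src, int src_stride,
                                        int w, int h) {
      for (int r = 0; r < h; ++r)
        for (int c = 0; c < w; ++c)
          dst[c * dst_stride + r] = src[r * src_stride + c];
    }

Also note the facade now forwards fwd_shift_sum[txsize_sqr_map[tx_size]] into inv_txfm2d_add_c, which is what lets the rebased, bit-depth-relative stage ranges above be materialized per call.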
diff --git a/third_party/aom/av1/common/av1_loopfilter.c b/third_party/aom/av1/common/av1_loopfilter.c
index 10df7fa91..95f7a8687 100644
--- a/third_party/aom/av1/common/av1_loopfilter.c
+++ b/third_party/aom/av1/common/av1_loopfilter.c
@@ -13,15 +13,28 @@
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
-#include "av1/common/av1_loopfilter.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
-
+#include "av1/common/av1_loopfilter.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
+#if CONFIG_LOOPFILTER_LEVEL
+static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
+ { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
+ { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
+ { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
+};
+
+#if CONFIG_EXT_DELTA_Q
+static const int delta_lf_id_lut[MAX_MB_PLANE][2] = {
+ { 0, 1 }, { 2, 2 }, { 3, 3 }
+};
+#endif // CONFIG_EXT_DELTA_Q
+#endif // CONFIG_LOOPFILTER_LEVEL
+
#if CONFIG_LPF_DIRECT
static void pick_filter_pixel_left(uint8_t *const src, uint8_t *const line,
int *const orig_pos, int length, int row,
@@ -278,6 +291,29 @@ static int pick_min_grad_direct(uint8_t *const src, int length, int row,
#define PARALLEL_DEBLOCKING_15TAPLUMAONLY 1
#define PARALLEL_DEBLOCKING_DISABLE_15TAP 0
+#if CONFIG_DEBLOCK_13TAP
+#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 1
+#else
+#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 0
+#endif
+
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+extern void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh);
+
+extern void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh);
+
+extern void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd);
+
+extern void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd);
+#endif
// 64 bit masks for left transform size. Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
@@ -376,7 +412,9 @@ static const uint64_t left_prediction_mask[BLOCK_SIZES_ALL] = {
0x0000000000000101ULL, // BLOCK_4X16,
0x0000000000000001ULL, // BLOCK_16X4,
0x0000000001010101ULL, // BLOCK_8X32,
- 0x0000000000000001ULL, // BLOCK_32X8
+ 0x0000000000000001ULL, // BLOCK_32X8,
+ 0x0101010101010101ULL, // BLOCK_16X64,
+ 0x0000000000000101ULL, // BLOCK_64X16
};
// 64 bit mask to shift and set for each prediction size.
@@ -402,7 +440,9 @@ static const uint64_t above_prediction_mask[BLOCK_SIZES_ALL] = {
0x0000000000000001ULL, // BLOCK_4X16,
0x0000000000000003ULL, // BLOCK_16X4,
0x0000000000000001ULL, // BLOCK_8X32,
- 0x000000000000000fULL, // BLOCK_32X8
+ 0x000000000000000fULL, // BLOCK_32X8,
+ 0x0000000000000003ULL, // BLOCK_16X64,
+ 0x00000000000000ffULL, // BLOCK_64X16
};
// 64 bit mask to shift and set for each prediction size. A bit is set for
// each 8x8 block that would be in the top left most block of the given block
@@ -429,7 +469,9 @@ static const uint64_t size_mask[BLOCK_SIZES_ALL] = {
0x0000000000000101ULL, // BLOCK_4X16,
0x0000000000000003ULL, // BLOCK_16X4,
0x0000000001010101ULL, // BLOCK_8X32,
- 0x000000000000000fULL, // BLOCK_32X8
+ 0x000000000000000fULL, // BLOCK_32X8,
+ 0x0303030303030303ULL, // BLOCK_16X64,
+ 0x000000000000ffffULL, // BLOCK_64X16
};
// These are used for masking the left and above 32x32 borders.
@@ -486,7 +528,9 @@ static const uint16_t left_prediction_mask_uv[BLOCK_SIZES_ALL] = {
0x0001, // BLOCK_4X16,
0x0001, // BLOCK_16X4,
0x0011, // BLOCK_8X32,
- 0x0001, // BLOCK_32X8
+ 0x0001, // BLOCK_32X8,
+ 0x1111, // BLOCK_16X64,
+ 0x0001, // BLOCK_64X16,
};
// 16 bit above mask to shift and set for uv each prediction size.
@@ -512,7 +556,9 @@ static const uint16_t above_prediction_mask_uv[BLOCK_SIZES_ALL] = {
0x0001, // BLOCK_4X16,
0x0001, // BLOCK_16X4,
0x0001, // BLOCK_8X32,
- 0x0003, // BLOCK_32X8
+ 0x0003, // BLOCK_32X8,
+ 0x0001, // BLOCK_16X64,
+ 0x000f, // BLOCK_64X16
};
// 64 bit mask to shift and set for each uv prediction size
@@ -538,28 +584,26 @@ static const uint16_t size_mask_uv[BLOCK_SIZES_ALL] = {
0x0001, // BLOCK_4X16,
0x0001, // BLOCK_16X4,
0x0011, // BLOCK_8X32,
- 0x0003, // BLOCK_32X8
+ 0x0003, // BLOCK_32X8,
+ 0x1111, // BLOCK_16X64,
+ 0x000f, // BLOCK_64X16
};
static const uint16_t left_border_uv = 0x1111;
static const uint16_t above_border_uv = 0x000f;
static const int mode_lf_lut[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
-#if CONFIG_ALT_INTRA
0,
#if CONFIG_SMOOTH_HV
0, 0,
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
1, 1, 0, 1, // INTER_MODES (ZEROMV == 0)
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
// 1, 1, 1, 1, 1, // INTER_SINGLEREF_COMP_MODES
// NOTE(zoeliu): Remove SR_NEAREST_NEWMV
1, 1, 1, 1, // INTER_SINGLEREF_COMP_MODES
#endif // CONFIG_COMPOUND_SINGLEREF
1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0)
-#endif // CONFIG_EXT_INTER
};
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
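
The new BLOCK_16X64 and BLOCK_64X16 entries follow the tables' existing encoding: each 64-bit mask describes a 64x64 luma area as eight bytes, one per row of 8x8 units, with bit 0 of each byte marking the leftmost column. A sketch that reproduces the two new left_prediction_mask values (hypothetical helper, for illustration only):

    #include <stdint.h>

    /* Set bit 0 of one byte per occupied 8x8 row; mi_rows is the block
     * height in 8x8 units. */
    static uint64_t left_mask_for(int mi_rows) {
      uint64_t mask = 0;
      for (int r = 0; r < mi_rows; ++r) mask |= 1ULL << (8 * r);
      return mask;
    }
    /* left_mask_for(8) == 0x0101010101010101ULL -> BLOCK_16X64 (64 tall)
     * left_mask_for(2) == 0x0000000000000101ULL -> BLOCK_64X16 (16 tall) */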
@@ -585,7 +629,17 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
#if CONFIG_EXT_DELTA_Q
static uint8_t get_filter_level(const AV1_COMMON *cm,
const loop_filter_info_n *lfi_n,
+#if CONFIG_LOOPFILTER_LEVEL
+ const int dir_idx, int plane,
+#endif
+#if CONFIG_LPF_SB
+ int mi_row, int mi_col,
+#endif
const MB_MODE_INFO *mbmi) {
+#if CONFIG_LPF_SB
+ return cm->mi[mi_row * cm->mi_stride + mi_col].mbmi.filt_lvl;
+#endif
+
#if CONFIG_SUPERTX
const int segment_id = AOMMIN(mbmi->segment_id, mbmi->segment_id_supertx);
assert(
@@ -596,15 +650,38 @@ static uint8_t get_filter_level(const AV1_COMMON *cm,
const int segment_id = mbmi->segment_id;
#endif // CONFIG_SUPERTX
if (cm->delta_lf_present_flag) {
+#if CONFIG_LOOPFILTER_LEVEL
+ int delta_lf;
+ if (cm->delta_lf_multi) {
+ const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
+ delta_lf = mbmi->curr_delta_lf[delta_lf_idx];
+ } else {
+ delta_lf = mbmi->current_delta_lf_from_base;
+ }
+ int lvl_seg =
+ clamp(delta_lf + cm->lf.filter_level[dir_idx], 0, MAX_LOOP_FILTER);
+#else
int lvl_seg = clamp(mbmi->current_delta_lf_from_base + cm->lf.filter_level,
0, MAX_LOOP_FILTER);
+#endif
const int scale = 1 << (lvl_seg >> 5);
+#if CONFIG_LOOPFILTER_LEVEL
+ assert(plane >= 0 && plane <= 2);
+ const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
+ if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) {
+ const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id);
+ lvl_seg =
+ clamp(cm->seg.abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
+ MAX_LOOP_FILTER);
+ }
+#else
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_ALT_LF)) {
const int data = get_segdata(&cm->seg, segment_id, SEG_LVL_ALT_LF);
lvl_seg =
clamp(cm->seg.abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
MAX_LOOP_FILTER);
}
+#endif // CONFIG_LOOPFILTER_LEVEL
if (cm->lf.mode_ref_delta_enabled) {
lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale;
@@ -614,7 +691,12 @@ static uint8_t get_filter_level(const AV1_COMMON *cm,
}
return lvl_seg;
} else {
+#if CONFIG_LOOPFILTER_LEVEL
+ return lfi_n
+ ->lvl[segment_id][dir_idx][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
+#else
return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
+#endif
}
}
#else
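
Condensing the CONFIG_LOOPFILTER_LEVEL branch above: the per-plane, per-direction delta (or the shared delta when delta_lf_multi is off) is added to the directional base level and clamped; the per-plane segment feature may then override or adjust it; finally, ref-frame and mode deltas are applied with double weight once the level reaches 32, via scale = 1 << (lvl >> 5). A stand-alone sketch of that arithmetic (the inter-only gating of the mode delta is elided; MAX_LOOP_FILTER is 63 in the library):

    #define MAX_LOOP_FILTER 63

    static int clamp_lvl(int v) {
      return v < 0 ? 0 : (v > MAX_LOOP_FILTER ? MAX_LOOP_FILTER : v);
    }

    /* Sketch of the delta-LF path for the mode_ref_delta_enabled case. */
    static int filter_level_sketch(int base_level, int delta_lf,
                                   int ref_delta, int mode_delta) {
      int lvl_seg = clamp_lvl(base_level + delta_lf);
      const int scale = 1 << (lvl_seg >> 5); /* deltas double from level 32 */
      lvl_seg += (ref_delta + mode_delta) * scale;
      return clamp_lvl(lvl_seg);
    }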
@@ -648,12 +730,39 @@ void av1_loop_filter_init(AV1_COMMON *cm) {
memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
}
-void av1_loop_filter_frame_init(AV1_COMMON *cm, int default_filt_lvl) {
+#if CONFIG_LPF_SB
+void av1_loop_filter_sb_level_init(AV1_COMMON *cm, int mi_row, int mi_col,
+ int lvl) {
+ const int mi_row_start = AOMMAX(0, mi_row - FILT_BOUNDARY_MI_OFFSET);
+ const int mi_col_start = AOMMAX(0, mi_col - FILT_BOUNDARY_MI_OFFSET);
+ const int mi_row_range = mi_row - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
+ const int mi_col_range = mi_col - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
+ const int mi_row_end = AOMMIN(mi_row_range, cm->mi_rows);
+ const int mi_col_end = AOMMIN(mi_col_range, cm->mi_cols);
+
+ int row, col;
+ for (row = mi_row_start; row < mi_row_end; ++row) {
+ for (col = mi_col_start; col < mi_col_end; ++col) {
+      // Note: we can't use cm->mi_grid_visible here, because for each
+      // partition all of its visible pointers point to the first entry.
+ cm->mi[row * cm->mi_stride + col].mbmi.filt_lvl = lvl;
+ }
+ }
+}
+#endif // CONFIG_LPF_SB
+
+void av1_loop_filter_frame_init(AV1_COMMON *cm, int default_filt_lvl,
+ int default_filt_lvl_r
+#if CONFIG_LOOPFILTER_LEVEL
+ ,
+ int plane
+#endif
+ ) {
int seg_id;
// n_shift is the multiplier for lf_deltas
// the multiplier is 1 for when filter_lvl is between 0 and 31;
// 2 when filter_lvl is between 32 and 63
- const int scale = 1 << (default_filt_lvl >> 5);
+ int scale = 1 << (default_filt_lvl >> 5);
loop_filter_info_n *const lfi = &cm->lf_info;
struct loopfilter *const lf = &cm->lf;
const struct segmentation *const seg = &cm->seg;
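
av1_loop_filter_sb_level_init above stamps one level into mbmi.filt_lvl over a MAX_MIB_SIZE-sized window shifted up and left by FILT_BOUNDARY_MI_OFFSET, so the level chosen for a superblock also governs the filtering that reaches back across its top and left boundaries. Restating the window arithmetic (a sketch; the two constants come from the library):

    /* Clamped [start, end) range along one axis of the window. */
    static void sb_window(int mi_pos, int mi_limit, int offset, int mib,
                          int *start, int *end) {
      *start = mi_pos - offset > 0 ? mi_pos - offset : 0;
      const int raw_end = mi_pos - offset + mib;
      *end = raw_end < mi_limit ? raw_end : mi_limit;
    }
    /* Called once with (mi_row, cm->mi_rows, ...) for rows and once with
     * (mi_col, cm->mi_cols, ...) for columns. */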
@@ -665,29 +774,64 @@ void av1_loop_filter_frame_init(AV1_COMMON *cm, int default_filt_lvl) {
}
for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
- int lvl_seg = default_filt_lvl;
- if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
- const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
- lvl_seg = clamp(
- seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data, 0,
- MAX_LOOP_FILTER);
- }
+ for (int dir = 0; dir < 2; ++dir) {
+ int lvl_seg = (dir == 0) ? default_filt_lvl : default_filt_lvl_r;
+#if CONFIG_LOOPFILTER_LEVEL
+ assert(plane >= 0 && plane <= 2);
+ const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
+ if (segfeature_active(seg, seg_id, seg_lf_feature_id)) {
+ const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id);
+ lvl_seg = clamp(
+ seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data,
+ 0, MAX_LOOP_FILTER);
+ }
+#else
+ if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
+ const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
+ lvl_seg = clamp(
+ seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data,
+ 0, MAX_LOOP_FILTER);
+ }
+#endif // CONFIG_LOOPFILTER_LEVEL
- if (!lf->mode_ref_delta_enabled) {
- // we could get rid of this if we assume that deltas are set to
- // zero when not in use; encoder always uses deltas
- memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
- } else {
- int ref, mode;
- const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
- lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
-
- for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
- for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
- const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
- lf->mode_deltas[mode] * scale;
- lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+ if (!lf->mode_ref_delta_enabled) {
+// we could get rid of this if we assume that deltas are set to
+// zero when not in use; encoder always uses deltas
+#if CONFIG_LOOPFILTER_LEVEL
+ memset(lfi->lvl[seg_id][dir], lvl_seg, sizeof(lfi->lvl[seg_id][dir]));
+#else
+ memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
+#endif // CONFIG_LOOPFILTER_LEVEL
+ } else {
+ int ref, mode;
+#if CONFIG_LOOPFILTER_LEVEL
+ scale = 1 << (lvl_seg >> 5);
+
+ const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
+ lfi->lvl[seg_id][dir][INTRA_FRAME][0] =
+ clamp(intra_lvl, 0, MAX_LOOP_FILTER);
+
+ for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
+ for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
+ const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
+ lf->mode_deltas[mode] * scale;
+ lfi->lvl[seg_id][dir][ref][mode] =
+ clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+ }
}
+#else
+ (void)default_filt_lvl_r;
+ const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
+ lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
+
+ for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
+ for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
+ const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
+ lf->mode_deltas[mode] * scale;
+ lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+ }
+ }
+#endif
}
}
}
@@ -1384,7 +1528,15 @@ static void build_masks(AV1_COMMON *const cm,
const TX_SIZE tx_size_uv_above =
txsize_vert_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
+#else
+#if CONFIG_LPF_SB
+ const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
+#else
const int filter_level = get_filter_level(cm, lfi_n, mbmi);
+#endif // CONFIG_LPF_SB
+#endif
#else
const int filter_level = get_filter_level(lfi_n, mbmi);
(void)cm;
@@ -1478,7 +1630,15 @@ static void build_y_mask(AV1_COMMON *const cm,
const BLOCK_SIZE block_size = mbmi->sb_type;
#endif
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
+#else
+#if CONFIG_LPF_SB
+ const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
+#else
const int filter_level = get_filter_level(cm, lfi_n, mbmi);
+#endif // CONFIG_LPF_SB
+#endif
#else
const int filter_level = get_filter_level(lfi_n, mbmi);
(void)cm;
@@ -1548,6 +1708,9 @@ static void update_tile_boundary_filter_mask(AV1_COMMON *const cm,
void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col,
MODE_INFO **mi, const int mode_info_stride,
LOOP_FILTER_MASK *lfm) {
+#if CONFIG_EXT_PARTITION
+ assert(0 && "Not yet updated");
+#endif // CONFIG_EXT_PARTITION
int idx_32, idx_16, idx_8;
const loop_filter_info_n *const lfi_n = &cm->lf_info;
MODE_INFO **mip = mi;
@@ -1575,9 +1738,6 @@ void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col,
int i;
const int max_rows = AOMMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE);
const int max_cols = AOMMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE);
-#if CONFIG_EXT_PARTITION
- assert(0 && "Not yet updated");
-#endif // CONFIG_EXT_PARTITION
av1_zero(*lfm);
assert(mip[0] != NULL);
@@ -1898,8 +2058,8 @@ static void filter_selectively_vert(
orig_pos[i] = -1;
}
- int direct = pick_min_grad_direct(src, left_filt_len, row, col, width,
- height, pitch, 1, 0);
+ const int direct = pick_min_grad_direct(src, left_filt_len, row, col,
+ width, height, pitch, 1, 0);
pick_filter_block_vert(src, block, orig_pos, left_filt_len, row, col,
width, height, pitch, pivot, line_length, 1,
@@ -1928,8 +2088,8 @@ static void filter_selectively_vert(
orig_pos[i] = -1;
}
- int direct = pick_min_grad_direct(src, 4, row, col + 4, width, height,
- pitch, 1, 0);
+ const int direct = pick_min_grad_direct(src, 4, row, col + 4, width,
+ height, pitch, 1, 0);
pick_filter_block_vert(src, block, orig_pos, 4, row, col + 4, width,
height, pitch, pivot, line_length, 1, direct);
@@ -2083,7 +2243,18 @@ static void get_filter_level_and_masks_non420(
// Filter level can vary per MI
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ if (!(lfl_r[c_step] = get_filter_level(cm, &cm->lf_info, 0, 0, mbmi)))
+ continue;
+#else
+#if CONFIG_LPF_SB
+ if (!(lfl_r[c_step] =
+ get_filter_level(cm, &cm->lf_info, mi_row, mi_col, mbmi)))
+ continue;
+#else
if (!(lfl_r[c_step] = get_filter_level(cm, &cm->lf_info, mbmi))) continue;
+#endif // CONFIG_LPF_SB
+#endif
#else
if (!(lfl_r[c_step] = get_filter_level(&cm->lf_info, mbmi))) continue;
#endif
@@ -2249,7 +2420,7 @@ void av1_filter_block_plane_non420_ver(AV1_COMMON *const cm,
&col_masks);
// Disable filtering on the leftmost column or tile boundary
- unsigned int border_mask = ~(mi_col == 0);
+ unsigned int border_mask = ~(mi_col == 0 ? 1 : 0);
#if CONFIG_LOOPFILTERING_ACROSS_TILES
MODE_INFO *const mi = cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col;
if (av1_disable_loopfilter_on_tile_boundary(cm) &&
@@ -2588,7 +2759,13 @@ static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES_ALL] = {
4 - 1, // BLOCK_4X16,
16 - 1, // BLOCK_16X4,
8 - 1, // BLOCK_8X32,
- 32 - 1 // BLOCK_32X8
+ 32 - 1, // BLOCK_32X8,
+ 16 - 1, // BLOCK_16X64,
+ 64 - 1, // BLOCK_64X16
+#if CONFIG_EXT_PARTITION
+ 32 - 1, // BLOCK_32X128
+ 128 - 1, // BLOCK_128X32
+#endif // CONFIG_EXT_PARTITION
},
// mask for horizontal edges filtering
{
@@ -2618,7 +2795,13 @@ static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES_ALL] = {
16 - 1, // BLOCK_4X16,
4 - 1, // BLOCK_16X4,
32 - 1, // BLOCK_8X32,
- 8 - 1 // BLOCK_32X8
+ 8 - 1, // BLOCK_32X8,
+ 64 - 1, // BLOCK_16X64,
+ 16 - 1, // BLOCK_64X16
+#if CONFIG_EXT_PARTITION
+ 128 - 1, // BLOCK_32X128
+ 32 - 1, // BLOCK_128X32
+#endif // CONFIG_EXT_PARTITION
},
};
@@ -2640,6 +2823,10 @@ static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = {
16 - 1, // TX_16X8
16 - 1, // TX_16X32
32 - 1, // TX_32X16
+#if CONFIG_TX64X64
+ 32 - 1, // TX_32X64
+ 64 - 1, // TX_64X32
+#endif // CONFIG_TX64X64
4 - 1, // TX_4X16
16 - 1, // TX_16X4
8 - 1, // TX_8X32
@@ -2662,6 +2849,10 @@ static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = {
8 - 1, // TX_16X8
32 - 1, // TX_16X32
16 - 1, // TX_32X16
+#if CONFIG_TX64X64
+ 64 - 1, // TX_32X64
+ 32 - 1, // TX_64X32
+#endif // CONFIG_TX64X64
16 - 1, // TX_4X16
4 - 1, // TX_16X4
32 - 1, // TX_8X32
@@ -2669,15 +2860,16 @@ static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = {
}
};
-static TX_SIZE av1_get_transform_size(const MODE_INFO *const pCurr,
- const EDGE_DIR edgeDir, const int mi_row,
+static TX_SIZE av1_get_transform_size(const MODE_INFO *const mi,
+ const EDGE_DIR edge_dir, const int mi_row,
const int mi_col, const int plane,
- const struct macroblockd_plane *pPlane,
- const uint32_t scaleHorz,
- const uint32_t scaleVert) {
- const MB_MODE_INFO *mbmi = &pCurr->mbmi;
- TX_SIZE tx_size = (plane == PLANE_TYPE_Y) ? mbmi->tx_size
- : av1_get_uv_tx_size(mbmi, pPlane);
+ const struct macroblockd_plane *plane_ptr,
+ const uint32_t scale_horz,
+ const uint32_t scale_vert) {
+ const MB_MODE_INFO *mbmi = &mi->mbmi;
+ TX_SIZE tx_size = (plane == AOM_PLANE_Y)
+ ? mbmi->tx_size
+ : av1_get_uv_tx_size(mbmi, plane_ptr);
assert(tx_size < TX_SIZES_ALL);
#if CONFIG_VAR_TX
@@ -2690,7 +2882,7 @@ static TX_SIZE av1_get_transform_size(const MODE_INFO *const pCurr,
const int idx_r = mi_row & MAX_MIB_MASK;
const int c = idx_c >> mi_width_log2_lookup[BLOCK_8X8];
const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
- const BLOCK_SIZE sb_type = pCurr->mbmi.sb_type;
+ const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
@@ -2702,40 +2894,40 @@ static TX_SIZE av1_get_transform_size(const MODE_INFO *const pCurr,
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE bsize =
- AOMMAX(BLOCK_4X4, ss_size_lookup[sb_type][scaleHorz][scaleVert]);
+ AOMMAX(BLOCK_4X4, ss_size_lookup[sb_type][scale_horz][scale_vert]);
#else
- const BLOCK_SIZE bsize = ss_size_lookup[sb_type][scaleHorz][scaleVert];
+ const BLOCK_SIZE bsize = ss_size_lookup[sb_type][scale_horz][scale_vert];
#endif
const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx];
assert(mb_tx_size < TX_SIZES_ALL);
- tx_size = (plane == PLANE_TYPE_UV)
- ? uv_txsize_lookup[bsize][mb_tx_size][0][0]
- : mb_tx_size;
+ tx_size = (plane == AOM_PLANE_Y)
+ ? mb_tx_size
+ : uv_txsize_lookup[bsize][mb_tx_size][0][0];
assert(tx_size < TX_SIZES_ALL);
}
#else
(void)mi_row;
(void)mi_col;
- (void)scaleHorz;
- (void)scaleVert;
+ (void)scale_horz;
+ (void)scale_vert;
#endif // CONFIG_VAR_TX
  // In the case of chrominance or a non-square transform we need to convert
  // the transform size into the transform size along a particular direction:
  // for a vertical edge the filter direction is horizontal, and for a
  // horizontal edge it is vertical.
- tx_size = (VERT_EDGE == edgeDir) ? txsize_horz_map[tx_size]
- : txsize_vert_map[tx_size];
+ tx_size = (VERT_EDGE == edge_dir) ? txsize_horz_map[tx_size]
+ : txsize_vert_map[tx_size];
return tx_size;
}
typedef struct AV1_DEBLOCKING_PARAMETERS {
// length of the filter applied to the outer edge
- uint32_t filterLength;
+ uint32_t filter_length;
// length of the filter applied to the inner edge
- uint32_t filterLengthInternal;
+ uint32_t filter_length_internal;
// deblocking limits
const uint8_t *lim;
const uint8_t *mblim;
@@ -2743,291 +2935,595 @@ typedef struct AV1_DEBLOCKING_PARAMETERS {
} AV1_DEBLOCKING_PARAMETERS;
static void set_lpf_parameters(
- AV1_DEBLOCKING_PARAMETERS *const pParams, const MODE_INFO **const ppCurr,
- const ptrdiff_t modeStep, const AV1_COMMON *const cm,
- const EDGE_DIR edgeDir, const uint32_t x, const uint32_t y,
- const uint32_t width, const uint32_t height, const int plane,
- const struct macroblockd_plane *const pPlane, const uint32_t scaleHorz,
- const uint32_t scaleVert) {
+ AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step,
+ const AV1_COMMON *const cm, const EDGE_DIR edge_dir, const uint32_t x,
+ const uint32_t y, const int plane,
+ const struct macroblockd_plane *const plane_ptr) {
// reset to initial values
- pParams->filterLength = 0;
- pParams->filterLengthInternal = 0;
+ params->filter_length = 0;
+ params->filter_length_internal = 0;
+
+  const uint32_t width = plane_ptr->dst.width;
+  const uint32_t height = plane_ptr->dst.height;
  // no deblocking is required when the position is outside the plane
if ((width <= x) || (height <= y)) {
return;
}
- const int mi_row = (y << scaleVert) >> MI_SIZE_LOG2;
- const int mi_col = (x << scaleHorz) >> MI_SIZE_LOG2;
- const MB_MODE_INFO *mbmi = &ppCurr[0]->mbmi;
+ const uint32_t scale_horz = plane_ptr->subsampling_x;
+ const uint32_t scale_vert = plane_ptr->subsampling_y;
+ const int mi_row = (y << scale_vert) >> MI_SIZE_LOG2;
+ const int mi_col = (x << scale_horz) >> MI_SIZE_LOG2;
+ MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
+ const MB_MODE_INFO *mbmi = &mi[0]->mbmi;
{
const TX_SIZE ts =
- av1_get_transform_size(ppCurr[0], edgeDir, mi_row, mi_col, plane,
- pPlane, scaleHorz, scaleVert);
+ av1_get_transform_size(mi[0], edge_dir, mi_row, mi_col, plane,
+ plane_ptr, scale_horz, scale_vert);
#if CONFIG_EXT_DELTA_Q
- const uint32_t currLevel = get_filter_level(cm, &cm->lf_info, mbmi);
+#if CONFIG_LOOPFILTER_LEVEL
+ const uint32_t curr_level =
+ get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
#else
- const uint32_t currLevel = get_filter_level(&cm->lf_info, mbmi);
+#if CONFIG_LPF_SB
+ const uint32_t curr_level =
+ get_filter_level(cm, &cm->lf_info, mi_row, mi_col, mbmi);
+#else
+ const uint32_t curr_level = get_filter_level(cm, &cm->lf_info, mbmi);
+#endif // CONFIG_LPF_SB
+#endif
+#else
+ const uint32_t curr_level = get_filter_level(&cm->lf_info, mbmi);
#endif // CONFIG_EXT_DELTA_Q
- const int currSkipped = mbmi->skip && is_inter_block(mbmi);
- const uint32_t coord = (VERT_EDGE == edgeDir) ? (x) : (y);
- uint32_t level = currLevel;
+ const int curr_skipped = mbmi->skip && is_inter_block(mbmi);
+ const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
+ uint32_t level = curr_level;
    // prepare the outer-edge parameters; deblock the edge only if it lies on
    // a TU boundary
if (coord) {
#if CONFIG_LOOPFILTERING_ACROSS_TILES
- MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride + mi_col;
+ MODE_INFO *const mi_bound = cm->mi + mi_row * cm->mi_stride + mi_col;
if (!av1_disable_loopfilter_on_tile_boundary(cm) ||
- ((VERT_EDGE == edgeDir) &&
- (0 == (mi->mbmi.boundary_info & TILE_LEFT_BOUNDARY))) ||
- ((HORZ_EDGE == edgeDir) &&
- (0 == (mi->mbmi.boundary_info & TILE_ABOVE_BOUNDARY))))
+ ((VERT_EDGE == edge_dir) &&
+ (0 == (mi_bound->mbmi.boundary_info & TILE_LEFT_BOUNDARY))) ||
+ ((HORZ_EDGE == edge_dir) &&
+ (0 == (mi_bound->mbmi.boundary_info & TILE_ABOVE_BOUNDARY))))
#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
{
- const int32_t tuEdge =
- (coord & av1_transform_masks[edgeDir][ts]) ? (0) : (1);
- if (tuEdge) {
- const MODE_INFO *const pPrev = *(ppCurr - modeStep);
- const int pvRow =
- (VERT_EDGE == edgeDir) ? (mi_row) : (mi_row - (1 << scaleVert));
- const int pvCol =
- (VERT_EDGE == edgeDir) ? (mi_col - (1 << scaleHorz)) : (mi_col);
- const TX_SIZE pvTs =
- av1_get_transform_size(pPrev, edgeDir, pvRow, pvCol, plane,
- pPlane, scaleHorz, scaleVert);
+ const int32_t tu_edge =
+ (coord & av1_transform_masks[edge_dir][ts]) ? (0) : (1);
+ if (tu_edge) {
+ const MODE_INFO *const mi_prev = *(mi - mode_step);
+ const int pv_row =
+ (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert));
+ const int pv_col =
+ (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col);
+ const TX_SIZE pv_ts =
+ av1_get_transform_size(mi_prev, edge_dir, pv_row, pv_col, plane,
+ plane_ptr, scale_horz, scale_vert);
#if CONFIG_EXT_DELTA_Q
- const uint32_t pvLvl =
- get_filter_level(cm, &cm->lf_info, &pPrev->mbmi);
+#if CONFIG_LOOPFILTER_LEVEL
+ const uint32_t pv_lvl = get_filter_level(cm, &cm->lf_info, edge_dir,
+ plane, &mi_prev->mbmi);
+#else
+#if CONFIG_LPF_SB
+ const uint32_t pv_lvl = get_filter_level(cm, &cm->lf_info, pv_row,
+ pv_col, &mi_prev->mbmi);
+#else
+ const uint32_t pv_lvl =
+ get_filter_level(cm, &cm->lf_info, &mi_prev->mbmi);
+#endif // CONFIG_LPF_SB
+#endif
#else
- const uint32_t pvLvl = get_filter_level(&cm->lf_info, &pPrev->mbmi);
+ const uint32_t pv_lvl =
+ get_filter_level(&cm->lf_info, &mi_prev->mbmi);
#endif // CONFIG_EXT_DELTA_Q
- const int pvSkip = pPrev->mbmi.skip && is_inter_block(&pPrev->mbmi);
- const int32_t puEdge =
+ const int pv_skip =
+ mi_prev->mbmi.skip && is_inter_block(&mi_prev->mbmi);
+ const int32_t pu_edge =
(coord &
- av1_prediction_masks[edgeDir]
- [ss_size_lookup[mbmi->sb_type][scaleHorz]
- [scaleVert]])
+ av1_prediction_masks[edge_dir]
+ [ss_size_lookup[mbmi->sb_type][scale_horz]
+ [scale_vert]])
? (0)
: (1);
          // If both the current and the previous blocks are skipped, only
          // deblock the edge when it lies on a prediction unit's boundary.
- if ((currLevel || pvLvl) && (!pvSkip || !currSkipped || puEdge)) {
- const TX_SIZE minTs = AOMMIN(ts, pvTs);
- if (TX_4X4 >= minTs) {
- pParams->filterLength = 4;
- } else if (TX_8X8 == minTs) {
- pParams->filterLength = 8;
+ if ((curr_level || pv_lvl) &&
+ (!pv_skip || !curr_skipped || pu_edge)) {
+ const TX_SIZE min_ts = AOMMIN(ts, pv_ts);
+ if (TX_4X4 >= min_ts) {
+ params->filter_length = 4;
+ } else if (TX_8X8 == min_ts) {
+ params->filter_length = 8;
} else {
- pParams->filterLength = 16;
+ params->filter_length = 16;
#if PARALLEL_DEBLOCKING_15TAPLUMAONLY
// No wide filtering for chroma plane
if (plane != 0) {
- pParams->filterLength = 8;
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+ params->filter_length = 6;
+#else
+ params->filter_length = 8;
+#endif
}
#endif
}
#if PARALLEL_DEBLOCKING_DISABLE_15TAP
- pParams->filterLength = (TX_4X4 >= AOMMIN(ts, pvTs)) ? (4) : (8);
+ params->filter_length = (TX_4X4 >= AOMMIN(ts, pv_ts)) ? (4) : (8);
#endif // PARALLEL_DEBLOCKING_DISABLE_15TAP
          // use the current block's filter level; if it is zero (e.g. the
          // block is skipped), fall back to the previous block's level
- level = (currLevel) ? (currLevel) : (pvLvl);
+ level = (curr_level) ? (curr_level) : (pv_lvl);
}
}
}
#if !CONFIG_CB4X4
// prepare internal edge parameters
- if (currLevel && !currSkipped) {
- pParams->filterLengthInternal = (TX_4X4 >= ts) ? (4) : (0);
+ if (curr_level && !curr_skipped) {
+ params->filter_length_internal = (TX_4X4 >= ts) ? (4) : (0);
}
#endif
// prepare common parameters
- if (pParams->filterLength || pParams->filterLengthInternal) {
+ if (params->filter_length || params->filter_length_internal) {
const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
- pParams->lim = limits->lim;
- pParams->mblim = limits->mblim;
- pParams->hev_thr = limits->hev_thr;
+ params->lim = limits->lim;
+ params->mblim = limits->mblim;
+ params->hev_thr = limits->hev_thr;
}
}
}
}
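// Decision summary for the outer edge above (illustrative; assumes
// PARALLEL_DEBLOCKING_DISABLE_15TAP is off): with min_ts = AOMMIN(ts, pv_ts),
//
//   min_ts <= TX_4X4   -> filter_length = 4
//   min_ts == TX_8X8   -> filter_length = 8
//   larger, luma       -> filter_length = 16
//   larger, chroma     -> filter_length = 8 (or 6 with
//                         PARALLEL_DEBLOCKING_5_TAP_CHROMA), when
//                         PARALLEL_DEBLOCKING_15TAPLUMAONLY is enabled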
-static void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
- const int plane,
- const MACROBLOCKD_PLANE *const pPlane,
- const MODE_INFO **ppModeInfo,
- const uint32_t cuX,
- const uint32_t cuY) {
+static void av1_filter_block_plane_vert(
+ const AV1_COMMON *const cm, const int plane,
+ const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
+ const uint32_t mi_col) {
const int col_step = MI_SIZE >> MI_SIZE_LOG2;
const int row_step = MI_SIZE >> MI_SIZE_LOG2;
- const uint32_t scaleHorz = pPlane->subsampling_x;
- const uint32_t scaleVert = pPlane->subsampling_y;
- const uint32_t width = pPlane->dst.width;
- const uint32_t height = pPlane->dst.height;
- uint8_t *const pDst = pPlane->dst.buf;
- const int dstStride = pPlane->dst.stride;
- for (int y = 0; y < (MAX_MIB_SIZE >> scaleVert); y += row_step) {
- uint8_t *p = pDst + y * MI_SIZE * dstStride;
- for (int x = 0; x < (MAX_MIB_SIZE >> scaleHorz); x += col_step) {
+ const uint32_t scale_horz = plane_ptr->subsampling_x;
+ const uint32_t scale_vert = plane_ptr->subsampling_y;
+ uint8_t *const dst_ptr = plane_ptr->dst.buf;
+ const int dst_stride = plane_ptr->dst.stride;
+#if CONFIG_LPF_SB
+ int y_range = mi_row ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
+ y_range = AOMMIN(y_range, cm->mi_rows);
+ y_range >>= scale_vert;
+
+ int x_range = mi_col ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
+ x_range = AOMMIN(x_range, cm->mi_cols);
+ x_range >>= scale_horz;
+#else
+ const int y_range = (MAX_MIB_SIZE >> scale_vert);
+ const int x_range = (MAX_MIB_SIZE >> scale_horz);
+#endif // CONFIG_LPF_SB
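+  // x_range/y_range count MI units within one superblock and are already
+  // scaled for plane subsampling; e.g. for 4:2:0 chroma (scale_vert == 1)
+  // the vertical range is halved.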
+ for (int y = 0; y < y_range; y += row_step) {
+ uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
+ for (int x = 0; x < x_range; x += col_step) {
      // The inner loop always filters vertical edges in an MI block. If the
      // MI size is 8x8, it filters the vertical edge aligned with the 8x8
      // block; if a 4x4 transform is used, it then also filters the internal
      // edge aligned with a 4x4 block.
- const MODE_INFO **const pCurr =
- ppModeInfo + (y << scaleVert) * cm->mi_stride + (x << scaleHorz);
+ const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
+ const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
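+      // curr_x/curr_y are in this plane's (subsampled) pixel coordinates:
+      // the superblock origin mi_col * MI_SIZE is scaled by the plane's
+      // subsampling before the per-MI offset x * MI_SIZE is added.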
AV1_DEBLOCKING_PARAMETERS params;
memset(&params, 0, sizeof(params));
- set_lpf_parameters(&params, pCurr, ((ptrdiff_t)1 << scaleHorz), cm,
- VERT_EDGE, cuX + x * MI_SIZE, cuY + y * MI_SIZE, width,
- height, plane, pPlane, scaleHorz, scaleVert);
+ set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, VERT_EDGE,
+ curr_x, curr_y, plane, plane_ptr);
+
+#if CONFIG_LPF_DIRECT
+ uint8_t *const src = plane_ptr->dst.buf0;
+ const int width = cm->width >> scale_horz;
+ const int height = cm->height >> scale_vert;
+ const int pivot = 8;
+ const int line_length = 16;
+ uint8_t block[128];
+ int orig_pos[128];
+ const int vert_or_horz = 0; // 0: vertical
+ const int unit = 1;
+ int i;
+ for (i = 0; i < 128; ++i) {
+ block[i] = 0;
+ orig_pos[i] = -1;
+ }
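+      // block[]/orig_pos[] implement a gather/scatter: pick_filter_block_vert
+      // copies the chosen (possibly diagonal) line of source pixels into
+      // block[] and records each pixel's source offset in orig_pos[], and
+      // after filtering only entries with orig_pos[i] >= 0 are written back.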
+
+ if (params.filter_length) {
+ const int filt_len = params.filter_length == 16 ? 8 : 4;
+ const int direct =
+ pick_min_grad_direct(src, filt_len, curr_y, curr_x, width, height,
+ dst_stride, unit, vert_or_horz);
+
+ pick_filter_block_vert(src, block, orig_pos, filt_len, curr_y, curr_x,
+ width, height, dst_stride, pivot, line_length,
+ unit, direct);
+ uint8_t *const filt_start = block + pivot;
+ switch (params.filter_length) {
+ // apply 4-tap filtering
+ case 4:
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(filt_start),
+ line_length, params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ aom_lpf_vertical_4(filt_start, line_length, params.mblim,
+ params.lim, params.hev_thr);
+ break;
+ // apply 8-tap filtering
+ case 8:
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(filt_start),
+ line_length, params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ aom_lpf_vertical_8(filt_start, line_length, params.mblim,
+ params.lim, params.hev_thr);
+ break;
+ // apply 16-tap filtering
+ case 16:
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ aom_highbd_lpf_vertical_16(CONVERT_TO_SHORTPTR(filt_start),
+ line_length, params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ aom_lpf_vertical_16(filt_start, line_length, params.mblim,
+ params.lim, params.hev_thr);
+ break;
+ // no filtering
+ default: break;
+ }
+
+ for (i = 0; i < 128; ++i) {
+ if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
+ }
+ }
+
+ if (params.filter_length_internal) {
+ for (i = 0; i < 128; ++i) {
+ block[i] = 0;
+ orig_pos[i] = -1;
+ }
+
+ const int direct =
+ pick_min_grad_direct(src, 4, curr_y, curr_x + 4, width, height,
+ dst_stride, unit, vert_or_horz);
- switch (params.filterLength) {
+ pick_filter_block_vert(src, block, orig_pos, 4, curr_y, curr_x + 4,
+ width, height, dst_stride, pivot, line_length,
+ unit, direct);
+
+ uint8_t *const filt_start = block + pivot;
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(filt_start),
+ line_length, params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ aom_lpf_vertical_4(filt_start, line_length, params.mblim, params.lim,
+ params.hev_thr);
+
+ for (i = 0; i < 128; ++i) {
+ if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
+ }
+ }
+#else // !CONFIG_LPF_DIRECT
+ switch (params.filter_length) {
// apply 4-tap filtering
case 4:
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- aom_highbd_lpf_vertical_4_c(CONVERT_TO_SHORTPTR(p), dstStride,
+ aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p), dst_stride,
+ params.mblim, params.lim, params.hev_thr,
+ cm->bit_depth);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
+ break;
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+ case 6: // apply 6-tap filter for chroma plane only
+ assert(plane != 0);
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ aom_highbd_lpf_vertical_6_c(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
else
#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_vertical_4_c(p, dstStride, params.mblim, params.lim,
+ aom_lpf_vertical_6_c(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
+#endif
// apply 8-tap filtering
case 8:
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- aom_highbd_lpf_vertical_8_c(CONVERT_TO_SHORTPTR(p), dstStride,
- params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
+ aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(p), dst_stride,
+ params.mblim, params.lim, params.hev_thr,
+ cm->bit_depth);
else
#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_vertical_8_c(p, dstStride, params.mblim, params.lim,
- params.hev_thr);
+ aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
break;
// apply 16-tap filtering
case 16:
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- aom_highbd_lpf_vertical_16_c(CONVERT_TO_SHORTPTR(p), dstStride,
+#if CONFIG_DEBLOCK_13TAP
+ // TODO(olah): Remove _c once SIMD for 13-tap is available
+ aom_highbd_lpf_vertical_16_c(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
+#else
+ aom_highbd_lpf_vertical_16(CONVERT_TO_SHORTPTR(p), dst_stride,
+ params.mblim, params.lim, params.hev_thr,
+ cm->bit_depth);
+#endif
else
#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_vertical_16_c(p, dstStride, params.mblim, params.lim,
+#if CONFIG_DEBLOCK_13TAP
+ aom_lpf_vertical_16_c(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
+#else
+ aom_lpf_vertical_16(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
+#endif
break;
// no filtering
default: break;
}
// process the internal edge
- if (params.filterLengthInternal) {
+ if (params.filter_length_internal) {
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- aom_highbd_lpf_vertical_4_c(CONVERT_TO_SHORTPTR(p + 4), dstStride,
- params.mblim, params.lim, params.hev_thr,
- cm->bit_depth);
+ aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p + 4), dst_stride,
+ params.mblim, params.lim, params.hev_thr,
+ cm->bit_depth);
else
#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_vertical_4_c(p + 4, dstStride, params.mblim, params.lim,
- params.hev_thr);
+ aom_lpf_vertical_4(p + 4, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
}
+#endif // CONFIG_LPF_DIRECT
// advance the destination pointer
p += MI_SIZE;
}
}
}
-static void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
- const int plane,
- const MACROBLOCKD_PLANE *const pPlane,
- const MODE_INFO **ppModeInfo,
- const uint32_t cuX,
- const uint32_t cuY) {
+static void av1_filter_block_plane_horz(
+ const AV1_COMMON *const cm, const int plane,
+ const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
+ const uint32_t mi_col) {
const int col_step = MI_SIZE >> MI_SIZE_LOG2;
const int row_step = MI_SIZE >> MI_SIZE_LOG2;
- const uint32_t scaleHorz = pPlane->subsampling_x;
- const uint32_t scaleVert = pPlane->subsampling_y;
- const uint32_t width = pPlane->dst.width;
- const uint32_t height = pPlane->dst.height;
- uint8_t *const pDst = pPlane->dst.buf;
- const int dstStride = pPlane->dst.stride;
- for (int y = 0; y < (MAX_MIB_SIZE >> scaleVert); y += row_step) {
- uint8_t *p = pDst + y * MI_SIZE * dstStride;
- for (int x = 0; x < (MAX_MIB_SIZE >> scaleHorz); x += col_step) {
+ const uint32_t scale_horz = plane_ptr->subsampling_x;
+ const uint32_t scale_vert = plane_ptr->subsampling_y;
+ uint8_t *const dst_ptr = plane_ptr->dst.buf;
+ const int dst_stride = plane_ptr->dst.stride;
+#if CONFIG_LPF_SB
+ int y_range = mi_row ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
+ y_range = AOMMIN(y_range, cm->mi_rows);
+ y_range >>= scale_vert;
+
+ int x_range = mi_col ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
+ x_range = AOMMIN(x_range, cm->mi_cols);
+ x_range >>= scale_horz;
+#else
+ const int y_range = (MAX_MIB_SIZE >> scale_vert);
+ const int x_range = (MAX_MIB_SIZE >> scale_horz);
+#endif // CONFIG_LPF_SB
+ for (int y = 0; y < y_range; y += row_step) {
+ uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
+ for (int x = 0; x < x_range; x += col_step) {
      // The inner loop always filters horizontal edges in an MI block. If the
      // MI size is 8x8, it first filters the horizontal edge aligned with the
      // 8x8 block; if a 4x4 transform is used, it then also filters the
      // internal edge aligned with a 4x4 block.
- const MODE_INFO **const pCurr =
- ppModeInfo + (y << scaleVert) * cm->mi_stride + (x << scaleHorz);
+ const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
+ const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
AV1_DEBLOCKING_PARAMETERS params;
memset(&params, 0, sizeof(params));
- set_lpf_parameters(&params, pCurr, (cm->mi_stride << scaleVert), cm,
- HORZ_EDGE, cuX + x * MI_SIZE, cuY + y * MI_SIZE, width,
- height, plane, pPlane, scaleHorz, scaleVert);
- switch (params.filterLength) {
+
+ set_lpf_parameters(&params, (cm->mi_stride << scale_vert), cm, HORZ_EDGE,
+ curr_x, curr_y, plane, plane_ptr);
+
+#if CONFIG_LPF_DIRECT
+ uint8_t *const src = plane_ptr->dst.buf0;
+ const int width = cm->width >> scale_horz;
+ const int height = cm->height >> scale_vert;
+ const int pivot = 8;
+ const int line_length = 16;
+ uint8_t block[256];
+ int orig_pos[256];
+ const int vert_or_horz = 1; // 1: horizontal
+ const int unit = 1;
+ int i;
+ for (i = 0; i < 256; ++i) {
+ block[i] = 0;
+ orig_pos[i] = -1;
+ }
+
+ if (params.filter_length) {
+ const int filt_len = params.filter_length == 16 ? 8 : 4;
+ const int direct =
+ pick_min_grad_direct(src, filt_len, curr_y, curr_x, width, height,
+ dst_stride, unit, vert_or_horz);
+
+ pick_filter_block_horz(src, block, orig_pos, filt_len, curr_y, curr_x,
+ width, height, dst_stride, pivot, line_length,
+ unit, direct);
+ uint8_t *const filt_start = block + pivot * line_length;
+ switch (params.filter_length) {
+ // apply 4-tap filtering
+ case 4:
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(filt_start),
+ line_length, params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ aom_lpf_horizontal_4(filt_start, line_length, params.mblim,
+ params.lim, params.hev_thr);
+ break;
+ // apply 8-tap filtering
+ case 8:
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(filt_start),
+ line_length, params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ aom_lpf_horizontal_8(filt_start, line_length, params.mblim,
+ params.lim, params.hev_thr);
+ break;
+ // apply 16-tap filtering
+ case 16:
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ aom_highbd_lpf_horizontal_edge_16(
+ CONVERT_TO_SHORTPTR(filt_start), line_length, params.mblim,
+ params.lim, params.hev_thr, cm->bit_depth);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ aom_lpf_horizontal_edge_16(filt_start, line_length, params.mblim,
+ params.lim, params.hev_thr);
+ break;
+ // no filtering
+ default: break;
+ }
+
+ for (i = 0; i < 256; ++i) {
+ if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
+ }
+ }
+ if (params.filter_length_internal) {
+ for (i = 0; i < 256; ++i) {
+ block[i] = 0;
+ orig_pos[i] = -1;
+ }
+
+ const int direct =
+ pick_min_grad_direct(src, 4, curr_y + 4, curr_x, width, height,
+ dst_stride, unit, vert_or_horz);
+
+ pick_filter_block_horz(src, block, orig_pos, 4, curr_y + 4, curr_x,
+ width, height, dst_stride, pivot, line_length,
+ unit, direct);
+
+ uint8_t *const filt_start = block + pivot * line_length;
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(filt_start),
+ line_length, params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ aom_lpf_horizontal_4(filt_start, line_length, params.mblim,
+ params.lim, params.hev_thr);
+
+ for (i = 0; i < 256; ++i) {
+ if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
+ }
+ }
+#else // !CONFIG_LPF_DIRECT
+ switch (params.filter_length) {
// apply 4-tap filtering
case 4:
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- aom_highbd_lpf_horizontal_4_c(CONVERT_TO_SHORTPTR(p), dstStride,
+ aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p), dst_stride,
+ params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
+ break;
+#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
+ // apply 6-tap filtering
+ case 6: assert(plane != 0);
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ aom_highbd_lpf_horizontal_6_c(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
else
#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_horizontal_4_c(p, dstStride, params.mblim, params.lim,
+ aom_lpf_horizontal_6_c(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
+#endif
// apply 8-tap filtering
case 8:
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- aom_highbd_lpf_horizontal_8_c(CONVERT_TO_SHORTPTR(p), dstStride,
- params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
+ aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(p), dst_stride,
+ params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
else
#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_horizontal_8_c(p, dstStride, params.mblim, params.lim,
- params.hev_thr);
+ aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
break;
// apply 16-tap filtering
case 16:
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
+#if CONFIG_DEBLOCK_13TAP
+ // TODO(olah): Remove _c once SIMD for 13-tap is available
aom_highbd_lpf_horizontal_edge_16_c(
- CONVERT_TO_SHORTPTR(p), dstStride, params.mblim, params.lim,
+ CONVERT_TO_SHORTPTR(p), dst_stride, params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
+#else
+ aom_highbd_lpf_horizontal_edge_16(
+ CONVERT_TO_SHORTPTR(p), dst_stride, params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
+#endif
else
#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_horizontal_edge_16_c(p, dstStride, params.mblim, params.lim,
- params.hev_thr);
+#if CONFIG_DEBLOCK_13TAP
+ aom_lpf_horizontal_edge_16_c(p, dst_stride, params.mblim,
+ params.lim, params.hev_thr);
+#else
+ aom_lpf_horizontal_edge_16(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr);
+#endif
break;
// no filtering
default: break;
}
// process the internal edge
- if (params.filterLengthInternal) {
+ if (params.filter_length_internal) {
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- aom_highbd_lpf_horizontal_4_c(CONVERT_TO_SHORTPTR(p + 4 * dstStride),
- dstStride, params.mblim, params.lim,
- params.hev_thr, cm->bit_depth);
+ aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p + 4 * dst_stride),
+ dst_stride, params.mblim, params.lim,
+ params.hev_thr, cm->bit_depth);
else
#endif // CONFIG_HIGHBITDEPTH
- aom_lpf_horizontal_4_c(p + 4 * dstStride, dstStride, params.mblim,
- params.lim, params.hev_thr);
+ aom_lpf_horizontal_4(p + 4 * dst_stride, dst_stride, params.mblim,
+ params.lim, params.hev_thr);
}
+#endif // CONFIG_LPF_DIRECT
// advance the destination pointer
p += MI_SIZE;
}
@@ -3036,9 +3532,12 @@ static void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
#endif // CONFIG_PARALLEL_DEBLOCKING
void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
- int start, int stop, int y_only) {
-#if CONFIG_UV_LVL
+ struct macroblockd_plane *planes, int start, int stop,
+#if CONFIG_LPF_SB
+ int col_start, int col_end,
+#endif
+ int y_only) {
+#if CONFIG_LOOPFILTER_LEVEL
  // y_only no longer has its original meaning. Here it selects which plane
  // to filter: when y_only = {0, 1, 2}, we are searching for the filter
  // level of the Y, U, or V plane, respectively.
@@ -3047,8 +3546,15 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
const int plane_end = plane_start + 1;
#else
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
-#endif // CONFIG_UV_LVL
+ const int plane_start = 0;
+ const int plane_end = num_planes;
+#endif // CONFIG_LOOPFILTER_LEVEL
+#if !CONFIG_LPF_SB
+ const int col_start = 0;
+ const int col_end = cm->mi_cols;
+#endif // CONFIG_LPF_SB
int mi_row, mi_col;
+ int plane;
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES || \
CONFIG_CB4X4
@@ -3062,19 +3568,13 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
#if CONFIG_VAR_TX
for (int i = 0; i < MAX_MB_PLANE; ++i)
- memset(cm->left_txfm_context[i], TX_32X32, MAX_MIB_SIZE
- << TX_UNIT_HIGH_LOG2);
+ memset(cm->left_txfm_context[i], TX_32X32,
+ MAX_MIB_SIZE << TX_UNIT_HIGH_LOG2);
#endif // CONFIG_VAR_TX
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
- int plane;
-
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
-#if CONFIG_UV_LVL
for (plane = plane_start; plane < plane_end; ++plane) {
-#else
- for (plane = 0; plane < num_planes; ++plane) {
-#endif // CONFIG_UV_LVL
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
mi_row, mi_col, plane);
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi + mi_col,
@@ -3086,38 +3586,20 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
// filter all vertical edges in every 64x64 super block
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
-#if CONFIG_UV_LVL
- for (int planeIdx = plane_start; planeIdx < plane_end; ++planeIdx) {
-#else
- for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) {
-#endif // CONFIG_UV_LVL
- const int32_t scaleHorz = planes[planeIdx].subsampling_x;
- const int32_t scaleVert = planes[planeIdx].subsampling_y;
- av1_filter_block_plane_vert(
- cm, planeIdx, &planes[planeIdx], (const MODE_INFO **)(mi + mi_col),
- (mi_col * MI_SIZE) >> scaleHorz, (mi_row * MI_SIZE) >> scaleVert);
+ for (plane = plane_start; plane < plane_end; ++plane) {
+ av1_filter_block_plane_vert(cm, plane, &planes[plane], mi_row, mi_col);
}
}
}
// filter all horizontal edges in every 64x64 super block
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
+ for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
-#if CONFIG_UV_LVL
- for (int planeIdx = plane_start; planeIdx < plane_end; ++planeIdx) {
-#else
- for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) {
-#endif // CONFIG_UV_LVL
- const int32_t scaleHorz = planes[planeIdx].subsampling_x;
- const int32_t scaleVert = planes[planeIdx].subsampling_y;
- av1_filter_block_plane_horz(
- cm, planeIdx, &planes[planeIdx], (const MODE_INFO **)(mi + mi_col),
- (mi_col * MI_SIZE) >> scaleHorz, (mi_row * MI_SIZE) >> scaleVert);
+ for (plane = plane_start; plane < plane_end; ++plane) {
+ av1_filter_block_plane_horz(cm, plane, &planes[plane], mi_row, mi_col);
}
}
}
@@ -3127,30 +3609,20 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
#if CONFIG_PARALLEL_DEBLOCKING
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
// filter all vertical edges in every 64x64 super block
- for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) {
- const int32_t scaleHorz = planes[planeIdx].subsampling_x;
- const int32_t scaleVert = planes[planeIdx].subsampling_y;
- av1_filter_block_plane_vert(
- cm, planeIdx, planes + planeIdx, (const MODE_INFO **)(mi + mi_col),
- (mi_col * MI_SIZE) >> scaleHorz, (mi_row * MI_SIZE) >> scaleVert);
+ for (plane = plane_start; plane < plane_end; plane += 1) {
+ av1_filter_block_plane_vert(cm, plane, &planes[plane], mi_row, mi_col);
}
}
}
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
- MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
// filter all horizontal edges in every 64x64 super block
- for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) {
- const int32_t scaleHorz = planes[planeIdx].subsampling_x;
- const int32_t scaleVert = planes[planeIdx].subsampling_y;
- av1_filter_block_plane_horz(
- cm, planeIdx, planes + planeIdx, (const MODE_INFO **)(mi + mi_col),
- (mi_col * MI_SIZE) >> scaleHorz, (mi_row * MI_SIZE) >> scaleVert);
+ for (plane = plane_start; plane < plane_end; plane += 1) {
+ av1_filter_block_plane_horz(cm, plane, &planes[plane], mi_row, mi_col);
}
}
}
@@ -3170,8 +3642,6 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
- int plane;
-
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
// TODO(JBB): Make setup_mask work for non 420.
@@ -3205,13 +3675,60 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
}
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- MACROBLOCKD *xd, int frame_filter_level, int y_only,
- int partial_frame) {
+ MACROBLOCKD *xd, int frame_filter_level,
+#if CONFIG_LOOPFILTER_LEVEL
+ int frame_filter_level_r,
+#endif
+ int y_only, int partial_frame
+#if CONFIG_LPF_SB
+ ,
+ int mi_row, int mi_col
+#endif
+ ) {
int start_mi_row, end_mi_row, mi_rows_to_filter;
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ int orig_filter_level[2] = { cm->lf.filter_level[0], cm->lf.filter_level[1] };
+#else
int orig_filter_level = cm->lf.filter_level;
#endif
+#endif
+
+#if CONFIG_LPF_SB
+ if (partial_frame && !frame_filter_level) return;
+#else
+#if CONFIG_LOOPFILTER_LEVEL
+ if (!frame_filter_level && !frame_filter_level_r) return;
+#else
if (!frame_filter_level) return;
+#endif
+#endif // CONFIG_LPF_SB
+#if CONFIG_LPF_SB
+ int start_mi_col;
+ int end_mi_col;
+
+  // In the per-superblock deblocking experiment:
+  // when partial_frame is 1, we are searching for the best filter level for
+  // the current superblock, and frame_filter_level is reused as the
+  // superblock's filter level rather than the whole frame's.
+  // When partial_frame is 0, the whole frame is in the actual filtering stage.
+ if (partial_frame) {
+ start_mi_row = AOMMAX(0, mi_row - FILT_BOUNDARY_MI_OFFSET);
+ start_mi_col = AOMMAX(0, mi_col - FILT_BOUNDARY_MI_OFFSET);
+ const int mi_row_range = mi_row - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
+ const int mi_col_range = mi_col - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
+ end_mi_row = AOMMIN(mi_row_range, cm->mi_rows);
+ end_mi_col = AOMMIN(mi_col_range, cm->mi_cols);
+
+ av1_loop_filter_sb_level_init(cm, mi_row, mi_col, frame_filter_level);
+ } else {
+ start_mi_row = 0;
+ mi_rows_to_filter = cm->mi_rows;
+ end_mi_row = start_mi_row + mi_rows_to_filter;
+ start_mi_col = 0;
+ end_mi_col = cm->mi_cols;
+ }
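+  // Illustrative example (assuming FILT_BOUNDARY_MI_OFFSET is a small
+  // constant such as 8): for a superblock at mi_row == 32, the window
+  // becomes rows [24, 24 + MAX_MIB_SIZE) clamped to the frame, so filtering
+  // reaches slightly above the superblock to cover its top boundary.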
+#else
start_mi_row = 0;
mi_rows_to_filter = cm->mi_rows;
if (partial_frame && cm->mi_rows > 8) {
@@ -3220,19 +3737,46 @@ void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
}
end_mi_row = start_mi_row + mi_rows_to_filter;
- av1_loop_filter_frame_init(cm, frame_filter_level);
+#if CONFIG_LOOPFILTER_LEVEL
+ // TODO(chengchen): refactor the code such that y_only has its matching
+ // meaning. Now it means the plane to be filtered in this experiment.
+ av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level_r,
+ y_only);
+#else
+ av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level);
+#endif
+#endif // CONFIG_LPF_SB
+
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ cm->lf.filter_level[0] = frame_filter_level;
+ cm->lf.filter_level[1] = frame_filter_level_r;
+#else
cm->lf.filter_level = frame_filter_level;
#endif
+#endif
+
+#if CONFIG_LPF_SB
+ av1_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row,
+ start_mi_col, end_mi_col, y_only);
+#else
av1_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only);
+#endif // CONFIG_LPF_SB
+
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ cm->lf.filter_level[0] = orig_filter_level[0];
+ cm->lf.filter_level[1] = orig_filter_level[1];
+#else
cm->lf.filter_level = orig_filter_level;
#endif
+#endif
}
-void av1_loop_filter_data_reset(
- LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
- struct AV1Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) {
+void av1_loop_filter_data_reset(LFWorkerData *lf_data,
+ YV12_BUFFER_CONFIG *frame_buffer,
+ struct AV1Common *cm,
+ const struct macroblockd_plane *planes) {
lf_data->frame_buffer = frame_buffer;
lf_data->cm = cm;
lf_data->start = 0;
@@ -3243,7 +3787,11 @@ void av1_loop_filter_data_reset(
int av1_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
(void)unused;
+#if !CONFIG_LPF_SB
av1_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
lf_data->start, lf_data->stop, lf_data->y_only);
+#else
+ (void)lf_data;
+#endif // CONFIG_LPF_SB
return 1;
}
diff --git a/third_party/aom/av1/common/av1_loopfilter.h b/third_party/aom/av1/common/av1_loopfilter.h
index 043081e57..ee32c368c 100644
--- a/third_party/aom/av1/common/av1_loopfilter.h
+++ b/third_party/aom/av1/common/av1_loopfilter.h
@@ -36,10 +36,12 @@ enum lf_path {
};
struct loopfilter {
- int filter_level;
-#if CONFIG_UV_LVL
+#if CONFIG_LOOPFILTER_LEVEL
+ int filter_level[2];
int filter_level_u;
int filter_level_v;
+#else
+ int filter_level;
#endif
int sharpness_level;
@@ -49,14 +51,13 @@ struct loopfilter {
uint8_t mode_ref_delta_update;
// 0 = Intra, Last, Last2+Last3(CONFIG_EXT_REFS),
- // GF, BRF(CONFIG_EXT_REFS),
- // ARF2(CONFIG_EXT_REFS+CONFIG_ALTREF2), ARF
- signed char ref_deltas[TOTAL_REFS_PER_FRAME];
- signed char last_ref_deltas[TOTAL_REFS_PER_FRAME];
+ // GF, BRF(CONFIG_EXT_REFS), ARF2(CONFIG_EXT_REFS), ARF
+ int8_t ref_deltas[TOTAL_REFS_PER_FRAME];
+ int8_t last_ref_deltas[TOTAL_REFS_PER_FRAME];
// 0 = ZERO_MV, MV
- signed char mode_deltas[MAX_MODE_LF_DELTAS];
- signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
+ int8_t mode_deltas[MAX_MODE_LF_DELTAS];
+ int8_t last_mode_deltas[MAX_MODE_LF_DELTAS];
};
// Need to align this structure so when it is declared and
@@ -69,7 +70,11 @@ typedef struct {
typedef struct {
loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
+#if CONFIG_LOOPFILTER_LEVEL
+ uint8_t lvl[MAX_SEGMENTS][2][TOTAL_REFS_PER_FRAME][MAX_MODE_LF_DELTAS];
+#else
uint8_t lvl[MAX_SEGMENTS][TOTAL_REFS_PER_FRAME][MAX_MODE_LF_DELTAS];
+#endif
} loop_filter_info_n;
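// Indexing sketch for the new lvl[] layout (an assumption, mirroring the
// two-entry filter_level[2] above): under CONFIG_LOOPFILTER_LEVEL the
// per-segment level is looked up with an extra direction index, e.g.
//
//   uint8_t lvl = lf_info->lvl[segment_id][dir][ref][mode];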
// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
@@ -132,17 +137,42 @@ void av1_loop_filter_init(struct AV1Common *cm);
// This should be called before av1_loop_filter_rows(),
// av1_loop_filter_frame()
// calls this function directly.
-void av1_loop_filter_frame_init(struct AV1Common *cm, int default_filt_lvl);
+void av1_loop_filter_frame_init(struct AV1Common *cm, int default_filt_lvl,
+ int default_filt_lvl_r
+#if CONFIG_LOOPFILTER_LEVEL
+ ,
+ int plane
+#endif
+ );
+
+#if CONFIG_LPF_SB
+void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
+ struct macroblockd *mbd, int filter_level,
+ int y_only, int partial_frame, int mi_row,
+ int mi_col);
+
+// Apply the loop filter to [start, stop) macro block rows in frame_buffer.
+void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
+ struct AV1Common *cm,
+ struct macroblockd_plane *planes, int start, int stop,
+ int col_start, int col_end, int y_only);
+void av1_loop_filter_sb_level_init(struct AV1Common *cm, int mi_row, int mi_col,
+ int lvl);
+#else
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
struct macroblockd *mbd, int filter_level,
+#if CONFIG_LOOPFILTER_LEVEL
+ int filter_level_r,
+#endif
int y_only, int partial_frame);
// Apply the loop filter to [start, stop) macro block rows in frame_buffer.
void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
struct AV1Common *cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
- int start, int stop, int y_only);
+ struct macroblockd_plane *planes, int start, int stop,
+ int y_only);
+#endif // CONFIG_LPF_SB
typedef struct LoopFilterWorkerData {
YV12_BUFFER_CONFIG *frame_buffer;
@@ -154,9 +184,10 @@ typedef struct LoopFilterWorkerData {
int y_only;
} LFWorkerData;
-void av1_loop_filter_data_reset(
- LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
- struct AV1Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]);
+void av1_loop_filter_data_reset(LFWorkerData *lf_data,
+ YV12_BUFFER_CONFIG *frame_buffer,
+ struct AV1Common *cm,
+ const struct macroblockd_plane *planes);
// Operates on the rows described by 'lf_data'.
int av1_loop_filter_worker(LFWorkerData *const lf_data, void *unused);
diff --git a/third_party/aom/av1/common/av1_rtcd_defs.pl b/third_party/aom/av1/common/av1_rtcd_defs.pl
index 88e9ea4d8..203426e59 100755
--- a/third_party/aom/av1/common/av1_rtcd_defs.pl
+++ b/third_party/aom/av1/common/av1_rtcd_defs.pl
@@ -24,7 +24,6 @@ struct search_site_config;
struct mv;
union int_mv;
struct yv12_buffer_config;
-typedef uint16_t od_dering_in;
EOF
}
forward_decls qw/av1_common_forward_decls/;
@@ -64,86 +63,94 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
# Inverse dct
#
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- {
- add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht4x4_16_add sse2/;
+ add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht4x4_16_add sse2/;
- add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht4x8_32_add sse2/;
+ add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht4x8_32_add sse2/;
- add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x4_32_add sse2/;
+ add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht8x4_32_add sse2/;
- add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x16_128_add sse2/;
+ add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht8x16_128_add sse2/;
- add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht16x8_128_add sse2/;
+ add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht16x8_128_add sse2/;
- add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht16x32_512_add sse2/;
+ add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht16x32_512_add sse2/;
- add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht32x16_512_add sse2/;
+ add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht32x16_512_add sse2/;
- add_proto qw/void av1_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- add_proto qw/void av1_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- add_proto qw/void av1_iht8x32_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht8x32_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- add_proto qw/void av1_iht32x8_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht32x8_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x8_64_add sse2/;
+ add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht8x8_64_add sse2/;
- add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
- specialize qw/av1_iht16x16_256_add sse2 avx2/;
+ add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ specialize qw/av1_iht16x16_256_add sse2 avx2/;
- add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
- }
+ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
} else {
- {
- add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht4x4_16_add sse2 neon dspr2/;
+ add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
+ specialize qw/av1_iht4x4_16_add sse2 neon/;
+ }
- add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht4x8_32_add sse2/;
+ add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht4x8_32_add sse2/;
- add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x4_32_add sse2/;
+ add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht8x4_32_add sse2/;
- add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x16_128_add sse2/;
+ add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht8x16_128_add sse2/;
- add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht16x8_128_add sse2/;
+ add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht16x8_128_add sse2/;
- add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht16x32_512_add sse2/;
+ add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht16x32_512_add sse2/;
- add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht32x16_512_add sse2/;
+ add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht32x16_512_add sse2/;
- add_proto qw/void av1_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- add_proto qw/void av1_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- add_proto qw/void av1_iht8x32_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht8x32_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- add_proto qw/void av1_iht32x8_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht32x8_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
- specialize qw/av1_iht8x8_64_add sse2 neon dspr2/;
+ add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
+ specialize qw/av1_iht8x8_64_add sse2 neon/;
+ }
- add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
- specialize qw/av1_iht16x16_256_add sse2 avx2 dspr2/;
+ add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
+ specialize qw/av1_iht16x16_256_add sse2 avx2/;
+ }
- add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
- if (aom_config("CONFIG_EXT_TX") ne "yes") {
+ if (aom_config("CONFIG_EXT_TX") ne "yes") {
+ if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
specialize qw/av1_iht4x4_16_add msa/;
+ }
+ if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
specialize qw/av1_iht8x8_64_add msa/;
+ }
+ if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
specialize qw/av1_iht16x16_256_add msa/;
}
}
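# Note (illustrative): add_proto declares a function's C signature and wires
# up the baseline _c implementation; a following "specialize" line such as
#   specialize qw/av1_iht8x8_64_add sse2 neon/;
# registers SIMD variants that the runtime CPU dispatcher may pick instead.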
@@ -153,6 +160,8 @@ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *out
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_iht64x64_4096_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ add_proto qw/void av1_iht32x64_2048_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ add_proto qw/void av1_iht64x32_2048_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
}
if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
@@ -256,63 +265,41 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
}
#inv txfm
-add_proto qw/void av1_inv_txfm2d_add_4x8/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_8x4/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_8x16/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_16x8/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_16x32/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_32x16/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
-specialize qw/av1_inv_txfm2d_add_4x4 sse4_1/;
-add_proto qw/void av1_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
-specialize qw/av1_inv_txfm2d_add_8x8 sse4_1/;
-add_proto qw/void av1_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
-specialize qw/av1_inv_txfm2d_add_16x16 sse4_1/;
-add_proto qw/void av1_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
-specialize qw/av1_inv_txfm2d_add_32x32 avx2/;
-add_proto qw/void av1_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_4x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_8x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_8x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_16x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_16x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_32x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+add_proto qw/void av1_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
+ specialize qw/av1_inv_txfm2d_add_4x4 sse4_1/;
+}
+add_proto qw/void av1_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
+ specialize qw/av1_inv_txfm2d_add_8x8 sse4_1/;
+}
+add_proto qw/void av1_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
+ specialize qw/av1_inv_txfm2d_add_16x16 sse4_1/;
+}
+add_proto qw/void av1_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+if (aom_config("CONFIG_DAALA_DCT32") ne "yes") {
+ specialize qw/av1_inv_txfm2d_add_32x32 avx2/;
+}
+if (aom_config("CONFIG_TX64X64") eq "yes") {
+ add_proto qw/void av1_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_inv_txfm2d_add_64x32/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_inv_txfm2d_add_32x64/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
+}
#
# Encoder functions below this point.
#
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
-# ENCODEMB INVOKE
-
-if (aom_config("CONFIG_AOM_QM") eq "yes") {
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- # the transform coefficients are held in 32-bit
- # values, so the assembler code for av1_block_error can no longer be used.
- add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/av1_block_error avx2/;
-
- add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
- add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
- }
-
- add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
- } else {
- add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/av1_block_error avx2 msa/, "$sse2_x86inc";
-
- add_proto qw/int64_t av1_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
- specialize qw/av1_block_error_fp neon/, "$sse2_x86inc";
-
- add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
- add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
- }
+ # ENCODEMB INVOKE
- add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
- }
-} else {
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for av1_block_error can no longer be used.
@@ -328,8 +315,6 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
}
-
- add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
} else {
add_proto qw/int64_t av1_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/av1_block_error sse2 avx2 msa/;
@@ -347,249 +332,257 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
}
- add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_fdct8x8_quant sse2 ssse3 neon/;
}
-}
-
-# fdct functions
+ # fdct functions
-add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-specialize qw/av1_fht4x4 sse2/;
+ add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
+ specialize qw/av1_fht4x4 sse2/;
+ }
-add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
-add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-specialize qw/av1_fht8x8 sse2/;
+ add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
+ specialize qw/av1_fht8x8 sse2/;
+ }
-add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-specialize qw/av1_fht16x16 sse2 avx2/;
+ add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
+ specialize qw/av1_fht16x16 sse2 avx2/;
+ }
-add_proto qw/void av1_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-specialize qw/av1_fht32x32 sse2 avx2/;
+ add_proto qw/void av1_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ if (aom_config("CONFIG_DAALA_DCT32") ne "yes") {
+ specialize qw/av1_fht32x32 sse2 avx2/;
+ }
-if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_fht64x64/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-}
+ if (aom_config("CONFIG_TX64X64") eq "yes") {
+ add_proto qw/void av1_fht64x64/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ add_proto qw/void av1_fht64x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ add_proto qw/void av1_fht32x64/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ }
-add_proto qw/void av1_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-specialize qw/av1_fht4x8 sse2/;
+ add_proto qw/void av1_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht4x8 sse2/;
-add_proto qw/void av1_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-specialize qw/av1_fht8x4 sse2/;
+ add_proto qw/void av1_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht8x4 sse2/;
-add_proto qw/void av1_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-specialize qw/av1_fht8x16 sse2/;
+ add_proto qw/void av1_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht8x16 sse2/;
-add_proto qw/void av1_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-specialize qw/av1_fht16x8 sse2/;
+ add_proto qw/void av1_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht16x8 sse2/;
-add_proto qw/void av1_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-specialize qw/av1_fht16x32 sse2/;
+ add_proto qw/void av1_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht16x32 sse2/;
-add_proto qw/void av1_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-specialize qw/av1_fht32x16 sse2/;
+ add_proto qw/void av1_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht32x16 sse2/;
-add_proto qw/void av1_fht4x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ add_proto qw/void av1_fht4x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-add_proto qw/void av1_fht16x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ add_proto qw/void av1_fht16x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-add_proto qw/void av1_fht8x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ add_proto qw/void av1_fht8x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-add_proto qw/void av1_fht32x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ add_proto qw/void av1_fht32x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-if (aom_config("CONFIG_HIGHBITDEPTH") ne "yes") {
- if (aom_config("CONFIG_EXT_TX") ne "yes") {
- specialize qw/av1_fht4x4 msa/;
- specialize qw/av1_fht8x8 msa/;
- specialize qw/av1_fht16x16 msa/;
+ if (aom_config("CONFIG_HIGHBITDEPTH") ne "yes") {
+ if (aom_config("CONFIG_EXT_TX") ne "yes") {
+ if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
+ specialize qw/av1_fht4x4 msa/;
+ }
+ if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
+ specialize qw/av1_fht8x8 msa/;
+ }
+ if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
+ specialize qw/av1_fht16x16 msa/;
+ }
+ }
}
-}
-add_proto qw/void av1_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";
-
-if (aom_config("CONFIG_DPCM_INTRA") eq "yes") {
- @sizes = (4, 8, 16, 32);
- foreach $size (@sizes) {
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto "void", "av1_hbd_dpcm_ft$size", "const int16_t *input, int stride, TX_TYPE_1D tx_type, tran_low_t *output, int dir";
- }
- add_proto "void", "av1_dpcm_ft$size", "const int16_t *input, int stride, TX_TYPE_1D tx_type, tran_low_t *output";
+ add_proto qw/void av1_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type";
+
+ #fwd txfm
+ add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ if (aom_config("CONFIG_DAALA_DCT4") ne "yes") {
+ specialize qw/av1_fwd_txfm2d_4x4 sse4_1/;
+ }
+ add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ if (aom_config("CONFIG_DAALA_DCT8") ne "yes") {
+ specialize qw/av1_fwd_txfm2d_8x8 sse4_1/;
+ }
+ add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ if (aom_config("CONFIG_DAALA_DCT16") ne "yes") {
+ specialize qw/av1_fwd_txfm2d_16x16 sse4_1/;
+ }
+ add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ if (aom_config("CONFIG_DAALA_DCT32") ne "yes") {
+ specialize qw/av1_fwd_txfm2d_32x32 sse4_1/;
}
-}
-#fwd txfm
-add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-specialize qw/av1_fwd_txfm2d_4x4 sse4_1/;
-add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-specialize qw/av1_fwd_txfm2d_8x8 sse4_1/;
-add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-specialize qw/av1_fwd_txfm2d_16x16 sse4_1/;
-add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-specialize qw/av1_fwd_txfm2d_32x32 sse4_1/;
-add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
-specialize qw/av1_fwd_txfm2d_64x64 sse4_1/;
+ if (aom_config("CONFIG_TX64X64") eq "yes") {
+ add_proto qw/void av1_fwd_txfm2d_32x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_64x32/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
+ }
+ #
+ # Motion search
+ #
+ add_proto qw/int av1_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
+ specialize qw/av1_full_search_sad sse3 sse4_1/;
+ $av1_full_search_sad_sse3=av1_full_search_sadx3;
+ $av1_full_search_sad_sse4_1=av1_full_search_sadx8;
-#
-# Motion search
-#
-add_proto qw/int av1_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
-specialize qw/av1_full_search_sad sse3 sse4_1/;
-$av1_full_search_sad_sse3=av1_full_search_sadx3;
-$av1_full_search_sad_sse4_1=av1_full_search_sadx8;
+ add_proto qw/int av1_diamond_search_sad/, "struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv";
-add_proto qw/int av1_diamond_search_sad/, "struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv";
+ add_proto qw/int av1_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv";
-add_proto qw/int av1_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv";
+ add_proto qw/void av1_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+ specialize qw/av1_temporal_filter_apply sse2 msa/;
-add_proto qw/void av1_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
-specialize qw/av1_temporal_filter_apply sse2 msa/;
+ if (aom_config("CONFIG_AOM_QM") eq "yes") {
+ add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
+ } else {
+ add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+ }
-if (aom_config("CONFIG_AOM_QM") eq "yes") {
- add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-} else {
- add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
-}
+ if (aom_config("CONFIG_LGT_FROM_PRED") eq "yes") {
+ add_proto qw/void flgt2d_from_pred/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ }
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- # ENCODEMB INVOKE
- if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
- add_proto qw/void highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ # ENCODEMB INVOKE
+ if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
+ add_proto qw/void highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
- add_proto qw/void highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ add_proto qw/void highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
- add_proto qw/void highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ add_proto qw/void highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
- add_proto qw/void highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ add_proto qw/void highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ if (aom_config("CONFIG_TX64X64") eq "yes") {
+ add_proto qw/void highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
- add_proto qw/void highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ add_proto qw/void highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ }
}
- }
-
- add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
- specialize qw/av1_highbd_block_error sse2/;
- # fdct functions
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_highbd_fht64x64/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
- }
+ add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
+ specialize qw/av1_highbd_block_error sse2/;
- add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+ add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
-}
-
-if (aom_config("CONFIG_AOM_QM") eq "yes") {
- add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-
- add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-
- if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
}
- add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-} else {
add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
- add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
-}
-
-add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
-# End av1_high encoder functions
+ # End av1_high encoder functions
-if (aom_config("CONFIG_EXT_INTER") eq "yes") {
add_proto qw/uint64_t av1_wedge_sse_from_residuals/, "const int16_t *r1, const int16_t *d, const uint8_t *m, int N";
specialize qw/av1_wedge_sse_from_residuals sse2/;
add_proto qw/int av1_wedge_sign_from_residuals/, "const int16_t *ds, const uint8_t *m, int N, int64_t limit";
specialize qw/av1_wedge_sign_from_residuals sse2/;
add_proto qw/void av1_wedge_compute_delta_squares/, "int16_t *d, const int16_t *a, const int16_t *b, int N";
specialize qw/av1_wedge_compute_delta_squares sse2/;
-}
}
# end encoder functions
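
The av1_quantize_b prototypes above differ only in whether quantization-matrix pointers travel with the call; the per-coefficient math is the familiar zbin/round/quant/shift pipeline shared with libvpx, with log_scale folding the 32x32/64x64 variants into one entry point. A minimal sketch of that per-coefficient step (clamping and log_scale handling elided; this is illustrative, not the exact libaom source):

    #include <stdint.h>
    #include <stdlib.h>

    /* One coefficient through the b-quantizer: dead-zone check,
       rounding, two-stage multiply, then dequantization. */
    static int32_t quantize_coeff(int32_t coeff, int16_t zbin, int16_t round,
                                  int16_t quant, int16_t quant_shift,
                                  int16_t dequant, int32_t *dqcoeff) {
      const int sign = coeff < 0 ? -1 : 1;
      const int32_t abs_coeff = abs(coeff);
      if (abs_coeff < zbin) {  /* inside the dead zone: quantize to 0 */
        *dqcoeff = 0;
        return 0;
      }
      int32_t tmp = abs_coeff + round;
      tmp = ((((tmp * quant) >> 16) + tmp) * quant_shift) >> 16;
      *dqcoeff = sign * tmp * dequant;
      return sign * tmp;
    }

    int main(void) {
      int32_t dq;
      /* Inside the dead zone: quantizes to zero. */
      if (quantize_coeff(10, 24, 16, 16384, 16384, 4, &dq) != 0) return 1;
      /* Outside: sign is restored on both qcoeff and dqcoeff. */
      if (quantize_coeff(-100, 24, 16, 16384, 16384, 4, &dq) >= 0) return 1;
      return 0;
    }

The eob value the real functions report through eob_ptr is one past the index of the last coefficient this step leaves non-zero, walked in scan order.
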
# If PVQ is enabled, fwd transforms are required by decoder
if (aom_config("CONFIG_PVQ") eq "yes") {
-# fdct functions
+ # fdct functions
-if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht4x4 sse2/;
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht4x4 sse2/;
- add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht8x8 sse2/;
+ add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht8x8 sse2/;
- add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht16x16 sse2/;
+ add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht16x16 sse2/;
- add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/av1_fwht4x4 sse2/;
-} else {
- add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht4x4 sse2 msa/;
+ add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fwht4x4 sse2/;
+ } else {
+ add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht4x4 sse2 msa/;
- add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht8x8 sse2 msa/;
+ add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht8x8 sse2 msa/;
- add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht16x16 sse2 msa/;
+ add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
+ specialize qw/av1_fht16x16 sse2 msa/;
- add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/av1_fwht4x4 msa sse2/;
-}
+ add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/av1_fwht4x4 msa sse2/;
+ }
}
# Deringing Functions
if (aom_config("CONFIG_CDEF") eq "yes") {
- add_proto qw/void aom_clpf_block_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
- add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift";
- add_proto qw/void od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
- add_proto qw/void od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
-
- add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
- add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
- add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
- add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
+ add_proto qw/int cdef_find_dir/, "const uint16_t *img, int stride, int32_t *var, int coeff_shift";
+ if (aom_config("CONFIG_CDEF_SINGLEPASS") ne "yes") {
+ add_proto qw/void aom_clpf_block_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
+ add_proto qw/void aom_clpf_hblock_hbd/, "uint16_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
+ add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
+ add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
+ add_proto qw/void cdef_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
+ add_proto qw/void cdef_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir, int damping";
+ add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
+ add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
+ add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
+ add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
+ } else {
+ add_proto qw/void cdef_filter_block/, "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int max";
+ }
+
add_proto qw/void copy_rect8_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h";
add_proto qw/void copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h";
-# VS compiling for 32 bit targets does not support vector types in
+ # VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
- specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
- specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
- specialize qw/od_dir_find8 sse2 ssse3 sse4_1 neon/;
- specialize qw/od_filter_dering_direction_4x4 sse2 ssse3 sse4_1 neon/;
- specialize qw/od_filter_dering_direction_8x8 sse2 ssse3 sse4_1 neon/;
-
- specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
- specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
+ if (aom_config("CONFIG_CDEF_SINGLEPASS") eq "yes") {
+ specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/cdef_filter_block sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
+ } else {
+ specialize qw/cdef_find_dir sse2 ssse3 sse4_1 neon/;
+ specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
+ specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
+ specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
+ specialize qw/aom_clpf_hblock sse2 ssse3 sse4_1 neon/;
+ specialize qw/cdef_direction_4x4 sse2 ssse3 sse4_1 neon/;
+ specialize qw/cdef_direction_8x8 sse2 ssse3 sse4_1 neon/;
+
+ specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
+ specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
+ specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
+ specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
+ specialize qw/copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
+ specialize qw/copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
+ }
}
}
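
Both the two-pass (clpf plus deringing) and CONFIG_CDEF_SINGLEPASS kernels prototyped above are built around the same non-linear constraint: a tap may only nudge the center pixel, and the permitted nudge shrinks as the neighbour difference grows, controlled by strength (threshold) and damping. A self-contained sketch of that core, following the published CDEF formulation (names here are illustrative):

    #include <stdlib.h>

    /* Clamp a neighbour-minus-center difference so large differences
       (likely real edges) contribute little or nothing. */
    static int constrain(int diff, int threshold, int damping) {
      if (!threshold) return 0;
      /* shift = max(0, damping - floor(log2(threshold))) */
      int shift = damping;
      int t = threshold;
      while (t > 1) { t >>= 1; shift--; }
      if (shift < 0) shift = 0;
      const int mag = abs(diff);
      const int clamped = threshold - (mag >> shift);
      const int limited = clamped < 0 ? 0 : clamped;
      const int m = mag < limited ? mag : limited;
      return diff < 0 ? -m : m;
    }

    int main(void) {
      /* Small differences pass through... */
      if (constrain(2, 4, 6) != 2) return 1;
      /* ...large ones (likely true edges) are suppressed entirely. */
      if (constrain(100, 4, 6) != 0) return 1;
      return 0;
    }
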
@@ -607,16 +600,9 @@ if ((aom_config("CONFIG_WARPED_MOTION") eq "yes") ||
add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
specialize qw/av1_warp_affine sse2 ssse3/;
- if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
- add_proto qw/void av1_warp_affine_post_round/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
- }
-
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
specialize qw/av1_highbd_warp_affine ssse3/;
- if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
- add_proto qw/void av1_highbd_warp_affine_post_round/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
- }
}
}
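
av1_warp_affine walks the output block and maps every pixel through a fixed-point affine model: mat[0..1] hold the translation and mat[2..5] the 2x2 linear part, all at WARPEDMODEL_PREC_BITS precision, while alpha/beta/gamma/delta are shear terms the real code derives from mat to drive the separable filter. A sketch of just the coordinate mapping (interpolation omitted; assumes the conventional 16-bit precision constant):

    #include <stdint.h>

    #define WARPEDMODEL_PREC_BITS 16

    /* Map integer destination coords (x, y) to a fixed-point source
       position using the 6-parameter affine model stored in mat[]. */
    static void warp_point(const int32_t *mat, int x, int y,
                           int64_t *src_x, int64_t *src_y) {
      *src_x = (int64_t)mat[2] * x + (int64_t)mat[3] * y + mat[0];
      *src_y = (int64_t)mat[4] * x + (int64_t)mat[5] * y + mat[1];
      /* The integer part selects the reference sample; the low
         WARPEDMODEL_PREC_BITS bits pick the subpel filter phase. */
    }

    int main(void) {
      /* Identity model: (1 << PREC) on the diagonal, zero elsewhere. */
      const int32_t mat[6] = { 0, 0, 1 << WARPEDMODEL_PREC_BITS, 0,
                               0, 1 << WARPEDMODEL_PREC_BITS };
      int64_t sx, sy;
      warp_point(mat, 3, 7, &sx, &sy);
      return (sx >> WARPEDMODEL_PREC_BITS) == 3 &&
             (sy >> WARPEDMODEL_PREC_BITS) == 7 ? 0 : 1;
    }
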
@@ -632,7 +618,7 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
add_proto qw/void apply_selfguided_restoration/, "uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf";
specialize qw/apply_selfguided_restoration sse4_1/;
- add_proto qw/void av1_selfguided_restoration/, "uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps, int32_t *tmpbuf";
+ add_proto qw/void av1_selfguided_restoration/, "uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps";
specialize qw/av1_selfguided_restoration sse4_1/;
add_proto qw/void av1_highpass_filter/, "uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps";
@@ -642,7 +628,7 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
add_proto qw/void apply_selfguided_restoration_highbd/, "uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf";
specialize qw/apply_selfguided_restoration_highbd sse4_1/;
- add_proto qw/void av1_selfguided_restoration_highbd/, "uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps, int32_t *tmpbuf";
+ add_proto qw/void av1_selfguided_restoration_highbd/, "uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps";
specialize qw/av1_selfguided_restoration_highbd sse4_1/;
add_proto qw/void av1_highpass_filter_highbd/, "uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps";
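
apply_selfguided_restoration produces two filtered versions of the source (the guided filter and the highpass prototyped above) and then projects the source onto them with the encoder-signalled xqd weights. A hedged sketch of the blend step; the two precision constants follow the loop-restoration design but should be treated as illustrative here:

    #include <stdint.h>

    #define SGRPROJ_RST_BITS 4 /* extra precision on the unfiltered source */
    #define SGRPROJ_PRC_BITS 7 /* precision of the projection weights */

    /* Project source pixel dat onto two filter outputs f0/f1 using
       weights xq[0]/xq[1] decoded from xqd (blend step only). */
    static uint8_t sgrproj_blend(uint8_t dat, int32_t f0, int32_t f1,
                                 const int xq[2]) {
      const int32_t u = (int32_t)dat << SGRPROJ_RST_BITS;
      const int32_t v = xq[0] * (f0 - u) + xq[1] * (f1 - u)
                        + (u << SGRPROJ_PRC_BITS);
      const int shift = SGRPROJ_PRC_BITS + SGRPROJ_RST_BITS;
      int32_t w = (v + (1 << (shift - 1))) >> shift;
      if (w < 0) w = 0;
      if (w > 255) w = 255;
      return (uint8_t)w;
    }

    int main(void) {
      /* With zero projection weights the source passes through unchanged. */
      const int xq[2] = { 0, 0 };
      return sgrproj_blend(100, 0, 0, xq) == 100 ? 0 : 1;
    }
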
@@ -653,17 +639,40 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
# CONVOLVE_ROUND/COMPOUND_ROUND functions
if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
- add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
- specialize qw/av1_convolve_2d sse2/;
- add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
- specialize qw/av1_convolve_rounding avx2/;
+ add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
+ specialize qw/av1_convolve_2d sse2/;
+ add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
+ specialize qw/av1_convolve_rounding avx2/;
+
+ add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
+ if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
+ specialize qw/av1_convolve_2d_scale sse4_1/;
+ }
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_convolve_2d/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
specialize qw/av1_highbd_convolve_2d ssse3/;
add_proto qw/void av1_highbd_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits, int bd";
specialize qw/av1_highbd_convolve_rounding avx2/;
+
+ add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
+ if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
+ specialize qw/av1_highbd_convolve_2d_scale sse4_1/;
+ }
+ }
+}
+
+# INTRA_EDGE functions
+if (aom_config("CONFIG_INTRA_EDGE") eq "yes") {
+ add_proto qw/void av1_filter_intra_edge/, "uint8_t *p, int sz, int strength";
+ specialize qw/av1_filter_intra_edge sse4_1/;
+ add_proto qw/void av1_upsample_intra_edge/, "uint8_t *p, int sz";
+ specialize qw/av1_upsample_intra_edge sse4_1/;
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void av1_filter_intra_edge_high/, "uint16_t *p, int sz, int strength";
+ specialize qw/av1_filter_intra_edge_high sse4_1/;
+ add_proto qw/void av1_upsample_intra_edge_high/, "uint16_t *p, int sz, int bd";
+ specialize qw/av1_upsample_intra_edge_high sse4_1/;
}
}
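
av1_filter_intra_edge low-passes the reference line with one of three strength-dependent 5-tap kernels before directional prediction (av1_upsample_intra_edge instead doubles the line with a sharpening filter). A sketch of the filter step, with kernels as in the AV1 design and boundary handling simplified:

    #include <stdint.h>
    #include <string.h>

    /* Strength-dependent 5-tap kernels, each summing to 16. */
    static const int kernels[3][5] = {
      { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
    };

    static void filter_intra_edge(uint8_t *p, int sz, int strength) {
      if (!strength) return;
      uint8_t in[160];  /* comfortably larger than the longest edge */
      memcpy(in, p, sz);
      for (int i = 1; i < sz; ++i) {  /* p[0] is left untouched */
        int s = 0;
        for (int k = 0; k < 5; ++k) {
          int idx = i - 2 + k;
          idx = idx < 0 ? 0 : (idx > sz - 1 ? sz - 1 : idx);
          s += kernels[strength - 1][k] * in[idx];
        }
        p[i] = (uint8_t)((s + 8) >> 4);
      }
    }

    int main(void) {
      uint8_t p[8] = { 128, 128, 128, 128, 128, 128, 128, 128 };
      filter_intra_edge(p, 8, 3);
      return p[4] == 128 ? 0 : 1;  /* kernels sum to 16: flat stays flat */
    }
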
-1;
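
Every add_proto/specialize pair in this file is input to the rtcd generator, which emits a header with one dispatch pointer per prototype and a setup routine that installs the best variant the running CPU supports. Roughly the generated shape, compressed into one self-contained file; the stub bodies and the trivial caps probe are illustrative, not libaom code:

    #include <stdint.h>

    typedef int32_t tran_low_t;  /* int16_t in low-bitdepth builds */
    struct txfm_param;           /* opaque here; defined in av1 */

    #define HAS_SSE2 0x01

    /* Illustrative stand-in for the real CPU-feature probe. */
    static int x86_simd_caps(void) { return HAS_SSE2; }

    /* The generator declares every variant named by `specialize`... */
    static void av1_fht4x4_c(const int16_t *input, tran_low_t *output,
                             int stride, struct txfm_param *param) {
      (void)input; (void)output; (void)stride; (void)param;  /* stub */
    }
    static void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output,
                                int stride, struct txfm_param *param) {
      (void)input; (void)output; (void)stride; (void)param;  /* stub */
    }

    /* ...plus one dispatch pointer per `add_proto` line... */
    static void (*av1_fht4x4)(const int16_t *, tran_low_t *, int,
                              struct txfm_param *);

    /* ...and a setup routine that installs the best supported variant. */
    static void setup_rtcd_internal(void) {
      const int flags = x86_simd_caps();
      av1_fht4x4 = av1_fht4x4_c;  /* the C fallback is always present */
      if (flags & HAS_SSE2) av1_fht4x4 = av1_fht4x4_sse2;
    }

    int main(void) {
      setup_rtcd_internal();
      int16_t in[16] = { 0 };
      tran_low_t out[16];
      av1_fht4x4(in, out, 4, 0);
      return 0;
    }

Assignments such as $av1_full_search_sad_sse4_1=av1_full_search_sadx8; override the symbol bound for one ISA, letting a single optimized routine (here an 8-wide SAD search) serve under the dispatch name.
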
diff --git a/third_party/aom/av1/common/av1_txfm.h b/third_party/aom/av1/common/av1_txfm.h
index 269ef5705..bd365de59 100644
--- a/third_party/aom/av1/common/av1_txfm.h
+++ b/third_party/aom/av1/common/av1_txfm.h
@@ -17,9 +17,16 @@
#include <stdio.h>
#include "av1/common/enums.h"
+#include "av1/common/blockd.h"
#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_TXFM_STAGE_NUM 12
+
static const int cos_bit_min = 10;
static const int cos_bit_max = 16;
@@ -110,27 +117,6 @@ static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
return round_shift(result_32, bit);
}
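
half_btf is the rounded fixed-point butterfly used by every stage of these transforms: it forms w0*in0 + w1*in1 and round-shifts by the stage's cos_bit (constrained to [cos_bit_min, cos_bit_max] above). A small self-contained check, restating the two helpers and using an illustrative 12-bit cosine weight:

    #include <assert.h>
    #include <stdint.h>

    static int32_t round_shift(int32_t value, int bit) {
      return (value + (1 << (bit - 1))) >> bit;
    }

    static int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
                            int bit) {
      const int32_t result_32 = w0 * in0 + w1 * in1;
      return round_shift(result_32, bit);
    }

    int main(void) {
      /* cos(pi/4) at 12-bit precision: round(4096 * 0.70711) = 2896. */
      const int32_t c = 2896;
      /* (100 + 100) * cos(pi/4) ~= 141.42, so the butterfly yields 141. */
      assert(half_btf(c, 100, c, 100, 12) == 141);
      return 0;
    }
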
-static INLINE int get_max_bit(int x) {
- int max_bit = -1;
- while (x) {
- x = x >> 1;
- max_bit++;
- }
- return max_bit;
-}
-
-// TODO(angiebird): implement SSE
-static INLINE void clamp_block(int16_t *block, int block_size_row,
- int block_size_col, int stride, int low,
- int high) {
- int i, j;
- for (i = 0; i < block_size_row; ++i) {
- for (j = 0; j < block_size_col; ++j) {
- block[i * stride + j] = clamp(block[i * stride + j], low, high);
- }
- }
-}
-
typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
@@ -148,6 +134,7 @@ typedef enum TXFM_TYPE {
TXFM_TYPE_IDENTITY8,
TXFM_TYPE_IDENTITY16,
TXFM_TYPE_IDENTITY32,
+ TXFM_TYPE_IDENTITY64,
} TXFM_TYPE;
typedef struct TXFM_1D_CFG {
@@ -167,7 +154,7 @@ typedef struct TXFM_2D_FLIP_CFG {
const TXFM_1D_CFG *row_cfg;
} TXFM_2D_FLIP_CFG;
-static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
+static INLINE void set_flip_cfg(TX_TYPE tx_type, TXFM_2D_FLIP_CFG *cfg) {
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
@@ -209,21 +196,171 @@ static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
}
}
+#if CONFIG_TXMG
+static INLINE TX_SIZE av1_rotate_tx_size(TX_SIZE tx_size) {
+ switch (tx_size) {
+#if CONFIG_CHROMA_2X2
+ case TX_2X2: return TX_2X2;
+#endif
+ case TX_4X4: return TX_4X4;
+ case TX_8X8: return TX_8X8;
+ case TX_16X16: return TX_16X16;
+ case TX_32X32: return TX_32X32;
+#if CONFIG_TX64X64
+ case TX_64X64: return TX_64X64;
+ case TX_32X64: return TX_64X32;
+ case TX_64X32: return TX_32X64;
+#endif
+ case TX_4X8: return TX_8X4;
+ case TX_8X4: return TX_4X8;
+ case TX_8X16: return TX_16X8;
+ case TX_16X8: return TX_8X16;
+ case TX_16X32: return TX_32X16;
+ case TX_32X16: return TX_16X32;
+ case TX_4X16: return TX_16X4;
+ case TX_16X4: return TX_4X16;
+ case TX_8X32: return TX_32X8;
+ case TX_32X8: return TX_8X32;
+ default: assert(0); return TX_INVALID;
+ }
+}
+
+static INLINE TX_TYPE av1_rotate_tx_type(TX_TYPE tx_type) {
+ switch (tx_type) {
+ case DCT_DCT: return DCT_DCT;
+ case ADST_DCT: return DCT_ADST;
+ case DCT_ADST: return ADST_DCT;
+ case ADST_ADST: return ADST_ADST;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT: return DCT_FLIPADST;
+ case DCT_FLIPADST: return FLIPADST_DCT;
+ case FLIPADST_FLIPADST: return FLIPADST_FLIPADST;
+ case ADST_FLIPADST: return FLIPADST_ADST;
+ case FLIPADST_ADST: return ADST_FLIPADST;
+ case IDTX: return IDTX;
+ case V_DCT: return H_DCT;
+ case H_DCT: return V_DCT;
+ case V_ADST: return H_ADST;
+ case H_ADST: return V_ADST;
+ case V_FLIPADST: return H_FLIPADST;
+ case H_FLIPADST: return V_FLIPADST;
+#endif // CONFIG_EXT_TX
+#if CONFIG_MRC_TX
+ case MRC_DCT: return MRC_DCT;
+#endif // CONFIG_MRC_TX
+ default: assert(0); return TX_TYPES;
+ }
+}
+#endif // CONFIG_TXMG
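
The TXMG rotation helpers exist so a transposed kernel can stand in for the direct one: rotate the size and the type, run the transposed transform, and a second rotation is guaranteed to land back on the original. A tiny check of that involution property on a reduced stand-in enum (the real mapping covers the full TX_TYPE set shown above):

    #include <assert.h>

    /* Reduced stand-in for TX_TYPE, enough to show the mapping. */
    typedef enum { DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST, TX_TYPES } TX_TYPE;

    static TX_TYPE rotate_tx_type(TX_TYPE t) {
      switch (t) {
        case DCT_DCT: return DCT_DCT;    /* symmetric: unchanged */
        case ADST_DCT: return DCT_ADST;  /* row/col kernels swap */
        case DCT_ADST: return ADST_DCT;
        case ADST_ADST: return ADST_ADST;
        default: return TX_TYPES;
      }
    }

    int main(void) {
      /* Rotating twice must be the identity for every type. */
      for (TX_TYPE t = DCT_DCT; t < TX_TYPES; t = (TX_TYPE)(t + 1))
        assert(rotate_tx_type(rotate_tx_type(t)) == t);
      return 0;
    }
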
+
#if CONFIG_MRC_TX
-static INLINE void get_mrc_mask(const uint8_t *pred, int pred_stride, int *mask,
- int mask_stride, int width, int height) {
+static INLINE int get_mrc_diff_mask_inter(const int16_t *diff, int diff_stride,
+ uint8_t *mask, int mask_stride,
+ int width, int height) {
+ // placeholder mask generation function
+ assert(SIGNAL_MRC_MASK_INTER);
+ int n_masked_vals = 0;
for (int i = 0; i < height; ++i) {
- for (int j = 0; j < width; ++j)
+ for (int j = 0; j < width; ++j) {
+ mask[i * mask_stride + j] = diff[i * diff_stride + j] > 100 ? 1 : 0;
+ n_masked_vals += mask[i * mask_stride + j];
+ }
+ }
+ return n_masked_vals;
+}
+
+static INLINE int get_mrc_pred_mask_inter(const uint8_t *pred, int pred_stride,
+ uint8_t *mask, int mask_stride,
+ int width, int height) {
+ // placeholder mask generation function
+ int n_masked_vals = 0;
+ for (int i = 0; i < height; ++i) {
+ for (int j = 0; j < width; ++j) {
+ mask[i * mask_stride + j] = pred[i * pred_stride + j] > 100 ? 1 : 0;
+ n_masked_vals += mask[i * mask_stride + j];
+ }
+ }
+ return n_masked_vals;
+}
+
+static INLINE int get_mrc_diff_mask_intra(const int16_t *diff, int diff_stride,
+ uint8_t *mask, int mask_stride,
+ int width, int height) {
+ // placeholder mask generation function
+ assert(SIGNAL_MRC_MASK_INTRA);
+ int n_masked_vals = 0;
+ for (int i = 0; i < height; ++i) {
+ for (int j = 0; j < width; ++j) {
+ mask[i * mask_stride + j] = diff[i * diff_stride + j] > 100 ? 1 : 0;
+ n_masked_vals += mask[i * mask_stride + j];
+ }
+ }
+ return n_masked_vals;
+}
+
+static INLINE int get_mrc_pred_mask_intra(const uint8_t *pred, int pred_stride,
+ uint8_t *mask, int mask_stride,
+ int width, int height) {
+ // placeholder mask generation function
+ int n_masked_vals = 0;
+ for (int i = 0; i < height; ++i) {
+ for (int j = 0; j < width; ++j) {
mask[i * mask_stride + j] = pred[i * pred_stride + j] > 100 ? 1 : 0;
+ n_masked_vals += mask[i * mask_stride + j];
+ }
+ }
+ return n_masked_vals;
+}
+
+static INLINE int get_mrc_diff_mask(const int16_t *diff, int diff_stride,
+ uint8_t *mask, int mask_stride, int width,
+ int height, int is_inter) {
+ if (is_inter) {
+ assert(USE_MRC_INTER && "MRC invalid for inter blocks");
+ assert(SIGNAL_MRC_MASK_INTER);
+ return get_mrc_diff_mask_inter(diff, diff_stride, mask, mask_stride, width,
+ height);
+ } else {
+ assert(USE_MRC_INTRA && "MRC invalid for intra blocks");
+ assert(SIGNAL_MRC_MASK_INTRA);
+ return get_mrc_diff_mask_intra(diff, diff_stride, mask, mask_stride, width,
+ height);
+ }
+}
+
+static INLINE int get_mrc_pred_mask(const uint8_t *pred, int pred_stride,
+ uint8_t *mask, int mask_stride, int width,
+ int height, int is_inter) {
+ if (is_inter) {
+ assert(USE_MRC_INTER && "MRC invalid for inter blocks");
+ return get_mrc_pred_mask_inter(pred, pred_stride, mask, mask_stride, width,
+ height);
+ } else {
+ assert(USE_MRC_INTRA && "MRC invalid for intra blocks");
+ return get_mrc_pred_mask_intra(pred, pred_stride, mask, mask_stride, width,
+ height);
}
}
+
+static INLINE int is_valid_mrc_mask(int n_masked_vals, int width, int height) {
+ return !(n_masked_vals == 0 || n_masked_vals == (width * height));
+}
#endif // CONFIG_MRC_TX
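
All four placeholder generators above build the mask identically, thresholding at 100 and returning the population count, so callers can reject degenerate all-zero or all-one masks through is_valid_mrc_mask. A self-contained illustration of that flow:

    #include <assert.h>
    #include <stdint.h>

    /* Same shape as the placeholder generators above: threshold the
       residual and count how many positions end up masked. */
    static int build_mask(const int16_t *diff, int stride, uint8_t *mask,
                          int mask_stride, int width, int height) {
      int n_masked_vals = 0;
      for (int i = 0; i < height; ++i)
        for (int j = 0; j < width; ++j) {
          mask[i * mask_stride + j] = diff[i * stride + j] > 100 ? 1 : 0;
          n_masked_vals += mask[i * mask_stride + j];
        }
      return n_masked_vals;
    }

    static int is_valid_mrc_mask(int n, int width, int height) {
      return !(n == 0 || n == width * height);  /* all-0/all-1 is useless */
    }

    int main(void) {
      const int16_t diff[4] = { 0, 50, 150, 200 };  /* 2x2 block */
      uint8_t mask[4];
      const int n = build_mask(diff, 2, mask, 2, 2, 2);
      assert(n == 2 && is_valid_mrc_mask(n, 2, 2));  /* mixed mask: valid */
      return 0;
    }
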
-#ifdef __cplusplus
-extern "C" {
-#endif
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(int tx_type, int tx_size);
-TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(int tx_type);
+void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
+ const TXFM_2D_FLIP_CFG *cfg, int bd);
+
+void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
+ const TXFM_2D_FLIP_CFG *cfg, int8_t fwd_shift,
+ int bd);
+
+TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size);
+#if CONFIG_TX64X64
+TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(TX_TYPE tx_type);
+TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x32_cfg(TX_TYPE tx_type);
+TXFM_2D_FLIP_CFG av1_get_fwd_txfm_32x64_cfg(TX_TYPE tx_type);
+#endif // CONFIG_TX64X64
+TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size);
#ifdef __cplusplus
}
#endif // __cplusplus
diff --git a/third_party/aom/av1/common/blockd.h b/third_party/aom/av1/common/blockd.h
index 8ea64628e..01a449a1c 100644
--- a/third_party/aom/av1/common/blockd.h
+++ b/third_party/aom/av1/common/blockd.h
@@ -31,9 +31,6 @@
#include "av1/common/pvq_state.h"
#include "av1/decoder/decint.h"
#endif
-#if CONFIG_CFL
-#include "av1/common/cfl.h"
-#endif
#ifdef __cplusplus
extern "C" {
#endif
@@ -46,8 +43,6 @@ extern "C" {
#define MAX_MB_PLANE 3
-#if CONFIG_EXT_INTER
-
#if CONFIG_COMPOUND_SEGMENT
// Set COMPOUND_SEGMENT_TYPE to one of the three
// 0: Uniform
@@ -68,20 +63,28 @@ typedef enum {
} SEG_MASK_TYPE;
#endif // CONFIG_COMPOUND_SEGMENT
-#endif // CONFIG_EXT_INTER
typedef enum {
KEY_FRAME = 0,
INTER_FRAME = 1,
+#if CONFIG_OBU
+ INTRA_ONLY_FRAME = 2, // replaces intra-only
+ S_FRAME = 3,
+#endif
FRAME_TYPES,
} FRAME_TYPE;
+static INLINE int is_comp_ref_allowed(BLOCK_SIZE bsize) {
+ (void)bsize;
+#if SUB8X8_COMP_REF
+ return 1;
+#else
+ return AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
+#endif // SUB8X8_COMP_REF
+}
+
static INLINE int is_inter_mode(PREDICTION_MODE mode) {
-#if CONFIG_EXT_INTER
return mode >= NEARESTMV && mode <= NEW_NEWMV;
-#else
- return mode >= NEARESTMV && mode <= NEWMV;
-#endif // CONFIG_EXT_INTER
}
#if CONFIG_PVQ
@@ -110,12 +113,22 @@ typedef struct PVQ_QUEUE {
} PVQ_QUEUE;
#endif
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+typedef struct superblock_mi_boundaries {
+ int mi_row_begin;
+ int mi_col_begin;
+ int mi_row_end;
+ int mi_col_end;
+} SB_MI_BD;
+
+typedef struct { int16_t KERNEL[4][MAX_SB_SIZE][MAX_SB_SIZE]; } NCOBMC_KERNELS;
+#endif
+
typedef struct {
uint8_t *plane[MAX_MB_PLANE];
int stride[MAX_MB_PLANE];
} BUFFER_SET;
-#if CONFIG_EXT_INTER
static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEWMV;
}
@@ -142,13 +155,11 @@ static INLINE PREDICTION_MODE compound_ref0_mode(PREDICTION_MODE mode) {
MB_MODE_COUNT, // D153_PRED
MB_MODE_COUNT, // D207_PRED
MB_MODE_COUNT, // D63_PRED
-#if CONFIG_ALT_INTRA
MB_MODE_COUNT, // SMOOTH_PRED
#if CONFIG_SMOOTH_HV
MB_MODE_COUNT, // SMOOTH_V_PRED
MB_MODE_COUNT, // SMOOTH_H_PRED
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
MB_MODE_COUNT, // TM_PRED
MB_MODE_COUNT, // NEARESTMV
MB_MODE_COUNT, // NEARMV
@@ -190,13 +201,11 @@ static INLINE PREDICTION_MODE compound_ref1_mode(PREDICTION_MODE mode) {
MB_MODE_COUNT, // D153_PRED
MB_MODE_COUNT, // D207_PRED
MB_MODE_COUNT, // D63_PRED
-#if CONFIG_ALT_INTRA
MB_MODE_COUNT, // SMOOTH_PRED
#if CONFIG_SMOOTH_HV
MB_MODE_COUNT, // SMOOTH_V_PRED
MB_MODE_COUNT, // SMOOTH_H_PRED
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
MB_MODE_COUNT, // TM_PRED
MB_MODE_COUNT, // NEARESTMV
MB_MODE_COUNT, // NEARMV
@@ -265,17 +274,6 @@ static INLINE int is_masked_compound_type(COMPOUND_TYPE type) {
return 0;
}
-#else // !CONFIG_EXT_INTER
-
-static INLINE int have_nearmv_in_inter_mode(PREDICTION_MODE mode) {
- return (mode == NEARMV);
-}
-
-static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {
- return (mode == NEWMV);
-}
-#endif // CONFIG_EXT_INTER
-
/* For keyframes, intra block modes are predicted by the (already decoded)
modes for the Y blocks to the left and above us; for interframes, there
is a single probability table. */
@@ -284,21 +282,17 @@ typedef struct {
PREDICTION_MODE as_mode;
int_mv as_mv[2]; // first, second inter predictor motion vectors
int_mv pred_mv[2];
-#if CONFIG_EXT_INTER
int_mv ref_mv[2];
-#endif // CONFIG_EXT_INTER
} b_mode_info;
typedef int8_t MV_REFERENCE_FRAME;
-#if CONFIG_PALETTE
typedef struct {
// Number of base colors for Y (0) and UV (1)
uint8_t palette_size[2];
// Value of base colors for Y, U, and V
uint16_t palette_colors[3 * PALETTE_MAX_SIZE];
} PALETTE_MODE_INFO;
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
#define USE_3TAP_INTRA_FILTER 1 // 0: 4-tap; 1: 3-tap
@@ -328,9 +322,7 @@ typedef struct RD_STATS {
  int skip;  // sse should equal dist when skip == 1
int64_t ref_rdcost;
int zero_rate;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- int64_t dist_y;
-#endif
+ uint8_t invalid_rate;
#if CONFIG_RD_DEBUG
int txb_coeff_cost[MAX_MB_PLANE];
#if CONFIG_VAR_TX
@@ -340,7 +332,6 @@ typedef struct RD_STATS {
#endif // CONFIG_RD_DEBUG
} RD_STATS;
-#if CONFIG_EXT_INTER
// This struct is used to group function args that are commonly
// sent together in functions related to interinter compound modes
typedef struct {
@@ -354,7 +345,6 @@ typedef struct {
#endif // CONFIG_COMPOUND_SEGMENT
COMPOUND_TYPE interinter_compound_type;
} INTERINTER_COMPOUND_DATA;
-#endif // CONFIG_EXT_INTER
// This structure now relates to 8x8 block regions.
typedef struct MB_MODE_INFO {
@@ -376,26 +366,28 @@ typedef struct MB_MODE_INFO {
#endif // CONFIG_SUPERTX
int8_t seg_id_predicted; // valid only when temporal_update is enabled
+#if CONFIG_MRC_TX
+ int valid_mrc_mask;
+#endif // CONFIG_MRC_TX
+
// Only for INTRA blocks
UV_PREDICTION_MODE uv_mode;
-#if CONFIG_PALETTE
+
PALETTE_MODE_INFO palette_mode_info;
-#endif // CONFIG_PALETTE
#if CONFIG_INTRABC
uint8_t use_intrabc;
#endif // CONFIG_INTRABC
-// Only for INTER blocks
-#if CONFIG_DUAL_FILTER
- InterpFilter interp_filter[4];
-#else
- InterpFilter interp_filter;
-#endif
+ // Only for INTER blocks
+ InterpFilters interp_filters;
MV_REFERENCE_FRAME ref_frame[2];
TX_TYPE tx_type;
#if CONFIG_TXK_SEL
TX_TYPE txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
#endif
+#if CONFIG_LGT_FROM_PRED
+ int use_lgt;
+#endif
#if CONFIG_FILTER_INTRA
FILTER_INTRA_MODE_INFO filter_intra_mode_info;
@@ -409,7 +401,6 @@ typedef struct MB_MODE_INFO {
#endif // CONFIG_INTRA_INTERP
#endif // CONFIG_EXT_INTRA
-#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
// interintra members
INTERINTRA_MODE interintra_mode;
@@ -427,7 +418,6 @@ typedef struct MB_MODE_INFO {
#if CONFIG_COMPOUND_SEGMENT
SEG_MASK_TYPE mask_type;
#endif // CONFIG_COMPOUND_SEGMENT
-#endif // CONFIG_EXT_INTER
MOTION_MODE motion_mode;
#if CONFIG_MOTION_VAR
int overlappable_neighbors[2];
@@ -437,7 +427,7 @@ typedef struct MB_MODE_INFO {
  // blocks. A rectangular block is divided into two square blocks and each
  // square block has an interpolation mode.
NCOBMC_MODE ncobmc_mode[2];
-#endif
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#endif // CONFIG_MOTION_VAR
int_mv mv[2];
int_mv pred_mv[2];
@@ -451,11 +441,12 @@ typedef struct MB_MODE_INFO {
#endif // CONFIG_NEW_QUANT
/* deringing gain *per-superblock* */
int8_t cdef_strength;
-#if CONFIG_DELTA_Q
int current_q_index;
#if CONFIG_EXT_DELTA_Q
int current_delta_lf_from_base;
-#endif
+#if CONFIG_LOOPFILTER_LEVEL
+ int curr_delta_lf[FRAME_LF_COUNT];
+#endif // CONFIG_LOOPFILTER_LEVEL
#endif
#if CONFIG_RD_DEBUG
RD_STATS rd_stats;
@@ -470,11 +461,17 @@ typedef struct MB_MODE_INFO {
#if CONFIG_CFL
// Index of the alpha Cb and alpha Cr combination
int cfl_alpha_idx;
- // Signs of alpha Cb and alpha Cr
- CFL_SIGN_TYPE cfl_alpha_signs[CFL_PRED_PLANES];
+ // Joint sign of alpha Cb and alpha Cr
+ int cfl_alpha_signs;
#endif
BOUNDARY_TYPE boundary_info;
+#if CONFIG_LPF_SB
+ uint8_t filt_lvl;
+ int reuse_sb_lvl;
+ int sign;
+ int delta;
+#endif
} MB_MODE_INFO;
typedef struct MODE_INFO {
@@ -500,23 +497,22 @@ static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) {
#if CONFIG_CFL
static INLINE PREDICTION_MODE get_uv_mode(UV_PREDICTION_MODE mode) {
static const PREDICTION_MODE uv2y[UV_INTRA_MODES] = {
- DC_PRED, // UV_DC_PRED
- V_PRED, // UV_V_PRED
- H_PRED, // UV_H_PRED
- D45_PRED, // UV_D45_PRED
- D135_PRED, // UV_D135_PRED
- D117_PRED, // UV_D117_PRED
- D153_PRED, // UV_D153_PRED
- D207_PRED, // UV_D207_PRED
- D63_PRED, // UV_D63_PRED
-#if CONFIG_ALT_INTRA
+ DC_PRED, // UV_DC_PRED
+ V_PRED, // UV_V_PRED
+ H_PRED, // UV_H_PRED
+ D45_PRED, // UV_D45_PRED
+ D135_PRED, // UV_D135_PRED
+ D117_PRED, // UV_D117_PRED
+ D153_PRED, // UV_D153_PRED
+ D207_PRED, // UV_D207_PRED
+ D63_PRED, // UV_D63_PRED
SMOOTH_PRED, // UV_SMOOTH_PRED
#if CONFIG_SMOOTH_HV
SMOOTH_V_PRED, // UV_SMOOTH_V_PRED
SMOOTH_H_PRED, // UV_SMOOTH_H_PRED
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
TM_PRED, // UV_TM_PRED
+ DC_PRED, // CFL_PRED
};
return uv2y[mode];
}
@@ -578,14 +574,11 @@ static INLINE int is_global_mv_block(const MODE_INFO *mi, int block,
const int block_size_allowed = 1;
#else
const BLOCK_SIZE bsize = mi->mbmi.sb_type;
- const int block_size_allowed = (bsize >= BLOCK_8X8);
+ const int block_size_allowed =
+ AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
#endif // GLOBAL_SUB8X8_USED
-#if CONFIG_EXT_INTER
return (mode == ZEROMV || mode == ZERO_ZEROMV) && type > TRANSLATION &&
block_size_allowed;
-#else
- return mode == ZEROMV && type > TRANSLATION && block_size_allowed;
-#endif // CONFIG_EXT_INTER
}
#endif // CONFIG_GLOBAL_MOTION
@@ -613,9 +606,7 @@ typedef struct macroblockd_plane {
dequant_val_type_nuq seg_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES]
[COEF_BANDS];
#endif
-#if CONFIG_PALETTE
uint8_t *color_index_map;
-#endif // CONFIG_PALETTE
// number of 4x4s in current block
uint16_t n4_w, n4_h;
@@ -625,8 +616,8 @@ typedef struct macroblockd_plane {
uint8_t width, height;
#if CONFIG_AOM_QM
- const qm_val_t *seg_iqmatrix[MAX_SEGMENTS][2][TX_SIZES_ALL];
- const qm_val_t *seg_qmatrix[MAX_SEGMENTS][2][TX_SIZES_ALL];
+ qm_val_t *seg_iqmatrix[MAX_SEGMENTS][2][TX_SIZES_ALL];
+ qm_val_t *seg_qmatrix[MAX_SEGMENTS][2][TX_SIZES_ALL];
#endif
// encoder
const int16_t *dequant;
@@ -659,6 +650,63 @@ typedef struct RefBuffer {
typedef int16_t EobThresholdMD[TX_TYPES][EOB_THRESHOLD_NUM];
#endif
+#if CONFIG_LOOP_RESTORATION
+typedef struct {
+ DECLARE_ALIGNED(16, InterpKernel, vfilter);
+ DECLARE_ALIGNED(16, InterpKernel, hfilter);
+} WienerInfo;
+
+typedef struct {
+ int ep;
+ int xqd[2];
+} SgrprojInfo;
+#endif // CONFIG_LOOP_RESTORATION
+
+#if CONFIG_CFL
+#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+#define CFL_SUB8X8_VAL_MI_SIZE (4)
+#define CFL_SUB8X8_VAL_MI_SQUARE \
+ (CFL_SUB8X8_VAL_MI_SIZE * CFL_SUB8X8_VAL_MI_SIZE)
+#endif // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+typedef struct cfl_ctx {
+ // The CfL prediction buffer is used in two steps:
+ // 1. Stores Q3 reconstructed luma pixels
+ // (only Q2 is required, but Q3 is used to avoid shifts)
+ // 2. Stores Q3 AC contributions (step1 - tx block avg)
+ int16_t pred_buf_q3[MAX_SB_SQUARE];
+
+ // Height and width currently used in the CfL prediction buffer.
+ int buf_height, buf_width;
+
+ // Height and width of the chroma prediction block currently associated with
+ // this context
+ int uv_height, uv_width;
+
+ int are_parameters_computed;
+
+ // Chroma subsampling
+ int subsampling_x, subsampling_y;
+
+ // Block level DC_PRED for each chromatic plane
+ int dc_pred[CFL_PRED_PLANES];
+
+ int mi_row, mi_col;
+
+ // Whether the reconstructed luma pixels need to be stored
+ int store_y;
+
+#if CONFIG_CB4X4
+ int is_chroma_reference;
+#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+  // The prediction used for sub8x8 blocks originates from multiple luma
+  // blocks; this array is used to validate that cfl_store() is called only
+  // once for each luma block.
+ uint8_t sub8x8_val[CFL_SUB8X8_VAL_MI_SQUARE];
+#endif // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+#endif // CONFIG_CB4X4
+} CFL_CTX;
+#endif // CONFIG_CFL
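
Editor's note: the pred_buf_q3 comments describe a two-step use: the buffer first holds Q3 reconstructed luma, then is overwritten with Q3 AC contributions, i.e. each sample minus its transform-block average. A simplified standalone sketch of that second step; the flat buffer, rounding, and whole-buffer granularity are illustrative only:

#include <stdint.h>
#include <stdio.h>

/* Subtract the (rounded) mean so only AC contributions remain.
   Assumes non-negative Q3 inputs, as reconstructed luma is. */
static void cfl_subtract_average(int16_t *pred_buf_q3, int width, int height) {
  const int num = width * height;
  int32_t sum = 0;
  for (int i = 0; i < num; i++) sum += pred_buf_q3[i];
  const int16_t avg = (int16_t)((sum + num / 2) / num);
  for (int i = 0; i < num; i++) pred_buf_q3[i] -= avg;
}

int main(void) {
  int16_t buf[4] = { 8, 16, 24, 32 }; /* Q3 for 1.0, 2.0, 3.0, 4.0 */
  cfl_subtract_average(buf, 2, 2);
  for (int i = 0; i < 4; i++) printf("%d ", buf[i]); /* -12 -4 4 12 */
  printf("\n");
  return 0;
}
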
+
typedef struct macroblockd {
struct macroblockd_plane plane[MAX_MB_PLANE];
uint8_t bmode_blocks_wl;
@@ -684,7 +732,7 @@ typedef struct macroblockd {
const aom_prob (*partition_probs)[PARTITION_TYPES - 1];
- /* Distance of MB away from frame edges */
+ /* Distance of MB away from frame edges in subpixels (1/8th pixel) */
int mb_to_left_edge;
int mb_to_right_edge;
int mb_to_top_edge;
@@ -720,6 +768,11 @@ typedef struct macroblockd {
#endif
#endif
+#if CONFIG_LOOP_RESTORATION
+ WienerInfo wiener_info[MAX_MB_PLANE];
+ SgrprojInfo sgrproj_info[MAX_MB_PLANE];
+#endif // CONFIG_LOOP_RESTORATION
+
// block dimension in the unit of mode_info.
uint8_t n8_w, n8_h;
@@ -737,12 +790,14 @@ typedef struct macroblockd {
int qindex[MAX_SEGMENTS];
int lossless[MAX_SEGMENTS];
int corrupted;
-
+#if CONFIG_AMVR
+ int cur_frame_mv_precision_level;
+  // Same as the corresponding field in AV1_COMMON.
+#endif
struct aom_internal_error_info *error_info;
#if CONFIG_GLOBAL_MOTION
WarpedMotionParams *global_motion;
#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_DELTA_Q
int prev_qindex;
int delta_qindex;
int current_qindex;
@@ -755,19 +810,45 @@ typedef struct macroblockd {
// superblock's actual lf and current lf.
int prev_delta_lf_from_base;
int current_delta_lf_from_base;
-#endif
+#if CONFIG_LOOPFILTER_LEVEL
+  // For this experiment there are four frame filter levels covering the
+  // different planes and directions. To support per-superblock updates, we
+  // need a few more parameters, as below.
+  // 0: delta loop filter level for y plane vertical
+  // 1: delta loop filter level for y plane horizontal
+  // 2: delta loop filter level for u plane
+  // 3: delta loop filter level for v plane
+  // To stay consistent with how each filter level is referenced in the
+  // segment features, subtract 1 from the index, since
+ // SEG_LVL_ALT_LF_Y_V = 1;
+ // SEG_LVL_ALT_LF_Y_H = 2;
+ // SEG_LVL_ALT_LF_U = 3;
+ // SEG_LVL_ALT_LF_V = 4;
+ int prev_delta_lf[FRAME_LF_COUNT];
+ int curr_delta_lf[FRAME_LF_COUNT];
+#endif // CONFIG_LOOPFILTER_LEVEL
#endif
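
Editor's note: the comment above pins down an off-by-one relationship between the delta-LF array index (0..3) and the segment feature ids (1..4). A tiny standalone sketch that states the mapping directly; the enum values are defined locally from the constants quoted in the comment:

#include <assert.h>

enum {
  SEG_LVL_ALT_LF_Y_V = 1,
  SEG_LVL_ALT_LF_Y_H = 2,
  SEG_LVL_ALT_LF_U = 3,
  SEG_LVL_ALT_LF_V = 4
};

/* delta-LF index 0..3 maps to segment feature id 1..4 */
static int delta_lf_idx_to_seg_feature(int idx) { return idx + 1; }

int main(void) {
  assert(delta_lf_idx_to_seg_feature(0) == SEG_LVL_ALT_LF_Y_V);
  assert(delta_lf_idx_to_seg_feature(3) == SEG_LVL_ALT_LF_V);
  return 0;
}
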
#if CONFIG_ADAPT_SCAN
const EobThresholdMD *eob_threshold_md;
#endif
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SEGMENT
+#if CONFIG_COMPOUND_SEGMENT
DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SEGMENT
+#endif // CONFIG_COMPOUND_SEGMENT
+
+#if CONFIG_MRC_TX
+ uint8_t *mrc_mask;
+#endif // CONFIG_MRC_TX
#if CONFIG_CFL
CFL_CTX *cfl;
#endif
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ uint8_t *ncobmc_pred_buf[MAX_MB_PLANE];
+ int ncobmc_pred_buf_stride[MAX_MB_PLANE];
+ SB_MI_BD sb_mi_bd;
+#endif
} MACROBLOCKD;
static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) {
@@ -792,13 +873,11 @@ static const TX_TYPE intra_mode_to_tx_type_context[INTRA_MODES] = {
DCT_ADST, // D153
DCT_ADST, // D207
ADST_DCT, // D63
-#if CONFIG_ALT_INTRA
ADST_ADST, // SMOOTH
#if CONFIG_SMOOTH_HV
ADST_DCT, // SMOOTH_V
DCT_ADST, // SMOOTH_H
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
ADST_ADST, // TM
};
@@ -816,78 +895,100 @@ static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) {
static INLINE int is_rect_tx(TX_SIZE tx_size) { return tx_size >= TX_SIZES; }
#endif // CONFIG_RECT_TX
-#if CONFIG_EXT_TX
-#define ALLOW_INTRA_EXT_TX 1
+static INLINE int block_signals_txsize(BLOCK_SIZE bsize) {
+#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_EXT_TX) && CONFIG_RECT_TX
+ return bsize > BLOCK_4X4;
+#else
+ return bsize >= BLOCK_8X8;
+#endif
+}
-typedef enum {
- // DCT only
- EXT_TX_SET_DCTONLY = 0,
- // DCT + Identity only
- EXT_TX_SET_DCT_IDTX,
#if CONFIG_MRC_TX
- // DCT + MRC_DCT
- EXT_TX_SET_MRC_DCT,
- // DCT + MRC_DCT + IDTX
- EXT_TX_SET_MRC_DCT_IDTX,
+#define USE_MRC_INTRA 0
+#define USE_MRC_INTER 1
+#define SIGNAL_MRC_MASK_INTRA (USE_MRC_INTRA && 0)
+#define SIGNAL_MRC_MASK_INTER (USE_MRC_INTER && 1)
+#define SIGNAL_ANY_MRC_MASK (SIGNAL_MRC_MASK_INTRA || SIGNAL_MRC_MASK_INTER)
#endif // CONFIG_MRC_TX
- // Discrete Trig transforms w/o flip (4) + Identity (1)
- EXT_TX_SET_DTT4_IDTX,
- // Discrete Trig transforms w/o flip (4) + Identity (1) + 1D Hor/vert DCT (2)
- EXT_TX_SET_DTT4_IDTX_1DDCT,
- // Discrete Trig transforms w/ flip (9) + Identity (1) + 1D Hor/Ver DCT (2)
- EXT_TX_SET_DTT9_IDTX_1DDCT,
- // Discrete Trig transforms w/ flip (9) + Identity (1) + 1D Hor/Ver (6)
- EXT_TX_SET_ALL16,
- EXT_TX_SET_TYPES
-} TxSetType;
-#if CONFIG_MRC_TX
+#if CONFIG_EXT_TX
+#define ALLOW_INTRA_EXT_TX 1
+
// Number of transform types in each set type
-static const int num_ext_tx_set[EXT_TX_SET_TYPES] = {
- 1, 2, 2, 3, 5, 7, 12, 16
+static const int av1_num_ext_tx_set[EXT_TX_SET_TYPES] = {
+ 1, 2,
+#if CONFIG_MRC_TX
+ 2, 3,
+#endif // CONFIG_MRC_TX
+ 5, 7, 12, 16,
};
-// Maps intra set index to the set type
-static const int ext_tx_set_type_intra[EXT_TX_SETS_INTRA] = {
- EXT_TX_SET_DCTONLY, EXT_TX_SET_DTT4_IDTX_1DDCT, EXT_TX_SET_DTT4_IDTX,
- EXT_TX_SET_MRC_DCT
+static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
+ EXT_TX_SETS_INTER)] = {
+ {
+ // Intra
+ EXT_TX_SET_DCTONLY, EXT_TX_SET_DTT4_IDTX_1DDCT, EXT_TX_SET_DTT4_IDTX,
+#if CONFIG_MRC_TX
+ EXT_TX_SET_MRC_DCT,
+#endif // CONFIG_MRC_TX
+ },
+ {
+ // Inter
+ EXT_TX_SET_DCTONLY, EXT_TX_SET_ALL16, EXT_TX_SET_DTT9_IDTX_1DDCT,
+ EXT_TX_SET_DCT_IDTX,
+#if CONFIG_MRC_TX
+ EXT_TX_SET_MRC_DCT_IDTX,
+#endif // CONFIG_MRC_TX
+ }
};
-// Maps inter set index to the set type
-static const int ext_tx_set_type_inter[EXT_TX_SETS_INTER] = {
- EXT_TX_SET_DCTONLY, EXT_TX_SET_ALL16, EXT_TX_SET_DTT9_IDTX_1DDCT,
- EXT_TX_SET_DCT_IDTX, EXT_TX_SET_MRC_DCT_IDTX
+#if CONFIG_MRC_TX
+static const int av1_ext_tx_used[EXT_TX_SET_TYPES][TX_TYPES] = {
+ {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ },
+ {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
+ },
+ {
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
+ },
+ {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+ },
+ {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+ },
};
-
-// Maps set types above to the indices used for intra
-static const int ext_tx_set_index_intra[EXT_TX_SET_TYPES] = { 0, -1, 3, -1,
- 2, 1, -1, -1 };
-
-// Maps set types above to the indices used for inter
-static const int ext_tx_set_index_inter[EXT_TX_SET_TYPES] = { 0, 3, -1, 4,
- -1, -1, 2, 1 };
#else // CONFIG_MRC_TX
-// Number of transform types in each set type
-static const int num_ext_tx_set[EXT_TX_SET_TYPES] = { 1, 2, 5, 7, 12, 16 };
-
-// Maps intra set index to the set type
-static const int ext_tx_set_type_intra[EXT_TX_SETS_INTRA] = {
- EXT_TX_SET_DCTONLY, EXT_TX_SET_DTT4_IDTX_1DDCT, EXT_TX_SET_DTT4_IDTX
-};
-
-// Maps inter set index to the set type
-static const int ext_tx_set_type_inter[EXT_TX_SETS_INTER] = {
- EXT_TX_SET_DCTONLY, EXT_TX_SET_ALL16, EXT_TX_SET_DTT9_IDTX_1DDCT,
- EXT_TX_SET_DCT_IDTX
-};
-
-// Maps set types above to the indices used for intra
-static const int ext_tx_set_index_intra[EXT_TX_SET_TYPES] = { 0, -1, 2,
- 1, -1, -1 };
-
-// Maps set types above to the indices used for inter
-static const int ext_tx_set_index_inter[EXT_TX_SET_TYPES] = {
- 0, 3, -1, -1, 2, 1
+static const int av1_ext_tx_used[EXT_TX_SET_TYPES][TX_TYPES] = {
+ {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
+ },
+ {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+ },
+ {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ },
};
#endif // CONFIG_MRC_TX
@@ -897,15 +998,19 @@ static INLINE TxSetType get_ext_tx_set_type(TX_SIZE tx_size, BLOCK_SIZE bs,
const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
#if CONFIG_CB4X4 && USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
(void)bs;
- if (tx_size_sqr > TX_32X32) return EXT_TX_SET_DCTONLY;
+ if (tx_size_sqr_up > TX_32X32) return EXT_TX_SET_DCTONLY;
#else
- if (tx_size_sqr > TX_32X32 || bs < BLOCK_8X8) return EXT_TX_SET_DCTONLY;
+ if (tx_size_sqr_up > TX_32X32 || bs < BLOCK_8X8) return EXT_TX_SET_DCTONLY;
#endif
if (use_reduced_set)
return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX;
#if CONFIG_MRC_TX
- if (tx_size == TX_32X32)
- return is_inter ? EXT_TX_SET_MRC_DCT_IDTX : EXT_TX_SET_MRC_DCT;
+ if (tx_size == TX_32X32) {
+ if (is_inter && USE_MRC_INTER)
+ return EXT_TX_SET_MRC_DCT_IDTX;
+ else if (!is_inter && USE_MRC_INTRA)
+ return EXT_TX_SET_MRC_DCT;
+ }
#endif // CONFIG_MRC_TX
if (tx_size_sqr_up == TX_32X32)
return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DCTONLY;
@@ -917,133 +1022,69 @@ static INLINE TxSetType get_ext_tx_set_type(TX_SIZE tx_size, BLOCK_SIZE bs,
: EXT_TX_SET_DTT4_IDTX_1DDCT);
}
+// Maps tx set types to the indices.
+static const int ext_tx_set_index[2][EXT_TX_SET_TYPES] = {
+ {
+ // Intra
+ 0, -1,
+#if CONFIG_MRC_TX
+ 3, -1,
+#endif // CONFIG_MRC_TX
+ 2, 1, -1, -1,
+ },
+ {
+ // Inter
+ 0, 3,
+#if CONFIG_MRC_TX
+ -1, 4,
+#endif // CONFIG_MRC_TX
+ -1, -1, 2, 1,
+ },
+};
+
static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter,
int use_reduced_set) {
const TxSetType set_type =
get_ext_tx_set_type(tx_size, bs, is_inter, use_reduced_set);
- return is_inter ? ext_tx_set_index_inter[set_type]
- : ext_tx_set_index_intra[set_type];
+ return ext_tx_set_index[is_inter][set_type];
}
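
Editor's note: with the separate intra and inter index arrays merged into one two-dimensional ext_tx_set_index table, the lookup collapses to a single indexed access. A standalone consistency sketch for the non-MRC layout, copying the enum order and table contents shown above and checking that the idx-to-type and type-to-idx tables invert each other:

#include <assert.h>

enum {
  SET_DCTONLY, SET_DCT_IDTX, SET_DTT4_IDTX, SET_DTT4_IDTX_1DDCT,
  SET_DTT9_IDTX_1DDCT, SET_ALL16, SET_TYPES
};

static const int idx_to_type[2][4] = {
  { SET_DCTONLY, SET_DTT4_IDTX_1DDCT, SET_DTT4_IDTX, -1 },       /* intra */
  { SET_DCTONLY, SET_ALL16, SET_DTT9_IDTX_1DDCT, SET_DCT_IDTX }, /* inter */
};

static const int type_to_idx[2][SET_TYPES] = {
  { 0, -1, 2, 1, -1, -1 }, /* intra */
  { 0, 3, -1, -1, 2, 1 },  /* inter */
};

int main(void) {
  for (int is_inter = 0; is_inter < 2; is_inter++)
    for (int idx = 0; idx < 4; idx++) {
      const int type = idx_to_type[is_inter][idx];
      if (type >= 0) assert(type_to_idx[is_inter][type] == idx);
    }
  return 0;
}
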
-#if CONFIG_MRC_TX
-static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][EXT_TX_SIZES] =
- {
-#if CONFIG_CHROMA_2X2
- { 1, 1, 1, 1, 1 }, // unused
- { 0, 1, 1, 0, 0 },
- { 0, 0, 0, 1, 0 },
- { 0, 0, 0, 0, 1 },
-#else
- { 1, 1, 1, 1 }, // unused
- { 1, 1, 0, 0 },
- { 0, 0, 1, 0 },
- { 0, 0, 0, 1 },
-#endif // CONFIG_CHROMA_2X2
- };
-
-static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][EXT_TX_SIZES] =
- {
-#if CONFIG_CHROMA_2X2
- { 1, 1, 1, 1, 1 }, // unused
- { 0, 1, 1, 0, 0 }, { 0, 0, 0, 1, 0 },
- { 0, 0, 0, 0, 1 }, { 0, 0, 0, 0, 1 },
-#else
- { 1, 1, 1, 1 }, // unused
- { 1, 1, 0, 0 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 }, { 0, 0, 0, 1 },
-#endif // CONFIG_CHROMA_2X2
- };
-
-// Transform types used in each intra set
-static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1 },
-};
-
-// Numbers of transform types used in each intra set
-static const int ext_tx_cnt_intra[EXT_TX_SETS_INTRA] = { 1, 7, 5, 2 };
-
-// Transform types used in each inter set
-static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1 },
-};
-
-// Numbers of transform types used in each inter set
-static const int ext_tx_cnt_inter[EXT_TX_SETS_INTER] = { 1, 16, 12, 2, 3 };
-
-// 1D Transforms used in inter set, this needs to be changed if
-// ext_tx_used_inter is changed
-static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
- { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 }, { 1, 0, 0, 1 }
-};
-#else // CONFIG_MRC_TX
-static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][EXT_TX_SIZES] =
- {
-#if CONFIG_CHROMA_2X2
- { 1, 1, 1, 1, 1 }, // unused
- { 0, 1, 1, 0, 0 },
- { 0, 0, 0, 1, 0 },
-#else
- { 1, 1, 1, 1 }, // unused
- { 1, 1, 0, 0 },
- { 0, 0, 1, 0 },
-#endif // CONFIG_CHROMA_2X2
- };
-
-static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][EXT_TX_SIZES] =
- {
-#if CONFIG_CHROMA_2X2
- { 1, 1, 1, 1, 1 }, // unused
- { 0, 1, 1, 0, 0 },
- { 0, 0, 0, 1, 0 },
- { 0, 0, 0, 0, 1 },
-#else
- { 1, 1, 1, 1 }, // unused
- { 1, 1, 0, 0 },
- { 0, 0, 1, 0 },
- { 0, 0, 0, 1 },
-#endif // CONFIG_CHROMA_2X2
- };
-
-// Transform types used in each intra set
-static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
-};
-
-// Numbers of transform types used in each intra set
-static const int ext_tx_cnt_intra[EXT_TX_SETS_INTRA] = { 1, 7, 5 };
-
-// Transform types used in each inter set
-static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
-};
-
-// Numbers of transform types used in each inter set
-static const int ext_tx_cnt_inter[EXT_TX_SETS_INTER] = { 1, 16, 12, 2 };
-
-// 1D Transforms used in inter set, this needs to be changed if
-// ext_tx_used_inter is changed
-static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
- { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 },
-};
-#endif // CONFIG_MRC_TX
-
static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter,
int use_reduced_set) {
const int set_type =
get_ext_tx_set_type(tx_size, bs, is_inter, use_reduced_set);
- return num_ext_tx_set[set_type];
+ return av1_num_ext_tx_set[set_type];
+}
+
+#if CONFIG_LGT_FROM_PRED
+static INLINE int is_lgt_allowed(PREDICTION_MODE mode, TX_SIZE tx_size) {
+ if (!LGT_FROM_PRED_INTRA && !is_inter_mode(mode)) return 0;
+ if (!LGT_FROM_PRED_INTER && is_inter_mode(mode)) return 0;
+
+ switch (mode) {
+ case D45_PRED:
+ case D63_PRED:
+ case D117_PRED:
+ case V_PRED:
+#if CONFIG_SMOOTH_HV
+ case SMOOTH_V_PRED:
+#endif
+ return tx_size_wide[tx_size] <= 8;
+ case D135_PRED:
+ case D153_PRED:
+ case D207_PRED:
+ case H_PRED:
+#if CONFIG_SMOOTH_HV
+ case SMOOTH_H_PRED:
+#endif
+ return tx_size_high[tx_size] <= 8;
+ case DC_PRED:
+ case SMOOTH_PRED: return 0;
+ case TM_PRED:
+ default: return tx_size_wide[tx_size] <= 8 || tx_size_high[tx_size] <= 8;
+ }
}
+#endif // CONFIG_LGT_FROM_PRED
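
Editor's note: is_lgt_allowed gates the prediction-based graph transforms by direction class: the roughly-vertical modes (V_PRED, D45/D63/D117, SMOOTH_V) need a transform at most 8 wide, the roughly-horizontal ones at most 8 high, DC_PRED and SMOOTH_PRED never use it, and TM_PRED accepts either cap. A toy restatement with stand-in classes rather than the real mode enum:

#include <stdio.h>

typedef enum {
  ROUGHLY_VERTICAL, ROUGHLY_HORIZONTAL, NON_DIRECTIONAL, TM_LIKE
} DirClass;

static int lgt_allowed(DirClass c, int tx_w, int tx_h) {
  switch (c) {
    case ROUGHLY_VERTICAL: return tx_w <= 8;
    case ROUGHLY_HORIZONTAL: return tx_h <= 8;
    case NON_DIRECTIONAL: return 0;           /* DC_PRED, SMOOTH_PRED */
    default: return tx_w <= 8 || tx_h <= 8;   /* TM_PRED */
  }
}

int main(void) {
  printf("%d\n", lgt_allowed(ROUGHLY_VERTICAL, 8, 16)); /* 1: narrow enough */
  printf("%d\n", lgt_allowed(ROUGHLY_VERTICAL, 16, 8)); /* 0: too wide */
  return 0;
}
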
#if CONFIG_RECT_TX
static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
@@ -1063,8 +1104,8 @@ static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
1, // BLOCK_16X32
1, // BLOCK_32X16
0, // BLOCK_32X32
- 0, // BLOCK_32X64
- 0, // BLOCK_64X32
+ 1, // BLOCK_32X64
+ 1, // BLOCK_64X32
0, // BLOCK_64X64
#if CONFIG_EXT_PARTITION
0, // BLOCK_64X128
@@ -1075,6 +1116,12 @@ static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
0, // BLOCK_16X4
0, // BLOCK_8X32
0, // BLOCK_32X8
+ 0, // BLOCK_16X64
+ 0, // BLOCK_64X16
+#if CONFIG_EXT_PARTITION
+ 0, // BLOCK_32X128
+ 0, // BLOCK_128X32
+#endif // CONFIG_EXT_PARTITION
};
return LUT[bsize];
@@ -1118,6 +1165,12 @@ static INLINE int is_quarter_tx_allowed_bsize(BLOCK_SIZE bsize) {
0, // BLOCK_16X4
0, // BLOCK_8X32
0, // BLOCK_32X8
+ 0, // BLOCK_16X64
+ 0, // BLOCK_64X16
+#if CONFIG_EXT_PARTITION
+ 0, // BLOCK_32X128
+ 0, // BLOCK_128X32
+#endif // CONFIG_EXT_PARTITION
};
return LUT_QTTX[bsize];
@@ -1168,13 +1221,10 @@ static INLINE TX_SIZE tx_size_from_tx_mode(BLOCK_SIZE bsize, TX_MODE tx_mode,
#define ANGLE_STEP 3
extern const int16_t dr_intra_derivative[90];
static const uint8_t mode_to_angle_map[] = {
- 0, 90, 180, 45, 135, 111, 157, 203, 67, 0,
-#if CONFIG_ALT_INTRA
- 0,
+ 0, 90, 180, 45, 135, 111, 157, 203, 67, 0, 0,
#if CONFIG_SMOOTH_HV
0, 0,
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
};
#if CONFIG_INTRA_INTERP
// Returns whether filter selection is needed for a given
@@ -1210,19 +1260,6 @@ static INLINE int av1_raster_order_to_block_index(TX_SIZE tx_size,
return (tx_size == TX_4X4) ? raster_order : (raster_order > 0) ? 2 : 0;
}
-#if CONFIG_DPCM_INTRA || CONFIG_LGT
-static INLINE PREDICTION_MODE get_prediction_mode(const MODE_INFO *mi,
- int plane, TX_SIZE tx_size,
- int block_idx) {
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
- if (is_inter_block(mbmi)) return mbmi->mode;
-
- int block_raster_idx = av1_block_index_to_raster_order(tx_size, block_idx);
- return (plane == PLANE_TYPE_Y) ? get_y_mode(mi, block_raster_idx)
- : get_uv_mode(mbmi->uv_mode);
-}
-#endif
-
static INLINE TX_TYPE get_default_tx_type(PLANE_TYPE plane_type,
const MACROBLOCKD *xd, int block_idx,
TX_SIZE tx_size) {
@@ -1273,6 +1310,9 @@ static INLINE TX_TYPE av1_get_tx_type(PLANE_TYPE plane_type,
#if CONFIG_EXT_TX
#if CONFIG_MRC_TX
if (mbmi->tx_type == MRC_DCT) {
+ assert(((is_inter_block(mbmi) && USE_MRC_INTER) ||
+ (!is_inter_block(mbmi) && USE_MRC_INTRA)) &&
+ "INVALID BLOCK TYPE FOR MRC_DCT");
if (plane_type == PLANE_TYPE_Y) {
assert(tx_size == TX_32X32);
return mbmi->tx_type;
@@ -1415,7 +1455,6 @@ void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
int plane, TX_SIZE tx_size, int has_eob, int aoff,
int loff);
-#if CONFIG_EXT_INTER
static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) {
#if CONFIG_INTERINTRA
// TODO(debargha): Should this be bsize < BLOCK_LARGEST?
@@ -1464,32 +1503,46 @@ static INLINE int is_interintra_allowed_bsize_group(int group) {
static INLINE int is_interintra_pred(const MB_MODE_INFO *mbmi) {
return (mbmi->ref_frame[1] == INTRA_FRAME) && is_interintra_allowed(mbmi);
}
-#endif // CONFIG_EXT_INTER
#if CONFIG_VAR_TX
static INLINE int get_vartx_max_txsize(const MB_MODE_INFO *const mbmi,
- BLOCK_SIZE bsize) {
+ BLOCK_SIZE bsize, int subsampled) {
#if CONFIG_CB4X4
(void)mbmi;
- return max_txsize_rect_lookup[bsize];
+ TX_SIZE max_txsize = max_txsize_rect_lookup[bsize];
+#else
+ TX_SIZE max_txsize = mbmi->sb_type < BLOCK_8X8
+ ? max_txsize_rect_lookup[mbmi->sb_type]
+ : max_txsize_rect_lookup[bsize];
#endif  // CONFIG_CB4X4
- return mbmi->sb_type < BLOCK_8X8 ? max_txsize_rect_lookup[mbmi->sb_type]
- : max_txsize_rect_lookup[bsize];
+
+#if CONFIG_EXT_PARTITION && CONFIG_TX64X64
+ // The decoder is designed so that it can process 64x64 luma pixels at a
+ // time. If this is a chroma plane with subsampling and bsize corresponds to
+ // a subsampled BLOCK_128X128 then the lookup above will give TX_64X64. That
+ // mustn't be used for the subsampled plane (because it would be bigger than
+ // a 64x64 luma block) so we round down to TX_32X32.
+ if (subsampled && max_txsize == TX_64X64) max_txsize = TX_32X32;
+#else
+ (void)subsampled;
+#endif
+
+ return max_txsize;
}
#endif // CONFIG_VAR_TX
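
Editor's note: the new subsampled argument exists for the corner case spelled out in the comment: with 4:2:0 subsampling, the chroma plane of a BLOCK_128X128 is 64x64, whose natural lookup would yield TX_64X64 and exceed the decoder's 64x64-luma pipeline. A sketch of the clamp using plain pixel dimensions rather than TX_SIZE enums:

#include <stdio.h>

static int max_tx_dim(int block_dim, int subsampled) {
  int tx = block_dim > 64 ? 64 : block_dim; /* stand-in for the size lookup */
  if (subsampled && tx == 64) tx = 32;      /* round TX_64X64 down to TX_32X32 */
  return tx;
}

int main(void) {
  printf("luma 128x128 -> %d\n", max_tx_dim(128, 0));      /* 64 */
  printf("chroma 64x64 -> %d\n", max_tx_dim(128 >> 1, 1)); /* 32 */
  return 0;
}
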
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
static INLINE int is_motion_variation_allowed_bsize(BLOCK_SIZE bsize) {
- return (bsize >= BLOCK_8X8);
+ return AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
}
static INLINE int is_motion_variation_allowed_compound(
const MB_MODE_INFO *mbmi) {
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi) && !is_inter_singleref_comp_mode(mbmi->mode))
#else
if (!has_second_ref(mbmi))
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
return 1;
else
return 0;
@@ -1503,7 +1556,15 @@ static INLINE int check_num_overlappable_neighbors(const MB_MODE_INFO *mbmi) {
return !(mbmi->overlappable_neighbors[0] == 0 &&
mbmi->overlappable_neighbors[1] == 0);
}
-#endif
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+static INLINE NCOBMC_MODE ncobmc_mode_allowed_bsize(BLOCK_SIZE bsize) {
+ if (bsize < BLOCK_8X8 || bsize >= BLOCK_64X64)
+ return NO_OVERLAP;
+ else
+ return MAX_NCOBMC_MODES;
+}
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
+#endif // CONFIG_MOTION_VAR
static INLINE MOTION_MODE motion_mode_allowed(
#if CONFIG_GLOBAL_MOTION
@@ -1514,28 +1575,40 @@ static INLINE MOTION_MODE motion_mode_allowed(
#endif
const MODE_INFO *mi) {
const MB_MODE_INFO *mbmi = &mi->mbmi;
+#if CONFIG_AMVR
+ if (xd->cur_frame_mv_precision_level == 0) {
+#endif
#if CONFIG_GLOBAL_MOTION
- const TransformationType gm_type = gm_params[mbmi->ref_frame[0]].wmtype;
- if (is_global_mv_block(mi, block, gm_type)) return SIMPLE_TRANSLATION;
+ const TransformationType gm_type = gm_params[mbmi->ref_frame[0]].wmtype;
+ if (is_global_mv_block(mi, block, gm_type)) return SIMPLE_TRANSLATION;
#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_EXT_INTER
+#if CONFIG_AMVR
+ }
+#endif
if (is_motion_variation_allowed_bsize(mbmi->sb_type) &&
is_inter_mode(mbmi->mode) && mbmi->ref_frame[1] != INTRA_FRAME &&
is_motion_variation_allowed_compound(mbmi)) {
-#else
- if (is_motion_variation_allowed_bsize(mbmi->sb_type) &&
- is_inter_mode(mbmi->mode) && is_motion_variation_allowed_compound(mbmi)) {
-#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
if (!check_num_overlappable_neighbors(mbmi)) return SIMPLE_TRANSLATION;
#endif
#if CONFIG_WARPED_MOTION
if (!has_second_ref(mbmi) && mbmi->num_proj_ref[0] >= 1 &&
- !av1_is_scaled(&(xd->block_refs[0]->sf)))
+ !av1_is_scaled(&(xd->block_refs[0]->sf))) {
+#if CONFIG_AMVR
+ if (xd->cur_frame_mv_precision_level) {
+ return OBMC_CAUSAL;
+ }
+#endif
return WARPED_CAUSAL;
- else
+ }
+
#endif // CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ if (ncobmc_mode_allowed_bsize(mbmi->sb_type) < NO_OVERLAP)
+ return NCOBMC_ADAPT_WEIGHT;
+ else
+#endif
return OBMC_CAUSAL;
#else
return SIMPLE_TRANSLATION;
@@ -1545,42 +1618,6 @@ static INLINE MOTION_MODE motion_mode_allowed(
}
}
-#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
-static INLINE NCOBMC_MODE ncobmc_mode_allowed_bsize(BLOCK_SIZE bsize) {
- if (bsize < BLOCK_8X8 || bsize > BLOCK_64X64)
- return NO_OVERLAP;
- else
- return (NCOBMC_MODE)(MAX_NCOBMC_MODES - 1);
-}
-
-static INLINE MOTION_MODE
-motion_mode_allowed_wrapper(int for_mv_search,
-#if CONFIG_GLOBAL_MOTION
- int block, const WarpedMotionParams *gm_params,
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
- const MACROBLOCKD *xd,
-#endif
- const MODE_INFO *mi) {
- const MB_MODE_INFO *mbmi = &mi->mbmi;
- MOTION_MODE motion_mode_for_mv_search = motion_mode_allowed(
-#if CONFIG_GLOBAL_MOTION
- int block, const WarpedMotionParams *gm_params,
-#endif
-#if CONFIG_WARPED_MOTION
- xd,
-#endif
- mi);
- int ncobmc_mode_allowed =
- ncobmc_mode_allowed_bsize(mbmi->sb_type) && is_inter_mode(mbmi->mode);
- if (for_mv_search)
- return motion_mode_for_mv_search;
- else
- return ncobmc_mode_allowed ? NCOBMC_ADAPT_WEIGHT
- : motion_mode_for_mv_search;
-}
-#endif
-
static INLINE void assert_motion_mode_valid(MOTION_MODE mode,
#if CONFIG_GLOBAL_MOTION
int block,
@@ -1590,14 +1627,6 @@ static INLINE void assert_motion_mode_valid(MOTION_MODE mode,
const MACROBLOCKD *xd,
#endif
const MODE_INFO *mi) {
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- const MOTION_MODE last_motion_mode_allowed =
- motion_mode_allowed_wrapper(0,
-#if CONFIG_GLOBAL_MOTION
- block, gm_params,
-#endif // CONFIG_GLOBAL_MOTION
- mi);
-#else
const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
#if CONFIG_GLOBAL_MOTION
block, gm_params,
@@ -1606,7 +1635,7 @@ static INLINE void assert_motion_mode_valid(MOTION_MODE mode,
xd,
#endif
mi);
-#endif
+
// Check that the input mode is not illegal
if (last_motion_mode_allowed < mode)
assert(0 && "Illegal motion mode selected");
@@ -1619,9 +1648,16 @@ static INLINE int is_neighbor_overlappable(const MB_MODE_INFO *mbmi) {
#endif // CONFIG_MOTION_VAR
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+static INLINE int av1_allow_palette(int allow_screen_content_tools,
+ BLOCK_SIZE sb_type) {
+ return allow_screen_content_tools && sb_type >= BLOCK_8X8 &&
+ sb_type <= BLOCK_LARGEST;
+}
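
Editor's note: av1_allow_palette requires screen-content tools and a block between BLOCK_8X8 and BLOCK_LARGEST inclusive. A standalone restatement with ordinal stand-ins for the BLOCK_* enum:

#include <assert.h>

enum { BLOCK_4X4, BLOCK_8X8, BLOCK_64X64, BLOCK_LARGEST = BLOCK_64X64 };

static int allow_palette(int screen_content, int bsize) {
  return screen_content && bsize >= BLOCK_8X8 && bsize <= BLOCK_LARGEST;
}

int main(void) {
  assert(!allow_palette(0, BLOCK_8X8)); /* screen-content tools required */
  assert(!allow_palette(1, BLOCK_4X4)); /* block too small */
  assert(allow_palette(1, BLOCK_64X64));
  return 0;
}
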
+
// Returns sub-sampled dimensions of the given block.
// The output values for 'rows_within_bounds' and 'cols_within_bounds' will
-// differ from 'height' and 'width' when part of the block is outside the right
+// differ from 'height' and 'width' when part of the block is outside the right
// and/or bottom image boundary.
static INLINE void av1_get_block_dimensions(BLOCK_SIZE bsize, int plane,
const MACROBLOCKD *xd, int *width,
@@ -1647,6 +1683,24 @@ static INLINE void av1_get_block_dimensions(BLOCK_SIZE bsize, int plane,
if (cols_within_bounds) *cols_within_bounds = block_cols >> pd->subsampling_x;
}
+/* clang-format off */
+typedef aom_cdf_prob (*MapCdf)[PALETTE_COLOR_INDEX_CONTEXTS]
+ [CDF_SIZE(PALETTE_COLORS)];
+typedef const int (*ColorCost)[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
+ [PALETTE_COLORS];
+/* clang-format on */
+
+typedef struct {
+ int rows;
+ int cols;
+ int n_colors;
+ int plane_width;
+ int plane_height;
+ uint8_t *color_map;
+ MapCdf map_cdf;
+ ColorCost color_cost;
+} Av1ColorMapParam;
+
#if CONFIG_GLOBAL_MOTION
static INLINE int is_nontrans_global_motion(const MACROBLOCKD *xd) {
const MODE_INFO *mi = xd->mi[0];
@@ -1660,23 +1714,13 @@ static INLINE int is_nontrans_global_motion(const MACROBLOCKD *xd) {
// First check if all modes are ZEROMV
if (mbmi->sb_type >= BLOCK_8X8 || unify_bsize) {
-#if CONFIG_EXT_INTER
if (mbmi->mode != ZEROMV && mbmi->mode != ZERO_ZEROMV) return 0;
-#else
- if (mbmi->mode != ZEROMV) return 0;
-#endif // CONFIG_EXT_INTER
} else {
-#if CONFIG_EXT_INTER
if ((mi->bmi[0].as_mode != ZEROMV && mi->bmi[0].as_mode != ZERO_ZEROMV) ||
(mi->bmi[1].as_mode != ZEROMV && mi->bmi[1].as_mode != ZERO_ZEROMV) ||
(mi->bmi[2].as_mode != ZEROMV && mi->bmi[2].as_mode != ZERO_ZEROMV) ||
(mi->bmi[3].as_mode != ZEROMV && mi->bmi[3].as_mode != ZERO_ZEROMV))
return 0;
-#else
- if (mi->bmi[0].as_mode != ZEROMV || mi->bmi[1].as_mode != ZEROMV ||
- mi->bmi[2].as_mode != ZEROMV || mi->bmi[3].as_mode != ZEROMV)
- return 0;
-#endif // CONFIG_EXT_INTER
}
#if !GLOBAL_SUB8X8_USED
@@ -1695,6 +1739,38 @@ static INLINE PLANE_TYPE get_plane_type(int plane) {
return (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
}
+static INLINE void transpose_uint8(uint8_t *dst, int dst_stride,
+ const uint8_t *src, int src_stride, int w,
+ int h) {
+ int r, c;
+ for (r = 0; r < h; ++r)
+ for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
+}
+
+static INLINE void transpose_uint16(uint16_t *dst, int dst_stride,
+ const uint16_t *src, int src_stride, int w,
+ int h) {
+ int r, c;
+ for (r = 0; r < h; ++r)
+ for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
+}
+
+static INLINE void transpose_int16(int16_t *dst, int dst_stride,
+ const int16_t *src, int src_stride, int w,
+ int h) {
+ int r, c;
+ for (r = 0; r < h; ++r)
+ for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
+}
+
+static INLINE void transpose_int32(int32_t *dst, int dst_stride,
+ const int32_t *src, int src_stride, int w,
+ int h) {
+ int r, c;
+ for (r = 0; r < h; ++r)
+ for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
+}
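
Editor's note: all four transpose helpers share the same stride-aware loop; only the element type differs. A quick usage example on a non-square matrix, where dst_stride becomes the transposed row pitch:

#include <stdint.h>
#include <stdio.h>

static void transpose_int16(int16_t *dst, int dst_stride, const int16_t *src,
                            int src_stride, int w, int h) {
  for (int r = 0; r < h; ++r)
    for (int c = 0; c < w; ++c)
      dst[c * dst_stride + r] = src[r * src_stride + c];
}

int main(void) {
  const int16_t src[6] = { 1, 2, 3, 4, 5, 6 }; /* 3 rows x 2 cols */
  int16_t dst[6];                              /* becomes 2 rows x 3 cols */
  transpose_int16(dst, 3, src, 2, 2, 3);
  for (int i = 0; i < 6; i++) printf("%d ", dst[i]); /* 1 3 5 2 4 6 */
  printf("\n");
  return 0;
}
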
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/cdef.c b/third_party/aom/av1/common/cdef.c
index ba8abbbe0..397a14845 100644
--- a/third_party/aom/av1/common/cdef.c
+++ b/third_party/aom/av1/common/cdef.c
@@ -16,7 +16,7 @@
#include "./aom_scale_rtcd.h"
#include "aom/aom_integer.h"
#include "av1/common/cdef.h"
-#include "av1/common/od_dering.h"
+#include "av1/common/cdef_block.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/reconinter.h"
@@ -50,8 +50,8 @@ static int is_8x8_block_skip(MODE_INFO **grid, int mi_row, int mi_col,
return is_skip;
}
-int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
- dering_list *dlist, int filter_skip) {
+int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
+ cdef_list *dlist, int filter_skip) {
int r, c;
int maxc, maxr;
MODE_INFO **grid;
@@ -156,82 +156,82 @@ static INLINE void copy_rect(uint16_t *dst, int dstride, const uint16_t *src,
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd) {
- int sbr, sbc;
- int nhsb, nvsb;
- uint16_t src[OD_DERING_INBUF_SIZE];
+ int fbr, fbc;
+ int nhfb, nvfb;
+ uint16_t src[CDEF_INBUF_SIZE];
uint16_t *linebuf[3];
uint16_t *colbuf[3];
- dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
- unsigned char *row_dering, *prev_row_dering, *curr_row_dering;
- int dering_count;
- int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
- int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
+ cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
+ unsigned char *row_cdef, *prev_row_cdef, *curr_row_cdef;
+ int cdef_count;
+ int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
+ int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
int stride;
int mi_wide_l2[3];
int mi_high_l2[3];
int xdec[3];
int ydec[3];
int pli;
- int dering_left;
+ int cdef_left;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
- int nplanes = 3;
- int chroma_dering =
- xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
- xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
- nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ int nplanes = MAX_MB_PLANE;
+ int chroma_cdef = xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
+ xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
+ nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0);
- row_dering = aom_malloc(sizeof(*row_dering) * (nhsb + 2) * 2);
- memset(row_dering, 1, sizeof(*row_dering) * (nhsb + 2) * 2);
- prev_row_dering = row_dering + 1;
- curr_row_dering = prev_row_dering + nhsb + 2;
+ row_cdef = aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
+ memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
+ prev_row_cdef = row_cdef + 1;
+ curr_row_cdef = prev_row_cdef + nhfb + 2;
for (pli = 0; pli < nplanes; pli++) {
xdec[pli] = xd->plane[pli].subsampling_x;
ydec[pli] = xd->plane[pli].subsampling_y;
mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
+ if (xdec[pli] != ydec[pli]) nplanes = 1;
}
- stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * OD_FILT_HBORDER;
+ stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
for (pli = 0; pli < nplanes; pli++) {
- linebuf[pli] = aom_malloc(sizeof(*linebuf) * OD_FILT_VBORDER * stride);
+ linebuf[pli] = aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
colbuf[pli] =
aom_malloc(sizeof(*colbuf) *
- ((MAX_SB_SIZE << mi_high_l2[pli]) + 2 * OD_FILT_VBORDER) *
- OD_FILT_HBORDER);
+ ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) *
+ CDEF_HBORDER);
}
- for (sbr = 0; sbr < nvsb; sbr++) {
+ for (fbr = 0; fbr < nvfb; fbr++) {
for (pli = 0; pli < nplanes; pli++) {
const int block_height =
- (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * OD_FILT_VBORDER;
- fill_rect(colbuf[pli], OD_FILT_HBORDER, block_height, OD_FILT_HBORDER,
- OD_DERING_VERY_LARGE);
+ (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
+ fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
}
- dering_left = 1;
- for (sbc = 0; sbc < nhsb; sbc++) {
- int level, clpf_strength;
- int uv_level, uv_clpf_strength;
+ cdef_left = 1;
+ for (fbc = 0; fbc < nhfb; fbc++) {
+ int level, sec_strength;
+ int uv_level, uv_sec_strength;
int nhb, nvb;
int cstart = 0;
- curr_row_dering[sbc] = 0;
- if (cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
- MI_SIZE_64X64 * sbc] == NULL ||
- cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
- MI_SIZE_64X64 * sbc]
+ curr_row_cdef[fbc] = 0;
+ if (cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
+ MI_SIZE_64X64 * fbc] == NULL ||
+ cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
+ MI_SIZE_64X64 * fbc]
->mbmi.cdef_strength == -1) {
- dering_left = 0;
+ cdef_left = 0;
continue;
}
- if (!dering_left) cstart = -OD_FILT_HBORDER;
- nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc);
- nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr);
+ if (!cdef_left) cstart = -CDEF_HBORDER;
+ nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
+ nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
int tile_top, tile_left, tile_bottom, tile_right;
- int mi_idx = MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc;
+ int mi_idx = MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
MODE_INFO *const mi_tl = cm->mi + mi_idx;
BOUNDARY_TYPE boundary_tl = mi_tl->mbmi.boundary_info;
tile_top = boundary_tl & TILE_ABOVE_BOUNDARY;
tile_left = boundary_tl & TILE_LEFT_BOUNDARY;
- if (sbr != nvsb - 1 &&
+ if (fbr != nvfb - 1 &&
(&cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]))
tile_bottom = cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]
.mbmi.boundary_info &
@@ -239,197 +239,216 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
else
tile_bottom = 1;
- if (sbc != nhsb - 1 && (&cm->mi[mi_idx + MI_SIZE_64X64 - 1]))
+ if (fbc != nhfb - 1 && (&cm->mi[mi_idx + MI_SIZE_64X64 - 1]))
tile_right = cm->mi[mi_idx + MI_SIZE_64X64 - 1].mbmi.boundary_info &
TILE_RIGHT_BOUNDARY;
else
tile_right = 1;
const int mbmi_cdef_strength =
- cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
- MI_SIZE_64X64 * sbc]
+ cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
+ MI_SIZE_64X64 * fbc]
->mbmi.cdef_strength;
- level = cm->cdef_strengths[mbmi_cdef_strength] / CLPF_STRENGTHS;
- clpf_strength = cm->cdef_strengths[mbmi_cdef_strength] % CLPF_STRENGTHS;
- clpf_strength += clpf_strength == 3;
- uv_level = cm->cdef_uv_strengths[mbmi_cdef_strength] / CLPF_STRENGTHS;
- uv_clpf_strength =
- cm->cdef_uv_strengths[mbmi_cdef_strength] % CLPF_STRENGTHS;
- uv_clpf_strength += uv_clpf_strength == 3;
- if ((level == 0 && clpf_strength == 0 && uv_level == 0 &&
- uv_clpf_strength == 0) ||
- (dering_count = sb_compute_dering_list(
- cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64, dlist,
- get_filter_skip(level) || get_filter_skip(uv_level))) == 0) {
- dering_left = 0;
+ level = cm->cdef_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
+ sec_strength =
+ cm->cdef_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
+ sec_strength += sec_strength == 3;
+ uv_level = cm->cdef_uv_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
+ uv_sec_strength =
+ cm->cdef_uv_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
+ uv_sec_strength += uv_sec_strength == 3;
+ if ((level == 0 && sec_strength == 0 && uv_level == 0 &&
+ uv_sec_strength == 0) ||
+ (cdef_count = sb_compute_cdef_list(
+ cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist,
+#if CONFIG_CDEF_SINGLEPASS
+ (level & 1) || (uv_level & 1))) == 0)
+#else
+ get_filter_skip(level) || get_filter_skip(uv_level))) == 0)
+#endif
+ {
+ cdef_left = 0;
continue;
}
- curr_row_dering[sbc] = 1;
+ curr_row_cdef[fbc] = 1;
for (pli = 0; pli < nplanes; pli++) {
- uint16_t dst[MAX_SB_SIZE * MAX_SB_SIZE];
+#if !CONFIG_CDEF_SINGLEPASS
+ uint16_t dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE];
+#endif
int coffset;
int rend, cend;
- int clpf_damping = cm->cdef_clpf_damping;
- int dering_damping = cm->cdef_dering_damping;
+ int pri_damping = cm->cdef_pri_damping;
+ int sec_damping = cm->cdef_sec_damping;
int hsize = nhb << mi_wide_l2[pli];
int vsize = nvb << mi_high_l2[pli];
if (pli) {
- if (chroma_dering)
+ if (chroma_cdef)
level = uv_level;
else
level = 0;
- clpf_strength = uv_clpf_strength;
+ sec_strength = uv_sec_strength;
}
- if (sbc == nhsb - 1)
+ if (fbc == nhfb - 1)
cend = hsize;
else
- cend = hsize + OD_FILT_HBORDER;
+ cend = hsize + CDEF_HBORDER;
- if (sbr == nvsb - 1)
+ if (fbr == nvfb - 1)
rend = vsize;
else
- rend = vsize + OD_FILT_VBORDER;
+ rend = vsize + CDEF_VBORDER;
- coffset = sbc * MI_SIZE_64X64 << mi_wide_l2[pli];
- if (sbc == nhsb - 1) {
+ coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
+ if (fbc == nhfb - 1) {
/* On the last superblock column, fill in the right border with
- OD_DERING_VERY_LARGE to avoid filtering with the outside. */
- fill_rect(&src[cend + OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- rend + OD_FILT_VBORDER, hsize + OD_FILT_HBORDER - cend,
- OD_DERING_VERY_LARGE);
+ CDEF_VERY_LARGE to avoid filtering with the outside. */
+ fill_rect(&src[cend + CDEF_HBORDER], CDEF_BSTRIDE,
+ rend + CDEF_VBORDER, hsize + CDEF_HBORDER - cend,
+ CDEF_VERY_LARGE);
}
- if (sbr == nvsb - 1) {
+ if (fbr == nvfb - 1) {
/* On the last superblock row, fill in the bottom border with
- OD_DERING_VERY_LARGE to avoid filtering with the outside. */
- fill_rect(&src[(rend + OD_FILT_VBORDER) * OD_FILT_BSTRIDE],
- OD_FILT_BSTRIDE, OD_FILT_VBORDER,
- hsize + 2 * OD_FILT_HBORDER, OD_DERING_VERY_LARGE);
+ CDEF_VERY_LARGE to avoid filtering with the outside. */
+ fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
+ CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
}
/* Copy in the pixels we need from the current superblock for
         deringing. */
- copy_sb8_16(
- cm,
- &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER + cstart],
- OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr, coffset + cstart,
- xd->plane[pli].dst.stride, rend, cend - cstart);
- if (!prev_row_dering[sbc]) {
- copy_sb8_16(
- cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
- coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, hsize);
- } else if (sbr > 0) {
- copy_rect(&src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- &linebuf[pli][coffset], stride, OD_FILT_VBORDER, hsize);
+ copy_sb8_16(cm,
+ &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
+ CDEF_BSTRIDE, xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr, coffset + cstart,
+ xd->plane[pli].dst.stride, rend, cend - cstart);
+ if (!prev_row_cdef[fbc]) {
+ copy_sb8_16(cm, &src[CDEF_HBORDER], CDEF_BSTRIDE,
+ xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
+ coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize);
+ } else if (fbr > 0) {
+ copy_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset],
+ stride, CDEF_VBORDER, hsize);
} else {
- fill_rect(&src[OD_FILT_HBORDER], OD_FILT_BSTRIDE, OD_FILT_VBORDER,
- hsize, OD_DERING_VERY_LARGE);
+ fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize,
+ CDEF_VERY_LARGE);
}
- if (!prev_row_dering[sbc - 1]) {
- copy_sb8_16(
- cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
- coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride,
- OD_FILT_VBORDER, OD_FILT_HBORDER);
- } else if (sbr > 0 && sbc > 0) {
- copy_rect(src, OD_FILT_BSTRIDE,
- &linebuf[pli][coffset - OD_FILT_HBORDER], stride,
- OD_FILT_VBORDER, OD_FILT_HBORDER);
+ if (!prev_row_cdef[fbc - 1]) {
+ copy_sb8_16(cm, src, CDEF_BSTRIDE, xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
+ coffset - CDEF_HBORDER, xd->plane[pli].dst.stride,
+ CDEF_VBORDER, CDEF_HBORDER);
+ } else if (fbr > 0 && fbc > 0) {
+ copy_rect(src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER],
+ stride, CDEF_VBORDER, CDEF_HBORDER);
} else {
- fill_rect(src, OD_FILT_BSTRIDE, OD_FILT_VBORDER, OD_FILT_HBORDER,
- OD_DERING_VERY_LARGE);
+ fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
}
- if (!prev_row_dering[sbc + 1]) {
- copy_sb8_16(
- cm, &src[OD_FILT_HBORDER + (nhb << mi_wide_l2[pli])],
- OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
- coffset + hsize, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
- OD_FILT_HBORDER);
- } else if (sbr > 0 && sbc < nhsb - 1) {
- copy_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- &linebuf[pli][coffset + hsize], stride, OD_FILT_VBORDER,
- OD_FILT_HBORDER);
+ if (!prev_row_cdef[fbc + 1]) {
+ copy_sb8_16(cm, &src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
+ CDEF_BSTRIDE, xd->plane[pli].dst.buf,
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
+ coffset + hsize, xd->plane[pli].dst.stride, CDEF_VBORDER,
+ CDEF_HBORDER);
+ } else if (fbr > 0 && fbc < nhfb - 1) {
+ copy_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
+ &linebuf[pli][coffset + hsize], stride, CDEF_VBORDER,
+ CDEF_HBORDER);
} else {
- fill_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- OD_FILT_VBORDER, OD_FILT_HBORDER, OD_DERING_VERY_LARGE);
+ fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER,
+ CDEF_HBORDER, CDEF_VERY_LARGE);
}
- if (dering_left) {
+ if (cdef_left) {
/* If we deringed the superblock on the left then we need to copy in
saved pixels. */
- copy_rect(src, OD_FILT_BSTRIDE, colbuf[pli], OD_FILT_HBORDER,
- rend + OD_FILT_VBORDER, OD_FILT_HBORDER);
+ copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER,
+ rend + CDEF_VBORDER, CDEF_HBORDER);
}
/* Saving pixels in case we need to dering the superblock on the
right. */
- copy_rect(colbuf[pli], OD_FILT_HBORDER, src + hsize, OD_FILT_BSTRIDE,
- rend + OD_FILT_VBORDER, OD_FILT_HBORDER);
+ copy_rect(colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE,
+ rend + CDEF_VBORDER, CDEF_HBORDER);
copy_sb8_16(
cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf,
- (MI_SIZE_64X64 << mi_high_l2[pli]) * (sbr + 1) - OD_FILT_VBORDER,
- coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, hsize);
+ (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
+ coffset, xd->plane[pli].dst.stride, CDEF_VBORDER, hsize);
if (tile_top) {
- fill_rect(src, OD_FILT_BSTRIDE, OD_FILT_VBORDER,
- hsize + 2 * OD_FILT_HBORDER, OD_DERING_VERY_LARGE);
+ fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER,
+ CDEF_VERY_LARGE);
}
if (tile_left) {
- fill_rect(src, OD_FILT_BSTRIDE, vsize + 2 * OD_FILT_VBORDER,
- OD_FILT_HBORDER, OD_DERING_VERY_LARGE);
+ fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
}
if (tile_bottom) {
- fill_rect(&src[(vsize + OD_FILT_VBORDER) * OD_FILT_BSTRIDE],
- OD_FILT_BSTRIDE, OD_FILT_VBORDER,
- hsize + 2 * OD_FILT_HBORDER, OD_DERING_VERY_LARGE);
+ fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
+ CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
}
if (tile_right) {
- fill_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE,
- vsize + 2 * OD_FILT_VBORDER, OD_FILT_HBORDER,
- OD_DERING_VERY_LARGE);
+ fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
+ vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
}
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- od_dering(
- (uint8_t *)&CONVERT_TO_SHORTPTR(
- xd->plane[pli]
- .dst.buf)[xd->plane[pli].dst.stride *
- (MI_SIZE_64X64 * sbr << mi_high_l2[pli]) +
- (sbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
+ cdef_filter_fb(
+#if CONFIG_CDEF_SINGLEPASS
+ NULL,
+ &CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
+#else
+ (uint8_t *)&CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
+#endif
+ [xd->plane[pli].dst.stride *
+ (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
+#if CONFIG_CDEF_SINGLEPASS
+ xd->plane[pli].dst.stride,
+#else
xd->plane[pli].dst.stride, dst,
- &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
- xdec[pli], ydec[pli], dir, NULL, var, pli, dlist, dering_count,
- level, clpf_strength, clpf_damping, dering_damping, coeff_shift,
- 0, 1);
+#endif
+ &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
+ ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
+#if CONFIG_CDEF_SINGLEPASS
+ sec_strength, pri_damping, sec_damping, coeff_shift);
+#else
+ sec_strength, sec_damping, pri_damping, coeff_shift, 0, 1);
+#endif
} else {
#endif
- od_dering(&xd->plane[pli]
- .dst.buf[xd->plane[pli].dst.stride *
- (MI_SIZE_64X64 * sbr << mi_high_l2[pli]) +
- (sbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
- xd->plane[pli].dst.stride, dst,
- &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
- xdec[pli], ydec[pli], dir, NULL, var, pli, dlist,
- dering_count, level, clpf_strength, clpf_damping,
- dering_damping, coeff_shift, 0, 0);
+ cdef_filter_fb(
+ &xd->plane[pli]
+ .dst.buf[xd->plane[pli].dst.stride *
+ (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
+#if CONFIG_CDEF_SINGLEPASS
+ NULL, xd->plane[pli].dst.stride,
+#else
+ xd->plane[pli].dst.stride, dst,
+#endif
+ &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
+ ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
+#if CONFIG_CDEF_SINGLEPASS
+ sec_strength, pri_damping, sec_damping, coeff_shift);
+#else
+ sec_strength, sec_damping, pri_damping, coeff_shift, 0, 0);
+#endif
#if CONFIG_HIGHBITDEPTH
}
#endif
}
- dering_left = 1;
+ cdef_left = 1;
}
{
unsigned char *tmp;
- tmp = prev_row_dering;
- prev_row_dering = curr_row_dering;
- curr_row_dering = tmp;
+ tmp = prev_row_cdef;
+ prev_row_cdef = curr_row_cdef;
+ curr_row_cdef = tmp;
}
}
- aom_free(row_dering);
+ aom_free(row_cdef);
for (pli = 0; pli < nplanes; pli++) {
aom_free(linebuf[pli]);
aom_free(colbuf[pli]);
diff --git a/third_party/aom/av1/common/cdef.h b/third_party/aom/av1/common/cdef.h
index a0dd0a698..9de24bf92 100644
--- a/third_party/aom/av1/common/cdef.h
+++ b/third_party/aom/av1/common/cdef.h
@@ -8,31 +8,28 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#ifndef AV1_COMMON_DERING_H_
-#define AV1_COMMON_DERING_H_
+#ifndef AV1_COMMON_CDEF_H_
+#define AV1_COMMON_CDEF_H_
#define CDEF_STRENGTH_BITS 7
-#define DERING_STRENGTHS 32
-#define CLPF_STRENGTHS 4
+#define CDEF_PRI_STRENGTHS 32
+#define CDEF_SEC_STRENGTHS 4
#include "./aom_config.h"
#include "aom/aom_integer.h"
#include "aom_ports/mem.h"
-#include "av1/common/od_dering.h"
+#include "av1/common/cdef_block.h"
#include "av1/common/onyxc_int.h"
-#include "./od_dering.h"
static INLINE int sign(int i) { return i < 0 ? -1 : 1; }
-static INLINE int constrain(int diff, int threshold, unsigned int damping) {
- return threshold
- ? sign(diff) *
- AOMMIN(
- abs(diff),
- AOMMAX(0, threshold - (abs(diff) >>
- (damping - get_msb(threshold)))))
- : 0;
+static INLINE int constrain(int diff, int threshold, int damping) {
+ if (!threshold) return 0;
+
+ const int shift = AOMMAX(0, damping - get_msb(threshold));
+ return sign(diff) *
+ AOMMIN(abs(diff), AOMMAX(0, threshold - (abs(diff) >> shift)));
}
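
Editor's note: the rewrite makes the zero-threshold early-out explicit and hoists the shift computation. A worked standalone example with get_msb stubbed locally: for threshold 4 and damping 6 the shift is 4, so small differences pass through unchanged while large ones are tapered toward the threshold:

#include <stdio.h>
#include <stdlib.h>

static int get_msb(unsigned int n) { /* index of highest set bit */
  int log = 0;
  while (n >>= 1) log++;
  return log;
}
static int sign(int i) { return i < 0 ? -1 : 1; }

static int constrain(int diff, int threshold, int damping) {
  if (!threshold) return 0;
  int shift = damping - get_msb(threshold);
  if (shift < 0) shift = 0;
  const int mag = abs(diff);
  int cap = threshold - (mag >> shift);
  if (cap < 0) cap = 0;
  return sign(diff) * (mag < cap ? mag : cap);
}

int main(void) {
  printf("%d\n", constrain(2, 4, 6));   /*  2: below the taper */
  printf("%d\n", constrain(20, 4, 6));  /*  3: capped at 4 - (20 >> 4) */
  printf("%d\n", constrain(-20, 4, 6)); /* -3: sign preserved */
  return 0;
}
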
#ifdef __cplusplus
@@ -40,8 +37,8 @@ extern "C" {
#endif
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
-int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
- dering_list *dlist, int filter_skip);
+int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
+ cdef_list *dlist, int filter_skip);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
@@ -50,4 +47,4 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // AV1_COMMON_DERING_H_
+#endif // AV1_COMMON_CDEF_H_
diff --git a/third_party/aom/av1/common/cdef_block.c b/third_party/aom/av1/common/cdef_block.c
new file mode 100644
index 000000000..aaa32c950
--- /dev/null
+++ b/third_party/aom/av1/common/cdef_block.c
@@ -0,0 +1,584 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+
+#ifdef HAVE_CONFIG_H
+#include "./config.h"
+#endif
+
+#include "./aom_dsp_rtcd.h"
+#include "./av1_rtcd.h"
+#include "./cdef.h"
+
+/* Generated from gen_filter_tables.c. */
+#if !CONFIG_CDEF_SINGLEPASS || CDEF_FULL
+const int cdef_directions[8][3] = {
+ { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2, -3 * CDEF_BSTRIDE + 3 },
+ { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2, -1 * CDEF_BSTRIDE + 3 },
+ { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2, 0 * CDEF_BSTRIDE + 3 },
+ { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2, 1 * CDEF_BSTRIDE + 3 },
+ { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2, 3 * CDEF_BSTRIDE + 3 },
+ { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1, 3 * CDEF_BSTRIDE + 1 },
+ { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0, 3 * CDEF_BSTRIDE + 0 },
+ { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1, 3 * CDEF_BSTRIDE - 1 }
+};
+#else
+const int cdef_directions[8][2] = {
+ { -1 * CDEF_BSTRIDE + 1, -2 * CDEF_BSTRIDE + 2 },
+ { 0 * CDEF_BSTRIDE + 1, -1 * CDEF_BSTRIDE + 2 },
+ { 0 * CDEF_BSTRIDE + 1, 0 * CDEF_BSTRIDE + 2 },
+ { 0 * CDEF_BSTRIDE + 1, 1 * CDEF_BSTRIDE + 2 },
+ { 1 * CDEF_BSTRIDE + 1, 2 * CDEF_BSTRIDE + 2 },
+ { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 1 },
+ { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE + 0 },
+ { 1 * CDEF_BSTRIDE + 0, 2 * CDEF_BSTRIDE - 1 }
+};
+#endif
+
+/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
+ The search minimizes the weighted variance along all the lines in a
+ particular direction, i.e. the squared error between the input and a
+ "predicted" block where each pixel is replaced by the average along a line
+   in a particular direction. Since each direction has the same sum(x^2) term,
+ that term is never computed. See Section 2, step 2, of:
+ http://jmvalin.ca/notes/intra_paint.pdf */
+int cdef_find_dir_c(const uint16_t *img, int stride, int32_t *var,
+ int coeff_shift) {
+ int i;
+ int32_t cost[8] = { 0 };
+ int partial[8][15] = { { 0 } };
+ int32_t best_cost = 0;
+ int best_dir = 0;
+ /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
+     The output is then 840 times larger, but that does not matter since we
+     only compare the costs to find the max. */
+ static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
+ for (i = 0; i < 8; i++) {
+ int j;
+ for (j = 0; j < 8; j++) {
+ int x;
+ /* We subtract 128 here to reduce the maximum range of the squared
+ partial sums. */
+ x = (img[i * stride + j] >> coeff_shift) - 128;
+ partial[0][i + j] += x;
+ partial[1][i + j / 2] += x;
+ partial[2][i] += x;
+ partial[3][3 + i - j / 2] += x;
+ partial[4][7 + i - j] += x;
+ partial[5][3 - i / 2 + j] += x;
+ partial[6][j] += x;
+ partial[7][i / 2 + j] += x;
+ }
+ }
+ for (i = 0; i < 8; i++) {
+ cost[2] += partial[2][i] * partial[2][i];
+ cost[6] += partial[6][i] * partial[6][i];
+ }
+ cost[2] *= div_table[8];
+ cost[6] *= div_table[8];
+ for (i = 0; i < 7; i++) {
+ cost[0] += (partial[0][i] * partial[0][i] +
+ partial[0][14 - i] * partial[0][14 - i]) *
+ div_table[i + 1];
+ cost[4] += (partial[4][i] * partial[4][i] +
+ partial[4][14 - i] * partial[4][14 - i]) *
+ div_table[i + 1];
+ }
+ cost[0] += partial[0][7] * partial[0][7] * div_table[8];
+ cost[4] += partial[4][7] * partial[4][7] * div_table[8];
+ for (i = 1; i < 8; i += 2) {
+ int j;
+ for (j = 0; j < 4 + 1; j++) {
+ cost[i] += partial[i][3 + j] * partial[i][3 + j];
+ }
+ cost[i] *= div_table[8];
+ for (j = 0; j < 4 - 1; j++) {
+ cost[i] += (partial[i][j] * partial[i][j] +
+ partial[i][10 - j] * partial[i][10 - j]) *
+ div_table[2 * j + 2];
+ }
+ }
+ for (i = 0; i < 8; i++) {
+ if (cost[i] > best_cost) {
+ best_cost = cost[i];
+ best_dir = i;
+ }
+ }
+ /* Difference between the optimal variance and the variance along the
+ orthogonal direction. Again, the sum(x^2) terms cancel out. */
+ *var = best_cost - cost[(best_dir + 4) & 7];
+ /* We'd normally divide by 840, but dividing by 1024 is close enough
+ for what we're going to do with this. */
+ *var >>= 10;
+ return best_dir;
+}
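
Editor's note: the div_table comment is easy to verify: 840 = 3 * 5 * 7 * 8, so every entry is exactly 840 / n, and multiplying each direction's squared partial sums by it scales all costs by the same factor, leaving the argmax unchanged. A one-line check:

#include <assert.h>

int main(void) {
  static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
  for (int n = 1; n <= 8; n++) assert(div_table[n] == 840 / n);
  return 0;
}
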
+
+#if CONFIG_CDEF_SINGLEPASS
+#if CDEF_FULL
+const int cdef_pri_taps[2][3] = { { 3, 2, 1 }, { 2, 2, 2 } };
+const int cdef_sec_taps[2][2] = { { 3, 1 }, { 3, 1 } };
+#else
+const int cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
+const int cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
+#endif
+
+/* Smooth in the direction detected. */
+#if CDEF_CAP
+void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
+ const uint16_t *in, int pri_strength, int sec_strength,
+ int dir, int pri_damping, int sec_damping, int bsize,
+ UNUSED int max_unused)
+#else
+void cdef_filter_block_c(uint8_t *dst8, uint16_t *dst16, int dstride,
+ const uint16_t *in, int pri_strength, int sec_strength,
+ int dir, int pri_damping, int sec_damping, int bsize,
+ int max)
+#endif
+{
+ int i, j, k;
+ const int s = CDEF_BSTRIDE;
+ const int *pri_taps = cdef_pri_taps[pri_strength & 1];
+ const int *sec_taps = cdef_sec_taps[pri_strength & 1];
+ for (i = 0; i < 4 << (bsize == BLOCK_8X8); i++) {
+ for (j = 0; j < 4 << (bsize == BLOCK_8X8); j++) {
+ int16_t sum = 0;
+ int16_t y;
+ int16_t x = in[i * s + j];
+#if CDEF_CAP
+ int max = x;
+ int min = x;
+#endif
+#if CDEF_FULL
+ for (k = 0; k < 3; k++)
+#else
+ for (k = 0; k < 2; k++)
+#endif
+ {
+ int16_t p0 = in[i * s + j + cdef_directions[dir][k]];
+ int16_t p1 = in[i * s + j - cdef_directions[dir][k]];
+ sum += pri_taps[k] * constrain(p0 - x, pri_strength, pri_damping);
+ sum += pri_taps[k] * constrain(p1 - x, pri_strength, pri_damping);
+#if CDEF_CAP
+ if (p0 != CDEF_VERY_LARGE) max = AOMMAX(p0, max);
+ if (p1 != CDEF_VERY_LARGE) max = AOMMAX(p1, max);
+ min = AOMMIN(p0, min);
+ min = AOMMIN(p1, min);
+#endif
+#if CDEF_FULL
+ if (k == 2) continue;
+#endif
+ int16_t s0 = in[i * s + j + cdef_directions[(dir + 2) & 7][k]];
+ int16_t s1 = in[i * s + j - cdef_directions[(dir + 2) & 7][k]];
+ int16_t s2 = in[i * s + j + cdef_directions[(dir + 6) & 7][k]];
+ int16_t s3 = in[i * s + j - cdef_directions[(dir + 6) & 7][k]];
+#if CDEF_CAP
+ if (s0 != CDEF_VERY_LARGE) max = AOMMAX(s0, max);
+ if (s1 != CDEF_VERY_LARGE) max = AOMMAX(s1, max);
+ if (s2 != CDEF_VERY_LARGE) max = AOMMAX(s2, max);
+ if (s3 != CDEF_VERY_LARGE) max = AOMMAX(s3, max);
+ min = AOMMIN(s0, min);
+ min = AOMMIN(s1, min);
+ min = AOMMIN(s2, min);
+ min = AOMMIN(s3, min);
+#endif
+ sum += sec_taps[k] * constrain(s0 - x, sec_strength, sec_damping);
+ sum += sec_taps[k] * constrain(s1 - x, sec_strength, sec_damping);
+ sum += sec_taps[k] * constrain(s2 - x, sec_strength, sec_damping);
+ sum += sec_taps[k] * constrain(s3 - x, sec_strength, sec_damping);
+ }
+#if CDEF_CAP
+ y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), min, max);
+#else
+ y = clamp((int16_t)x + ((8 + sum - (sum < 0)) >> 4), 0, max);
+#endif
+ if (dst8)
+ dst8[i * dstride + j] = (uint8_t)y;
+ else
+ dst16[i * dstride + j] = (uint16_t)y;
+ }
+ }
+}
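
Editor's note: once the taps are summed, the filtered pixel is y = x + ((8 + sum - (sum < 0)) >> 4): a divide by 16 with symmetric round-to-nearest, the (sum < 0) term balancing the rounding for negative sums. A worked example (it assumes arithmetic right shift of negative values, as the codec does):

#include <stdio.h>

static int apply(int x, int sum) { return x + ((8 + sum - (sum < 0)) >> 4); }

int main(void) {
  printf("%d\n", apply(100, 7));  /* 100: |sum| < 8 rounds to zero */
  printf("%d\n", apply(100, 9));  /* 101: rounds up to one */
  printf("%d\n", apply(100, -9)); /*  99: rounds symmetrically down */
  return 0;
}
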
+
+#else
+
+/* Smooth in the direction detected. */
+void cdef_direction_8x8_c(uint16_t *y, int ystride, const uint16_t *in,
+ int threshold, int dir, int damping) {
+ int i;
+ int j;
+ int k;
+ static const int taps[3] = { 3, 2, 1 };
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++) {
+ int16_t sum;
+ int16_t xx;
+ int16_t yy;
+ xx = in[i * CDEF_BSTRIDE + j];
+ sum = 0;
+ for (k = 0; k < 3; k++) {
+ int16_t p0;
+ int16_t p1;
+ p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
+ p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
+ sum += taps[k] * constrain(p0, threshold, damping);
+ sum += taps[k] * constrain(p1, threshold, damping);
+ }
+ sum = (sum + 8) >> 4;
+ yy = xx + sum;
+ y[i * ystride + j] = yy;
+ }
+ }
+}
+
+/* Smooth in the direction detected. */
+void cdef_direction_4x4_c(uint16_t *y, int ystride, const uint16_t *in,
+ int threshold, int dir, int damping) {
+ int i;
+ int j;
+ int k;
+ static const int taps[2] = { 4, 1 };
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ int16_t sum;
+ int16_t xx;
+ int16_t yy;
+ xx = in[i * CDEF_BSTRIDE + j];
+ sum = 0;
+ for (k = 0; k < 2; k++) {
+ int16_t p0;
+ int16_t p1;
+ p0 = in[i * CDEF_BSTRIDE + j + cdef_directions[dir][k]] - xx;
+ p1 = in[i * CDEF_BSTRIDE + j - cdef_directions[dir][k]] - xx;
+ sum += taps[k] * constrain(p0, threshold, damping);
+ sum += taps[k] * constrain(p1, threshold, damping);
+ }
+ sum = (sum + 8) >> 4;
+ yy = xx + sum;
+ y[i * ystride + j] = yy;
+ }
+ }
+}
+#endif
+
+/* Compute the primary filter strength for an 8x8 block based on the
+ directional variance difference. A high variance difference means
+ that we have a highly directional pattern (e.g. a high contrast
+   edge), so we can apply more deringing. A low variance difference
+   means that we either have a low contrast edge or a non-directional
+   texture, so we want to be careful not to blur. */
+static INLINE int adjust_strength(int strength, int32_t var) {
+ const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
+ /* We use the variance of 8x8 blocks to adjust the strength. */
+ return var ? (strength * (4 + i) + 8) >> 4 : 0;
+}
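+
+/* Worked example (illustrative only): var = 4000 gives var >> 6 = 62,
+   get_msb(62) = 5, so i = 5 and the result is (strength * 9 + 8) >> 4,
+   roughly 9/16 of the input strength. The scale factor ranges from 4/16
+   (tiny nonzero variance) up to 16/16, i.e. full strength, at i = 12. */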
+
+#if !CONFIG_CDEF_SINGLEPASS
+void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
+ int sstride) {
+ int i, j;
+ for (i = 0; i < 8; i++)
+ for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
+}
+
+void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
+ int sstride) {
+ int i, j;
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
+}
+
+static void copy_block_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
+ cdef_list *dlist, int cdef_count,
+ int bsize) {
+ int bi, bx, by;
+
+ if (bsize == BLOCK_8X8) {
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
+ &src[bi << (3 + 3)], 8);
+ }
+ } else if (bsize == BLOCK_4X8) {
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
+ &src[bi << (3 + 2)], 4);
+ copy_4x4_16bit_to_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
+ dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
+ }
+ } else if (bsize == BLOCK_8X4) {
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
+ &src[bi << (2 + 3)], 8);
+ copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4],
+ dstride, &src[(bi << (2 + 3)) + 4], 8);
+ }
+ } else {
+ assert(bsize == BLOCK_4X4);
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
+ &src[bi << (2 + 2)], 4);
+ }
+ }
+}
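+
+/* Layout note for the copy_block_* helpers above and below: blocks in
+   src are stored back to back, each 1 << (log2w + log2h) entries with a
+   row stride of 1 << log2w, hence offsets like bi << (3 + 3) for 8x8 and
+   bi << (2 + 2) for 4x4. */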
+
+void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
+ int sstride) {
+ int i, j;
+ for (i = 0; i < 8; i++)
+ for (j = 0; j < 8; j++)
+ dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
+}
+
+void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
+ int sstride) {
+ int i, j;
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++)
+ dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
+}
+
+static void copy_block_16bit_to_8bit(uint8_t *dst, int dstride,
+ const uint16_t *src, cdef_list *dlist,
+ int cdef_count, int bsize) {
+ int bi, bx, by;
+ if (bsize == BLOCK_8X8) {
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
+ &src[bi << (3 + 3)], 8);
+ }
+ } else if (bsize == BLOCK_4X8) {
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
+ &src[bi << (3 + 2)], 4);
+ copy_4x4_16bit_to_8bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
+ dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
+ }
+ } else if (bsize == BLOCK_8X4) {
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
+ &src[bi << (2 + 3)], 8);
+ copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
+ &src[(bi << (2 + 3)) + 4], 8);
+ }
+ } else {
+ assert(bsize == BLOCK_4X4);
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
+                             &src[bi << (2 + 2)], 4);
+ }
+ }
+}
+
+int get_filter_skip(int level) {
+ int filter_skip = level & 1;
+ if (level == 1) filter_skip = 0;
+ return filter_skip;
+}
+
+void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
+ int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
+ int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
+ cdef_list *dlist, int cdef_count, int level,
+ int sec_strength, int sec_damping, int pri_damping,
+ int coeff_shift, int skip_dering, int hbd) {
+#else
+
+void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
+ int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
+ int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
+ cdef_list *dlist, int cdef_count, int level,
+ int sec_strength, int pri_damping, int sec_damping,
+ int coeff_shift) {
+#endif
+ int bi;
+ int bx;
+ int by;
+ int bsize, bsizex, bsizey;
+
+#if CONFIG_CDEF_SINGLEPASS
+ int pri_strength = (level >> 1) << coeff_shift;
+ int filter_skip = level & 1;
+ if (!pri_strength && !sec_strength && filter_skip) {
+ pri_strength = 19 << coeff_shift;
+ sec_strength = 7 << coeff_shift;
+ }
+#else
+ int threshold = (level >> 1) << coeff_shift;
+ int filter_skip = get_filter_skip(level);
+ if (level == 1) threshold = 31 << coeff_shift;
+
+ cdef_direction_func cdef_direction[] = { cdef_direction_4x4,
+ cdef_direction_8x8 };
+#endif
+ sec_damping += coeff_shift - (pli != AOM_PLANE_Y);
+ pri_damping += coeff_shift - (pli != AOM_PLANE_Y);
+ bsize =
+ ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
+ bsizex = 3 - xdec;
+ bsizey = 3 - ydec;
+#if CONFIG_CDEF_SINGLEPASS
+ if (dirinit && pri_strength == 0 && sec_strength == 0)
+#else
+ if (!skip_dering)
+#endif
+ {
+#if CONFIG_CDEF_SINGLEPASS
+ // If we're here, both primary and secondary strengths are 0, and
+ // we still haven't written anything to y[] yet, so we just copy
+ // the input to y[]. This is necessary only for av1_cdef_search()
+ // and only av1_cdef_search() sets dirinit.
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+#else
+ if (pli == 0) {
+ if (!dirinit || !*dirinit) {
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
+ CDEF_BSTRIDE, &var[by][bx], coeff_shift);
+ }
+ if (dirinit) *dirinit = 1;
+ }
+ }
+ // Only run dering for non-zero threshold (which is always the case for
+ // 4:2:2 or 4:4:0). If we don't dering, we still need to eventually write
+ // something out in y[] later.
+ if (threshold != 0) {
+ assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
+ for (bi = 0; bi < cdef_count; bi++) {
+ int t = !filter_skip && dlist[bi].skip ? 0 : threshold;
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ (cdef_direction[bsize == BLOCK_8X8])(
+ &y[bi << (bsizex + bsizey)], 1 << bsizex,
+ &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
+ pli ? t : adjust_strength(t, var[by][bx]), dir[by][bx],
+ pri_damping);
+ }
+ }
+ }
+
+ if (sec_strength) {
+ if (threshold && !skip_dering)
+ copy_block_16bit_to_16bit(in, CDEF_BSTRIDE, y, dlist, cdef_count, bsize);
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ int py = by << bsizey;
+ int px = bx << bsizex;
+
+ if (!filter_skip && dlist[bi].skip) continue;
+ if (!dst || hbd) {
+      // Use a 16-bit destination if high bitdepth or no 8-bit destination
+      // given
+ (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
+ : aom_clpf_hblock_hbd)(
+ dst ? (uint16_t *)dst + py * dstride + px
+ : &y[bi << (bsizex + bsizey)],
+ in + py * CDEF_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
+ CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
+ sec_damping);
+ } else {
+ // Do clpf and write the result to an 8 bit destination
+ (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
+ : aom_clpf_hblock)(
+ dst + py * dstride + px, in + py * CDEF_BSTRIDE + px, dstride,
+ CDEF_BSTRIDE, 1 << bsizex, 1 << bsizey, sec_strength << coeff_shift,
+ sec_damping);
+ }
+ }
+ } else if (threshold != 0) {
+ // No clpf, so copy instead
+ if (hbd) {
+ copy_block_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist, cdef_count,
+ bsize);
+ } else {
+ copy_block_16bit_to_8bit(dst, dstride, y, dlist, cdef_count, bsize);
+ }
+ } else if (dirinit) {
+ // If we're here, both dering and clpf are off, and we still haven't written
+ // anything to y[] yet, so we just copy the input to y[]. This is necessary
+ // only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+#endif
+ int iy, ix;
+ // TODO(stemidts/jmvalin): SIMD optimisations
+ for (iy = 0; iy < 1 << bsizey; iy++)
+ for (ix = 0; ix < 1 << bsizex; ix++)
+#if CONFIG_CDEF_SINGLEPASS
+ dst16[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
+#else
+ y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
+#endif
+ in[((by << bsizey) + iy) * CDEF_BSTRIDE + (bx << bsizex) + ix];
+ }
+#if CONFIG_CDEF_SINGLEPASS
+ return;
+#endif
+ }
+
+#if CONFIG_CDEF_SINGLEPASS
+ if (pli == 0) {
+ if (!dirinit || !*dirinit) {
+ for (bi = 0; bi < cdef_count; bi++) {
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ dir[by][bx] = cdef_find_dir(&in[8 * by * CDEF_BSTRIDE + 8 * bx],
+ CDEF_BSTRIDE, &var[by][bx], coeff_shift);
+ }
+ if (dirinit) *dirinit = 1;
+ }
+ }
+
+ assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
+ for (bi = 0; bi < cdef_count; bi++) {
+ int t = !filter_skip && dlist[bi].skip ? 0 : pri_strength;
+ int s = !filter_skip && dlist[bi].skip ? 0 : sec_strength;
+ by = dlist[bi].by;
+ bx = dlist[bi].bx;
+ if (dst8)
+ cdef_filter_block(
+ &dst8[(by << bsizey) * dstride + (bx << bsizex)], NULL, dstride,
+ &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
+ (pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
+ pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
+ else
+ cdef_filter_block(
+ NULL,
+ &dst16[dirinit ? bi << (bsizex + bsizey)
+ : (by << bsizey) * dstride + (bx << bsizex)],
+ dirinit ? 1 << bsizex : dstride,
+ &in[(by * CDEF_BSTRIDE << bsizey) + (bx << bsizex)],
+ (pli ? t : adjust_strength(t, var[by][bx])), s, t ? dir[by][bx] : 0,
+ pri_damping, sec_damping, bsize, (256 << coeff_shift) - 1);
+ }
+#endif
+}
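+
+/* In the single-pass path above, level packs the primary strength index
+   and the skip-all flag as level = (strength_index << 1) | filter_skip;
+   the special case of both strengths zero with filter_skip set maps to
+   the fixed 19/7 strengths. (A hedged summary of the code above, not
+   spec language.) */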
diff --git a/third_party/aom/av1/common/cdef_block.h b/third_party/aom/av1/common/cdef_block.h
new file mode 100644
index 000000000..bf277faad
--- /dev/null
+++ b/third_party/aom/av1/common/cdef_block.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#if !defined(_CDEF_BLOCK_H)
+#define _CDEF_BLOCK_H (1)
+
+#include "./odintrin.h"
+
+#define CDEF_BLOCKSIZE 64
+#define CDEF_BLOCKSIZE_LOG2 6
+#define CDEF_NBLOCKS (CDEF_BLOCKSIZE / 8)
+#if CONFIG_CDEF_SINGLEPASS
+#define CDEF_SB_SHIFT (MAX_SB_SIZE_LOG2 - CDEF_BLOCKSIZE_LOG2)
+#endif
+
+/* We need to buffer three vertical lines. */
+#define CDEF_VBORDER (3)
+/* We only need to buffer three horizontal pixels too, but let's align to
+ 16 bytes (8 x 16 bits) to make vectorization easier. */
+#define CDEF_HBORDER (8)
+#define CDEF_BSTRIDE ALIGN_POWER_OF_TWO(CDEF_BLOCKSIZE + 2 * CDEF_HBORDER, 3)
+
+#define CDEF_VERY_LARGE (30000)
+#define CDEF_INBUF_SIZE (CDEF_BSTRIDE * (CDEF_BLOCKSIZE + 2 * CDEF_VBORDER))
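+
+/* Sanity check of the values above (assuming ALIGN_POWER_OF_TWO(v, n)
+   rounds v up to a multiple of 1 << n): CDEF_BSTRIDE = 64 + 2 * 8 = 80,
+   already a multiple of 8, and CDEF_INBUF_SIZE = 80 * (64 + 6) = 5600
+   uint16_t entries. */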
+
+#if CONFIG_CDEF_SINGLEPASS
+// Filter configuration
+#define CDEF_CAP 1 // 1 = Cap change to largest diff
+#define CDEF_FULL 0 // 1 = 7x7 filter, 0 = 5x5 filter
+
+#if CDEF_FULL
+extern const int cdef_pri_taps[2][3];
+extern const int cdef_sec_taps[2][2];
+extern const int cdef_directions[8][3];
+#else
+extern const int cdef_pri_taps[2][2];
+extern const int cdef_sec_taps[2][2];
+extern const int cdef_directions[8][2];
+#endif
+
+#else // CONFIG_CDEF_SINGLEPASS
+extern const int cdef_directions[8][3];
+#endif
+
+typedef struct {
+ uint8_t by;
+ uint8_t bx;
+ uint8_t skip;
+} cdef_list;
+
+#if CONFIG_CDEF_SINGLEPASS
+typedef void (*cdef_filter_block_func)(uint8_t *dst8, uint16_t *dst16,
+ int dstride, const uint16_t *in,
+ int pri_strength, int sec_strength,
+ int dir, int pri_damping,
+ int sec_damping, int bsize, int max);
+void copy_cdef_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
+ cdef_list *dlist, int cdef_count, int bsize);
+#else
+typedef void (*cdef_direction_func)(uint16_t *y, int ystride,
+ const uint16_t *in, int threshold, int dir,
+ int damping);
+
+int get_filter_skip(int level);
+#endif
+
+#if CONFIG_CDEF_SINGLEPASS
+void cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride, uint16_t *in,
+ int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
+ int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
+ cdef_list *dlist, int cdef_count, int level,
+ int sec_strength, int pri_damping, int sec_damping,
+ int coeff_shift);
+#else
+void cdef_filter_fb(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in,
+ int xdec, int ydec, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],
+ int *dirinit, int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
+ cdef_list *dlist, int cdef_count, int level,
+ int sec_strength, int sec_damping, int pri_damping,
+ int coeff_shift, int skip_dering, int hbd);
+#endif
+#endif
diff --git a/third_party/aom/av1/common/cdef_block_avx2.c b/third_party/aom/av1/common/cdef_block_avx2.c
new file mode 100644
index 000000000..5e48045c0
--- /dev/null
+++ b/third_party/aom/av1/common/cdef_block_avx2.c
@@ -0,0 +1,14 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom_dsp/aom_simd.h"
+#define SIMD_FUNC(name) name##_avx2
+#include "./cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/od_dering_neon.c b/third_party/aom/av1/common/cdef_block_neon.c
index 99441050a..030b32531 100644
--- a/third_party/aom/av1/common/od_dering_neon.c
+++ b/third_party/aom/av1/common/cdef_block_neon.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_neon
-#include "./od_dering_simd.h"
+#include "./cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cdef_block_simd.h b/third_party/aom/av1/common/cdef_block_simd.h
new file mode 100644
index 000000000..aa7d3c3ca
--- /dev/null
+++ b/third_party/aom/av1/common/cdef_block_simd.h
@@ -0,0 +1,1214 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "./av1_rtcd.h"
+#include "./cdef_block.h"
+
+/* partial A is a 16-bit vector of the form:
+ [x8 x7 x6 x5 x4 x3 x2 x1] and partial B has the form:
+ [0 y1 y2 y3 y4 y5 y6 y7].
+ This function computes (x1^2+y1^2)*C1 + (x2^2+y2^2)*C2 + ...
+   (x7^2+y7^2)*C7 + (x8^2+0^2)*C8 where the C1..C8 constants are in const1
+ and const2. */
+static INLINE v128 fold_mul_and_sum(v128 partiala, v128 partialb, v128 const1,
+ v128 const2) {
+ v128 tmp;
+ /* Reverse partial B. */
+ partialb = v128_shuffle_8(
+ partialb, v128_from_32(0x0f0e0100, 0x03020504, 0x07060908, 0x0b0a0d0c));
+ /* Interleave the x and y values of identical indices and pair x8 with 0. */
+ tmp = partiala;
+ partiala = v128_ziplo_16(partialb, partiala);
+ partialb = v128_ziphi_16(partialb, tmp);
+ /* Square and add the corresponding x and y values. */
+ partiala = v128_madd_s16(partiala, partiala);
+ partialb = v128_madd_s16(partialb, partialb);
+ /* Multiply by constant. */
+ partiala = v128_mullo_s32(partiala, const1);
+ partialb = v128_mullo_s32(partialb, const2);
+ /* Sum all results. */
+ partiala = v128_add_32(partiala, partialb);
+ return partiala;
+}
+
+static INLINE v128 hsum4(v128 x0, v128 x1, v128 x2, v128 x3) {
+ v128 t0, t1, t2, t3;
+ t0 = v128_ziplo_32(x1, x0);
+ t1 = v128_ziplo_32(x3, x2);
+ t2 = v128_ziphi_32(x1, x0);
+ t3 = v128_ziphi_32(x3, x2);
+ x0 = v128_ziplo_64(t1, t0);
+ x1 = v128_ziphi_64(t1, t0);
+ x2 = v128_ziplo_64(t3, t2);
+ x3 = v128_ziphi_64(t3, t2);
+ return v128_add_32(v128_add_32(x0, x1), v128_add_32(x2, x3));
+}
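+
+/* hsum4() is a 4x4 transpose of 32-bit lanes followed by vertical adds,
+   so lane i of the result holds the horizontal sum of xi. */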
+
+/* Computes cost for directions 0, 5, 6 and 7. We can call this function again
+ to compute the remaining directions. */
+static INLINE v128 compute_directions(v128 lines[8], int32_t tmp_cost1[4]) {
+ v128 partial4a, partial4b, partial5a, partial5b, partial7a, partial7b;
+ v128 partial6;
+ v128 tmp;
+ /* Partial sums for lines 0 and 1. */
+ partial4a = v128_shl_n_byte(lines[0], 14);
+ partial4b = v128_shr_n_byte(lines[0], 2);
+ partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[1], 12));
+ partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[1], 4));
+ tmp = v128_add_16(lines[0], lines[1]);
+ partial5a = v128_shl_n_byte(tmp, 10);
+ partial5b = v128_shr_n_byte(tmp, 6);
+ partial7a = v128_shl_n_byte(tmp, 4);
+ partial7b = v128_shr_n_byte(tmp, 12);
+ partial6 = tmp;
+
+ /* Partial sums for lines 2 and 3. */
+ partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[2], 10));
+ partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[2], 6));
+ partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[3], 8));
+ partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[3], 8));
+ tmp = v128_add_16(lines[2], lines[3]);
+ partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 8));
+ partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 8));
+ partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 6));
+ partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 10));
+ partial6 = v128_add_16(partial6, tmp);
+
+ /* Partial sums for lines 4 and 5. */
+ partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[4], 6));
+ partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[4], 10));
+ partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[5], 4));
+ partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[5], 12));
+ tmp = v128_add_16(lines[4], lines[5]);
+ partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 6));
+ partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 10));
+ partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 8));
+ partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 8));
+ partial6 = v128_add_16(partial6, tmp);
+
+ /* Partial sums for lines 6 and 7. */
+ partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[6], 2));
+ partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[6], 14));
+ partial4a = v128_add_16(partial4a, lines[7]);
+ tmp = v128_add_16(lines[6], lines[7]);
+ partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 4));
+ partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 12));
+ partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 10));
+ partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 6));
+ partial6 = v128_add_16(partial6, tmp);
+
+ /* Compute costs in terms of partial sums. */
+ partial4a =
+ fold_mul_and_sum(partial4a, partial4b, v128_from_32(210, 280, 420, 840),
+ v128_from_32(105, 120, 140, 168));
+ partial7a =
+ fold_mul_and_sum(partial7a, partial7b, v128_from_32(210, 420, 0, 0),
+ v128_from_32(105, 105, 105, 140));
+ partial5a =
+ fold_mul_and_sum(partial5a, partial5b, v128_from_32(210, 420, 0, 0),
+ v128_from_32(105, 105, 105, 140));
+ partial6 = v128_madd_s16(partial6, partial6);
+ partial6 = v128_mullo_s32(partial6, v128_dup_32(105));
+
+ partial4a = hsum4(partial4a, partial5a, partial6, partial7a);
+ v128_store_unaligned(tmp_cost1, partial4a);
+ return partial4a;
+}
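+
+/* The constants fed to fold_mul_and_sum() are 840 / N for the number of
+   pixels N in each diagonal line: 105, 120, 140, 168 for lines of 8..5
+   pixels and 210, 280, 420, 840 for lines of 4..1; partial6 uses a flat
+   105 because all of its lines span 8 pixels. This is the same 840 that
+   cdef_find_dir() approximates with its final >> 10. */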
+
+/* Transpose and reverse the order of the lines -- equivalent to a 90-degree
+ counter-clockwise rotation of the pixels. */
+static INLINE void array_reverse_transpose_8x8(v128 *in, v128 *res) {
+ const v128 tr0_0 = v128_ziplo_16(in[1], in[0]);
+ const v128 tr0_1 = v128_ziplo_16(in[3], in[2]);
+ const v128 tr0_2 = v128_ziphi_16(in[1], in[0]);
+ const v128 tr0_3 = v128_ziphi_16(in[3], in[2]);
+ const v128 tr0_4 = v128_ziplo_16(in[5], in[4]);
+ const v128 tr0_5 = v128_ziplo_16(in[7], in[6]);
+ const v128 tr0_6 = v128_ziphi_16(in[5], in[4]);
+ const v128 tr0_7 = v128_ziphi_16(in[7], in[6]);
+
+ const v128 tr1_0 = v128_ziplo_32(tr0_1, tr0_0);
+ const v128 tr1_1 = v128_ziplo_32(tr0_5, tr0_4);
+ const v128 tr1_2 = v128_ziphi_32(tr0_1, tr0_0);
+ const v128 tr1_3 = v128_ziphi_32(tr0_5, tr0_4);
+ const v128 tr1_4 = v128_ziplo_32(tr0_3, tr0_2);
+ const v128 tr1_5 = v128_ziplo_32(tr0_7, tr0_6);
+ const v128 tr1_6 = v128_ziphi_32(tr0_3, tr0_2);
+ const v128 tr1_7 = v128_ziphi_32(tr0_7, tr0_6);
+
+ res[7] = v128_ziplo_64(tr1_1, tr1_0);
+ res[6] = v128_ziphi_64(tr1_1, tr1_0);
+ res[5] = v128_ziplo_64(tr1_3, tr1_2);
+ res[4] = v128_ziphi_64(tr1_3, tr1_2);
+ res[3] = v128_ziplo_64(tr1_5, tr1_4);
+ res[2] = v128_ziphi_64(tr1_5, tr1_4);
+ res[1] = v128_ziplo_64(tr1_7, tr1_6);
+ res[0] = v128_ziphi_64(tr1_7, tr1_6);
+}
+
+int SIMD_FUNC(cdef_find_dir)(const uint16_t *img, int stride, int32_t *var,
+ int coeff_shift) {
+ int i;
+ int32_t cost[8];
+ int32_t best_cost = 0;
+ int best_dir = 0;
+ v128 lines[8];
+ for (i = 0; i < 8; i++) {
+ lines[i] = v128_load_unaligned(&img[i * stride]);
+ lines[i] =
+ v128_sub_16(v128_shr_s16(lines[i], coeff_shift), v128_dup_16(128));
+ }
+
+#if defined(__SSE4_1__)
+ /* Compute "mostly vertical" directions. */
+ __m128i dir47 = compute_directions(lines, cost + 4);
+
+ array_reverse_transpose_8x8(lines, lines);
+
+ /* Compute "mostly horizontal" directions. */
+ __m128i dir03 = compute_directions(lines, cost);
+
+ __m128i max = _mm_max_epi32(dir03, dir47);
+ max = _mm_max_epi32(max, _mm_shuffle_epi32(max, _MM_SHUFFLE(1, 0, 3, 2)));
+ max = _mm_max_epi32(max, _mm_shuffle_epi32(max, _MM_SHUFFLE(2, 3, 0, 1)));
+ best_cost = _mm_cvtsi128_si32(max);
+ __m128i t =
+ _mm_packs_epi32(_mm_cmpeq_epi32(max, dir03), _mm_cmpeq_epi32(max, dir47));
+ best_dir = _mm_movemask_epi8(_mm_packs_epi16(t, t));
+ best_dir = get_msb(best_dir ^ (best_dir - 1)); // Count trailing zeros
+#else
+ /* Compute "mostly vertical" directions. */
+ compute_directions(lines, cost + 4);
+
+ array_reverse_transpose_8x8(lines, lines);
+
+ /* Compute "mostly horizontal" directions. */
+ compute_directions(lines, cost);
+
+ for (i = 0; i < 8; i++) {
+ if (cost[i] > best_cost) {
+ best_cost = cost[i];
+ best_dir = i;
+ }
+ }
+#endif
+
+ /* Difference between the optimal variance and the variance along the
+ orthogonal direction. Again, the sum(x^2) terms cancel out. */
+ *var = best_cost - cost[(best_dir + 4) & 7];
+ /* We'd normally divide by 840, but dividing by 1024 is close enough
+ for what we're going to do with this. */
+ *var >>= 10;
+ return best_dir;
+}
+
+// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
+SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
+ unsigned int adjdamp) {
+ v128 diff = v128_sub_16(a, b);
+ const v128 sign = v128_shr_n_s16(diff, 15);
+ diff = v128_abs_s16(diff);
+ const v128 s =
+ v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp));
+ return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
+}
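+
+/* Note: the callers below pre-subtract get_msb(strength) from the
+   damping, so the adjdamp passed to constrain16() already includes the
+   strength-dependent shift of the scalar filter. */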
+
+#if CONFIG_CDEF_SINGLEPASS
+// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
+SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
+ unsigned int adjdamp) {
+ const v256 diff16 = v256_sub_16(a, b);
+ v128 diff = v128_pack_s16_s8(v256_high_v128(diff16), v256_low_v128(diff16));
+ const v128 sign = v128_cmplt_s8(diff, v128_zero());
+ diff = v128_abs_s8(diff);
+ return v128_xor(
+ v128_add_8(sign,
+ v128_min_u8(diff, v128_ssub_u8(v128_dup_8(strength),
+ v128_shr_u8(diff, adjdamp)))),
+ sign);
+}
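+
+/* The 8-bit variant packs the 16-bit differences down to int8 (with
+   saturation) so the tap multiplies can run at byte granularity through
+   v256_madd_us8(); the working assumption, inherited from the scalar
+   filter's strength/damping ranges, is that the constrained magnitudes
+   fit in 8 bits. */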
+
+#if CDEF_CAP
+void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
+ const uint16_t *in, int pri_strength,
+ int sec_strength, int dir,
+ int pri_damping, int sec_damping,
+ UNUSED int max_unused)
+#else
+void SIMD_FUNC(cdef_filter_block_4x4_8)(uint8_t *dst, int dstride,
+ const uint16_t *in, int pri_strength,
+ int sec_strength, int dir,
+ int pri_damping, int sec_damping,
+ int max)
+#endif
+{
+ v128 p0, p1, p2, p3;
+ v256 sum, row, tap, res;
+#if CDEF_CAP
+ v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
+#endif
+ int po1 = cdef_directions[dir][0];
+ int po2 = cdef_directions[dir][1];
+#if CDEF_FULL
+ int po3 = cdef_directions[dir][2];
+#endif
+ int s1o1 = cdef_directions[(dir + 2) & 7][0];
+ int s1o2 = cdef_directions[(dir + 2) & 7][1];
+ int s2o1 = cdef_directions[(dir + 6) & 7][0];
+ int s2o2 = cdef_directions[(dir + 6) & 7][1];
+
+ const int *pri_taps = cdef_pri_taps[pri_strength & 1];
+ const int *sec_taps = cdef_sec_taps[pri_strength & 1];
+
+ if (pri_strength)
+ pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
+ if (sec_strength)
+ sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));
+
+ sum = v256_zero();
+ row = v256_from_v64(v64_load_aligned(&in[0 * CDEF_BSTRIDE]),
+ v64_load_aligned(&in[1 * CDEF_BSTRIDE]),
+ v64_load_aligned(&in[2 * CDEF_BSTRIDE]),
+ v64_load_aligned(&in[3 * CDEF_BSTRIDE]));
+#if CDEF_CAP
+ max = min = row;
+#endif
+
+ if (pri_strength) {
+ // Primary near taps
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + po1]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE + po1]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE + po1]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE + po1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p0 = constrain(tap, row, pri_strength, pri_damping);
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - po1]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE - po1]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE - po1]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE - po1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p1 = constrain(tap, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[0] * (p0 + p1)
+ sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[0]),
+ v256_from_v128(v128_ziphi_8(p0, p1),
+ v128_ziplo_8(p0, p1))));
+
+ // Primary far taps
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + po2]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE + po2]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE + po2]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE + po2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p0 = constrain(tap, row, pri_strength, pri_damping);
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - po2]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE - po2]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE - po2]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE - po2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p1 = constrain(tap, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[1] * (p0 + p1)
+ sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[1]),
+ v256_from_v128(v128_ziphi_8(p0, p1),
+ v128_ziplo_8(p0, p1))));
+
+#if CDEF_FULL
+ // Primary extra taps
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + po3]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE + po3]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE + po3]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE + po3]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p0 = constrain(tap, row, pri_strength, pri_damping);
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - po3]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE - po3]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE - po3]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE - po3]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p1 = constrain(tap, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[2] * (p0 + p1)
+ sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[2]),
+ v256_from_v128(v128_ziphi_8(p0, p1),
+ v128_ziplo_8(p0, p1))));
+#endif
+ }
+
+ if (sec_strength) {
+ // Secondary near taps
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s1o1]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s1o1]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s1o1]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s1o1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p0 = constrain(tap, row, sec_strength, sec_damping);
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s1o1]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s1o1]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s1o1]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s1o1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p1 = constrain(tap, row, sec_strength, sec_damping);
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s2o1]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s2o1]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s2o1]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s2o1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p2 = constrain(tap, row, sec_strength, sec_damping);
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s2o1]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s2o1]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s2o1]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s2o1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p3 = constrain(tap, row, sec_strength, sec_damping);
+
+ // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
+ p0 = v128_add_8(p0, p1);
+ p2 = v128_add_8(p2, p3);
+ sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[0]),
+ v256_from_v128(v128_ziphi_8(p0, p2),
+ v128_ziplo_8(p0, p2))));
+
+ // Secondary far taps
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s1o2]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s1o2]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s1o2]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s1o2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p0 = constrain(tap, row, sec_strength, sec_damping);
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s1o2]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s1o2]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s1o2]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s1o2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p1 = constrain(tap, row, sec_strength, sec_damping);
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE + s2o2]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE + s2o2]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE + s2o2]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE + s2o2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p2 = constrain(tap, row, sec_strength, sec_damping);
+ tap = v256_from_v64(v64_load_unaligned(&in[0 * CDEF_BSTRIDE - s2o2]),
+ v64_load_unaligned(&in[1 * CDEF_BSTRIDE - s2o2]),
+ v64_load_unaligned(&in[2 * CDEF_BSTRIDE - s2o2]),
+ v64_load_unaligned(&in[3 * CDEF_BSTRIDE - s2o2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p3 = constrain(tap, row, sec_strength, sec_damping);
+
+ // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
+ p0 = v128_add_8(p0, p1);
+ p2 = v128_add_8(p2, p3);
+
+ sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[1]),
+ v256_from_v128(v128_ziphi_8(p0, p2),
+ v128_ziplo_8(p0, p2))));
+ }
+
+ // res = row + ((sum - (sum < 0) + 8) >> 4)
+ sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
+ res = v256_add_16(sum, v256_dup_16(8));
+ res = v256_shr_n_s16(res, 4);
+ res = v256_add_16(row, res);
+#if CDEF_CAP
+ res = v256_min_s16(v256_max_s16(res, min), max);
+#else
+ res = v256_min_s16(v256_max_s16(res, v256_zero()), v256_dup_16(max));
+#endif
+ res = v256_pack_s16_u8(res, res);
+
+ p0 = v256_low_v128(res);
+ u32_store_aligned(&dst[0 * dstride], v64_high_u32(v128_high_v64(p0)));
+ u32_store_aligned(&dst[1 * dstride], v64_low_u32(v128_high_v64(p0)));
+ u32_store_aligned(&dst[2 * dstride], v64_high_u32(v128_low_v64(p0)));
+ u32_store_aligned(&dst[3 * dstride], v64_low_u32(v128_low_v64(p0)));
+}
+
+#if CDEF_CAP
+void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
+ const uint16_t *in, int pri_strength,
+ int sec_strength, int dir,
+ int pri_damping, int sec_damping,
+ UNUSED int max_unused)
+#else
+void SIMD_FUNC(cdef_filter_block_8x8_8)(uint8_t *dst, int dstride,
+ const uint16_t *in, int pri_strength,
+ int sec_strength, int dir,
+ int pri_damping, int sec_damping,
+ int max)
+#endif
+{
+ int i;
+ v128 p0, p1, p2, p3;
+ v256 sum, row, res, tap;
+#if CDEF_CAP
+ v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
+#endif
+ int po1 = cdef_directions[dir][0];
+ int po2 = cdef_directions[dir][1];
+#if CDEF_FULL
+ int po3 = cdef_directions[dir][2];
+#endif
+ int s1o1 = cdef_directions[(dir + 2) & 7][0];
+ int s1o2 = cdef_directions[(dir + 2) & 7][1];
+ int s2o1 = cdef_directions[(dir + 6) & 7][0];
+ int s2o2 = cdef_directions[(dir + 6) & 7][1];
+
+ const int *pri_taps = cdef_pri_taps[pri_strength & 1];
+ const int *sec_taps = cdef_sec_taps[pri_strength & 1];
+
+ if (pri_strength)
+ pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
+ if (sec_strength)
+ sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));
+ for (i = 0; i < 8; i += 2) {
+ sum = v256_zero();
+ row = v256_from_v128(v128_load_aligned(&in[i * CDEF_BSTRIDE]),
+ v128_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
+
+#if CDEF_CAP
+ max = min = row;
+#endif
+ // Primary near taps
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p0 = constrain(tap, row, pri_strength, pri_damping);
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p1 = constrain(tap, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[0] * (p0 + p1)
+ sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[0]),
+ v256_from_v128(v128_ziphi_8(p0, p1),
+ v128_ziplo_8(p0, p1))));
+
+ // Primary far taps
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p0 = constrain(tap, row, pri_strength, pri_damping);
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p1 = constrain(tap, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[1] * (p0 + p1)
+ sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[1]),
+ v256_from_v128(v128_ziphi_8(p0, p1),
+ v128_ziplo_8(p0, p1))));
+
+#if CDEF_FULL
+ // Primary extra taps
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + po3]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po3]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p0 = constrain(tap, row, pri_strength, pri_damping);
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - po3]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po3]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p1 = constrain(tap, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[2] * (p0 + p1)
+ sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(pri_taps[2]),
+ v256_from_v128(v128_ziphi_8(p0, p1),
+ v128_ziplo_8(p0, p1))));
+#endif
+
+ // Secondary near taps
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p0 = constrain(tap, row, sec_strength, sec_damping);
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p1 = constrain(tap, row, sec_strength, sec_damping);
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p2 = constrain(tap, row, sec_strength, sec_damping);
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p3 = constrain(tap, row, sec_strength, sec_damping);
+
+ // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
+ p0 = v128_add_8(p0, p1);
+ p2 = v128_add_8(p2, p3);
+ sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[0]),
+ v256_from_v128(v128_ziphi_8(p0, p2),
+ v128_ziplo_8(p0, p2))));
+
+ // Secondary far taps
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p0 = constrain(tap, row, sec_strength, sec_damping);
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p1 = constrain(tap, row, sec_strength, sec_damping);
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p2 = constrain(tap, row, sec_strength, sec_damping);
+ tap =
+ v256_from_v128(v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
+ v128_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]));
+#if CDEF_CAP
+ max = v256_max_s16(max, v256_andn(tap, v256_cmpeq_16(tap, large)));
+ min = v256_min_s16(min, tap);
+#endif
+ p3 = constrain(tap, row, sec_strength, sec_damping);
+
+ // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
+ p0 = v128_add_8(p0, p1);
+ p2 = v128_add_8(p2, p3);
+ sum = v256_add_16(sum, v256_madd_us8(v256_dup_8(sec_taps[1]),
+ v256_from_v128(v128_ziphi_8(p0, p2),
+ v128_ziplo_8(p0, p2))));
+
+ // res = row + ((sum - (sum < 0) + 8) >> 4)
+ sum = v256_add_16(sum, v256_cmplt_s16(sum, v256_zero()));
+ res = v256_add_16(sum, v256_dup_16(8));
+ res = v256_shr_n_s16(res, 4);
+ res = v256_add_16(row, res);
+#if CDEF_CAP
+ res = v256_min_s16(v256_max_s16(res, min), max);
+#else
+ res = v256_min_s16(v256_max_s16(res, v256_zero()), v256_dup_16(max));
+#endif
+ res = v256_pack_s16_u8(res, res);
+
+ p0 = v256_low_v128(res);
+ v64_store_aligned(&dst[i * dstride], v128_high_v64(p0));
+ v64_store_aligned(&dst[(i + 1) * dstride], v128_low_v64(p0));
+ }
+}
+
+#if CDEF_CAP
+void SIMD_FUNC(cdef_filter_block_4x4_16)(uint16_t *dst, int dstride,
+ const uint16_t *in, int pri_strength,
+ int sec_strength, int dir,
+ int pri_damping, int sec_damping,
+ UNUSED int max_unused)
+#else
+void SIMD_FUNC(cdef_filter_block_4x4_16)(uint16_t *dst, int dstride,
+ const uint16_t *in, int pri_strength,
+ int sec_strength, int dir,
+ int pri_damping, int sec_damping,
+ int max)
+#endif
+{
+ int i;
+ v128 p0, p1, p2, p3, sum, row, res;
+#if CDEF_CAP
+ v128 max, min, large = v128_dup_16(CDEF_VERY_LARGE);
+#endif
+ int po1 = cdef_directions[dir][0];
+ int po2 = cdef_directions[dir][1];
+#if CDEF_FULL
+ int po3 = cdef_directions[dir][2];
+#endif
+ int s1o1 = cdef_directions[(dir + 2) & 7][0];
+ int s1o2 = cdef_directions[(dir + 2) & 7][1];
+ int s2o1 = cdef_directions[(dir + 6) & 7][0];
+ int s2o2 = cdef_directions[(dir + 6) & 7][1];
+
+ const int *pri_taps = cdef_pri_taps[pri_strength & 1];
+ const int *sec_taps = cdef_sec_taps[pri_strength & 1];
+
+ if (pri_strength)
+ pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
+ if (sec_strength)
+ sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));
+ for (i = 0; i < 4; i += 2) {
+ sum = v128_zero();
+ row = v128_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
+ v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
+#if CDEF_CAP
+ min = max = row;
+#endif
+
+ // Primary near taps
+ p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po1]));
+ p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po1]));
+#if CDEF_CAP
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
+ v128_andn(p1, v128_cmpeq_16(p1, large)));
+ min = v128_min_s16(v128_min_s16(min, p0), p1);
+#endif
+ p0 = constrain16(p0, row, pri_strength, pri_damping);
+ p1 = constrain16(p1, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[0] * (p0 + p1)
+ sum = v128_add_16(
+ sum, v128_mullo_s16(v128_dup_16(pri_taps[0]), v128_add_16(p0, p1)));
+
+ // Primary far taps
+ p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po2]));
+ p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po2]));
+#if CDEF_CAP
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
+ v128_andn(p1, v128_cmpeq_16(p1, large)));
+ min = v128_min_s16(v128_min_s16(min, p0), p1);
+#endif
+ p0 = constrain16(p0, row, pri_strength, pri_damping);
+ p1 = constrain16(p1, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[1] * (p0 + p1)
+ sum = v128_add_16(
+ sum, v128_mullo_s16(v128_dup_16(pri_taps[1]), v128_add_16(p0, p1)));
+
+#if CDEF_FULL
+ // Primary extra taps
+ p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + po3]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + po3]));
+ p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - po3]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - po3]));
+#if CDEF_CAP
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
+ v128_andn(p1, v128_cmpeq_16(p1, large)));
+ min = v128_min_s16(v128_min_s16(min, p0), p1);
+#endif
+ p0 = constrain16(p0, row, pri_strength, pri_damping);
+ p1 = constrain16(p1, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[2] * (p0 + p1)
+ sum = v128_add_16(
+ sum, v128_mullo_s16(v128_dup_16(pri_taps[2]), v128_add_16(p0, p1)));
+#endif
+
+ // Secondary near taps
+ p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o1]));
+ p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o1]));
+ p2 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o1]));
+ p3 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o1]));
+#if CDEF_CAP
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
+ v128_andn(p1, v128_cmpeq_16(p1, large)));
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p2, v128_cmpeq_16(p2, large))),
+ v128_andn(p3, v128_cmpeq_16(p3, large)));
+ min = v128_min_s16(
+ v128_min_s16(v128_min_s16(v128_min_s16(min, p0), p1), p2), p3);
+#endif
+ p0 = constrain16(p0, row, sec_strength, sec_damping);
+ p1 = constrain16(p1, row, sec_strength, sec_damping);
+ p2 = constrain16(p2, row, sec_strength, sec_damping);
+ p3 = constrain16(p3, row, sec_strength, sec_damping);
+
+ // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
+ sum = v128_add_16(sum, v128_mullo_s16(v128_dup_16(sec_taps[0]),
+ v128_add_16(v128_add_16(p0, p1),
+ v128_add_16(p2, p3))));
+
+ // Secondary far taps
+ p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s1o2]));
+ p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s1o2]));
+ p2 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + s2o2]));
+ p3 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - s2o2]));
+#if CDEF_CAP
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
+ v128_andn(p1, v128_cmpeq_16(p1, large)));
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p2, v128_cmpeq_16(p2, large))),
+ v128_andn(p3, v128_cmpeq_16(p3, large)));
+ min = v128_min_s16(
+ v128_min_s16(v128_min_s16(v128_min_s16(min, p0), p1), p2), p3);
+#endif
+ p0 = constrain16(p0, row, sec_strength, sec_damping);
+ p1 = constrain16(p1, row, sec_strength, sec_damping);
+ p2 = constrain16(p2, row, sec_strength, sec_damping);
+ p3 = constrain16(p3, row, sec_strength, sec_damping);
+
+ // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
+ sum = v128_add_16(sum, v128_mullo_s16(v128_dup_16(sec_taps[1]),
+ v128_add_16(v128_add_16(p0, p1),
+ v128_add_16(p2, p3))));
+
+ // res = row + ((sum - (sum < 0) + 8) >> 4)
+ sum = v128_add_16(sum, v128_cmplt_s16(sum, v128_zero()));
+ res = v128_add_16(sum, v128_dup_16(8));
+ res = v128_shr_n_s16(res, 4);
+ res = v128_add_16(row, res);
+#if CDEF_CAP
+ res = v128_min_s16(v128_max_s16(res, min), max);
+#else
+ res = v128_min_s16(v128_max_s16(res, v128_zero()), v128_dup_16(max));
+#endif
+ v64_store_aligned(&dst[i * dstride], v128_high_v64(res));
+ v64_store_aligned(&dst[(i + 1) * dstride], v128_low_v64(res));
+ }
+}
+
+#if CDEF_CAP
+void SIMD_FUNC(cdef_filter_block_8x8_16)(uint16_t *dst, int dstride,
+ const uint16_t *in, int pri_strength,
+ int sec_strength, int dir,
+ int pri_damping, int sec_damping,
+ UNUSED int max_unused)
+#else
+void SIMD_FUNC(cdef_filter_block_8x8_16)(uint16_t *dst, int dstride,
+ const uint16_t *in, int pri_strength,
+ int sec_strength, int dir,
+ int pri_damping, int sec_damping,
+ int max)
+#endif
+{
+ int i;
+ v128 sum, p0, p1, p2, p3, row, res;
+#if CDEF_CAP
+ v128 max, min, large = v128_dup_16(CDEF_VERY_LARGE);
+#endif
+ int po1 = cdef_directions[dir][0];
+ int po2 = cdef_directions[dir][1];
+#if CDEF_FULL
+ int po3 = cdef_directions[dir][2];
+#endif
+ int s1o1 = cdef_directions[(dir + 2) & 7][0];
+ int s1o2 = cdef_directions[(dir + 2) & 7][1];
+ int s2o1 = cdef_directions[(dir + 6) & 7][0];
+ int s2o2 = cdef_directions[(dir + 6) & 7][1];
+
+ const int *pri_taps = cdef_pri_taps[pri_strength & 1];
+ const int *sec_taps = cdef_sec_taps[pri_strength & 1];
+
+ if (pri_strength)
+ pri_damping = AOMMAX(0, pri_damping - get_msb(pri_strength));
+ if (sec_strength)
+ sec_damping = AOMMAX(0, sec_damping - get_msb(sec_strength));
+
+ for (i = 0; i < 8; i++) {
+ sum = v128_zero();
+ row = v128_load_aligned(&in[i * CDEF_BSTRIDE]);
+
+#if CDEF_CAP
+ min = max = row;
+#endif
+ // Primary near taps
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + po1]);
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - po1]);
+#if CDEF_CAP
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
+ v128_andn(p1, v128_cmpeq_16(p1, large)));
+ min = v128_min_s16(v128_min_s16(min, p0), p1);
+#endif
+ p0 = constrain16(p0, row, pri_strength, pri_damping);
+ p1 = constrain16(p1, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[0] * (p0 + p1)
+ sum = v128_add_16(
+ sum, v128_mullo_s16(v128_dup_16(pri_taps[0]), v128_add_16(p0, p1)));
+
+ // Primary far taps
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + po2]);
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - po2]);
+#if CDEF_CAP
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
+ v128_andn(p1, v128_cmpeq_16(p1, large)));
+ min = v128_min_s16(v128_min_s16(min, p0), p1);
+#endif
+ p0 = constrain16(p0, row, pri_strength, pri_damping);
+ p1 = constrain16(p1, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[1] * (p0 + p1)
+ sum = v128_add_16(
+ sum, v128_mullo_s16(v128_dup_16(pri_taps[1]), v128_add_16(p0, p1)));
+
+#if CDEF_FULL
+ // Primary extra taps
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + po3]);
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - po3]);
+#if CDEF_CAP
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
+ v128_andn(p1, v128_cmpeq_16(p1, large)));
+ min = v128_min_s16(v128_min_s16(min, p0), p1);
+#endif
+ p0 = constrain16(p0, row, pri_strength, pri_damping);
+ p1 = constrain16(p1, row, pri_strength, pri_damping);
+
+ // sum += pri_taps[2] * (p0 + p1)
+ sum = v128_add_16(
+ sum, v128_mullo_s16(v128_dup_16(pri_taps[2]), v128_add_16(p0, p1)));
+#endif
+
+ // Secondary near taps
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o1]);
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o1]);
+ p2 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o1]);
+ p3 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o1]);
+#if CDEF_CAP
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
+ v128_andn(p1, v128_cmpeq_16(p1, large)));
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p2, v128_cmpeq_16(p2, large))),
+ v128_andn(p3, v128_cmpeq_16(p3, large)));
+ min = v128_min_s16(
+ v128_min_s16(v128_min_s16(v128_min_s16(min, p0), p1), p2), p3);
+#endif
+ p0 = constrain16(p0, row, sec_strength, sec_damping);
+ p1 = constrain16(p1, row, sec_strength, sec_damping);
+ p2 = constrain16(p2, row, sec_strength, sec_damping);
+ p3 = constrain16(p3, row, sec_strength, sec_damping);
+
+ // sum += sec_taps[0] * (p0 + p1 + p2 + p3)
+ sum = v128_add_16(sum, v128_mullo_s16(v128_dup_16(sec_taps[0]),
+ v128_add_16(v128_add_16(p0, p1),
+ v128_add_16(p2, p3))));
+
+ // Secondary far taps
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + s1o2]);
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - s1o2]);
+ p2 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + s2o2]);
+ p3 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - s2o2]);
+#if CDEF_CAP
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p0, v128_cmpeq_16(p0, large))),
+ v128_andn(p1, v128_cmpeq_16(p1, large)));
+ max =
+ v128_max_s16(v128_max_s16(max, v128_andn(p2, v128_cmpeq_16(p2, large))),
+ v128_andn(p3, v128_cmpeq_16(p3, large)));
+ min = v128_min_s16(
+ v128_min_s16(v128_min_s16(v128_min_s16(min, p0), p1), p2), p3);
+#endif
+ p0 = constrain16(p0, row, sec_strength, sec_damping);
+ p1 = constrain16(p1, row, sec_strength, sec_damping);
+ p2 = constrain16(p2, row, sec_strength, sec_damping);
+ p3 = constrain16(p3, row, sec_strength, sec_damping);
+
+ // sum += sec_taps[1] * (p0 + p1 + p2 + p3)
+ sum = v128_add_16(sum, v128_mullo_s16(v128_dup_16(sec_taps[1]),
+ v128_add_16(v128_add_16(p0, p1),
+ v128_add_16(p2, p3))));
+
+ // res = row + ((sum - (sum < 0) + 8) >> 4)
+ sum = v128_add_16(sum, v128_cmplt_s16(sum, v128_zero()));
+ res = v128_add_16(sum, v128_dup_16(8));
+ res = v128_shr_n_s16(res, 4);
+ res = v128_add_16(row, res);
+#if CDEF_CAP
+ res = v128_min_s16(v128_max_s16(res, min), max);
+#else
+ res = v128_min_s16(v128_max_s16(res, v128_zero()), v128_dup_16(max));
+#endif
+ v128_store_unaligned(&dst[i * dstride], res);
+ }
+}
+
+void SIMD_FUNC(cdef_filter_block)(uint8_t *dst8, uint16_t *dst16, int dstride,
+ const uint16_t *in, int pri_strength,
+ int sec_strength, int dir, int pri_damping,
+ int sec_damping, int bsize, int max) {
+ if (dst8)
+ (bsize == BLOCK_8X8 ? SIMD_FUNC(cdef_filter_block_8x8_8)
+ : SIMD_FUNC(cdef_filter_block_4x4_8))(
+ dst8, dstride, in, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max);
+ else
+ (bsize == BLOCK_8X8 ? SIMD_FUNC(cdef_filter_block_8x8_16)
+ : SIMD_FUNC(cdef_filter_block_4x4_16))(
+ dst16, dstride, in, pri_strength, sec_strength, dir, pri_damping,
+ sec_damping, max);
+}
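+
+/* Usage sketch, mirroring the calls in cdef_filter_fb() (hedged, not a
+   normative API note): for an 8-bit frame pass dst8 and a NULL dst16,
+
+     SIMD_FUNC(cdef_filter_block)(dst8, NULL, dstride, in, pri_strength,
+                                  sec_strength, dir, pri_damping,
+                                  sec_damping, BLOCK_8X8,
+                                  (256 << coeff_shift) - 1);
+
+   and for high bitdepth pass NULL and a dst16 buffer instead. */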
+
+#else
+
+void SIMD_FUNC(cdef_direction_4x4)(uint16_t *y, int ystride, const uint16_t *in,
+ int threshold, int dir, int damping) {
+ int i;
+ v128 p0, p1, sum, row, res;
+ int o1 = cdef_directions[dir][0];
+ int o2 = cdef_directions[dir][1];
+
+ if (threshold) damping -= get_msb(threshold);
+ for (i = 0; i < 4; i += 2) {
+ sum = v128_zero();
+ row = v128_from_v64(v64_load_aligned(&in[i * CDEF_BSTRIDE]),
+ v64_load_aligned(&in[(i + 1) * CDEF_BSTRIDE]));
+
+ // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
+ p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o1]));
+ p0 = constrain16(p0, row, threshold, damping);
+
+ // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
+ p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o1]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o1]));
+ p1 = constrain16(p1, row, threshold, damping);
+
+ // sum += 4 * (p0 + p1)
+ sum = v128_add_16(sum, v128_shl_n_16(v128_add_16(p0, p1), 2));
+
+ // p0 = constrain16(in[i*CDEF_BSTRIDE + offset], row, threshold, damping)
+ p0 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE + o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE + o2]));
+ p0 = constrain16(p0, row, threshold, damping);
+
+ // p1 = constrain16(in[i*CDEF_BSTRIDE - offset], row, threshold, damping)
+ p1 = v128_from_v64(v64_load_unaligned(&in[i * CDEF_BSTRIDE - o2]),
+ v64_load_unaligned(&in[(i + 1) * CDEF_BSTRIDE - o2]));
+ p1 = constrain16(p1, row, threshold, damping);
+
+ // sum += 1 * (p0 + p1)
+ sum = v128_add_16(sum, v128_add_16(p0, p1));
+
+ // res = row + ((sum + 8) >> 4)
+ res = v128_add_16(sum, v128_dup_16(8));
+ res = v128_shr_n_s16(res, 4);
+ res = v128_add_16(row, res);
+ v64_store_aligned(&y[i * ystride], v128_high_v64(res));
+ v64_store_aligned(&y[(i + 1) * ystride], v128_low_v64(res));
+ }
+}
+
+void SIMD_FUNC(cdef_direction_8x8)(uint16_t *y, int ystride, const uint16_t *in,
+ int threshold, int dir, int damping) {
+ int i;
+ v128 sum, p0, p1, row, res;
+ int o1 = cdef_directions[dir][0];
+ int o2 = cdef_directions[dir][1];
+ int o3 = cdef_directions[dir][2];
+
+ if (threshold) damping -= get_msb(threshold);
+ for (i = 0; i < 8; i++) {
+ sum = v128_zero();
+ row = v128_load_aligned(&in[i * CDEF_BSTRIDE]);
+
+ // p0 = constrain16(in[i*CDEF_BSTRIDE + o1], row, threshold, damping)
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o1]);
+ p0 = constrain16(p0, row, threshold, damping);
+
+ // p1 = constrain16(in[i*CDEF_BSTRIDE - o1], row, threshold, damping)
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o1]);
+ p1 = constrain16(p1, row, threshold, damping);
+
+ // sum += 3 * (p0 + p1)
+ p0 = v128_add_16(p0, p1);
+ p0 = v128_add_16(p0, v128_shl_n_16(p0, 1));
+ sum = v128_add_16(sum, p0);
+
+ // p0 = constrain16(in[i*CDEF_BSTRIDE + o2], row, threshold, damping)
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o2]);
+ p0 = constrain16(p0, row, threshold, damping);
+
+ // p1 = constrain16(in[i*CDEF_BSTRIDE - o2], row, threshold, damping)
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o2]);
+ p1 = constrain16(p1, row, threshold, damping);
+
+ // sum += 2 * (p0 + p1)
+ p0 = v128_shl_n_16(v128_add_16(p0, p1), 1);
+ sum = v128_add_16(sum, p0);
+
+ // p0 = constrain16(in[i*CDEF_BSTRIDE + o3], row, threshold, damping)
+ p0 = v128_load_unaligned(&in[i * CDEF_BSTRIDE + o3]);
+ p0 = constrain16(p0, row, threshold, damping);
+
+ // p1 = constrain16(in[i*CDEF_BSTRIDE - o3], row, threshold, damping)
+ p1 = v128_load_unaligned(&in[i * CDEF_BSTRIDE - o3]);
+ p1 = constrain16(p1, row, threshold, damping);
+
+ // sum += (p0 + p1)
+ p0 = v128_add_16(p0, p1);
+ sum = v128_add_16(sum, p0);
+
+ // res = row + ((sum + 8) >> 4)
+ res = v128_add_16(sum, v128_dup_16(8));
+ res = v128_shr_n_s16(res, 4);
+ res = v128_add_16(row, res);
+ v128_store_unaligned(&y[i * ystride], res);
+ }
+}
+
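[Editor's note] For reference, a scalar model of the 8x8 direction filter above: three symmetric tap pairs with weights {3, 2, 1} at offsets o1..o3 taken from cdef_directions[dir], each difference passed through the constrain nonlinearity, then the same biased rounding. This is a hedged sketch; constrain_scalar and cdef_dir_pixel are illustrative names, and damping is assumed to have already been reduced by get_msb(threshold) as in the caller.

#include <stdint.h>

static int constrain_scalar(int diff, int threshold, int adjdamp) {
  // sign(diff) * min(|diff|, max(0, threshold - (|diff| >> adjdamp)))
  const int a = diff < 0 ? -diff : diff;
  int s = threshold - (a >> adjdamp);
  if (s < 0) s = 0;
  const int m = a < s ? a : s;
  return diff < 0 ? -m : m;
}

static int cdef_dir_pixel(const uint16_t *px, int o1, int o2, int o3,
                          int threshold, int damping) {
  const int taps[3] = { 3, 2, 1 };
  const int offs[3] = { o1, o2, o3 };
  int sum = 0;
  for (int k = 0; k < 3; k++) {
    sum += taps[k] * constrain_scalar(px[offs[k]] - px[0], threshold, damping);
    sum += taps[k] * constrain_scalar(px[-offs[k]] - px[0], threshold, damping);
  }
  return px[0] + ((sum + 8) >> 4);
}
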
+void SIMD_FUNC(copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride,
+ const uint16_t *src, int sstride) {
+ int i;
+ for (i = 0; i < 8; i++) {
+ v128 row = v128_load_unaligned(&src[i * sstride]);
+ row = v128_pack_s16_u8(row, row);
+ v64_store_unaligned(&dst[i * dstride], v128_low_v64(row));
+ }
+}
+
+void SIMD_FUNC(copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride,
+ const uint16_t *src, int sstride) {
+ int i;
+ for (i = 0; i < 4; i++) {
+ v128 row = v128_load_unaligned(&src[i * sstride]);
+ row = v128_pack_s16_u8(row, row);
+ u32_store_unaligned(&dst[i * dstride], v128_low_u32(row));
+ }
+}
+
+void SIMD_FUNC(copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride,
+ const uint16_t *src, int sstride) {
+ int i;
+ for (i = 0; i < 8; i++) {
+ v128 row = v128_load_unaligned(&src[i * sstride]);
+ v128_store_unaligned(&dst[i * dstride], row);
+ }
+}
+
+void SIMD_FUNC(copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride,
+ const uint16_t *src, int sstride) {
+ int i;
+ for (i = 0; i < 4; i++) {
+ v64 row = v64_load_unaligned(&src[i * sstride]);
+ v64_store_unaligned(&dst[i * dstride], row);
+ }
+}
+#endif
+
+void SIMD_FUNC(copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride,
+ const uint8_t *src, int sstride, int v,
+ int h) {
+ int i, j;
+ for (i = 0; i < v; i++) {
+ for (j = 0; j < (h & ~0x7); j += 8) {
+ v64 row = v64_load_unaligned(&src[i * sstride + j]);
+ v128_store_unaligned(&dst[i * dstride + j], v128_unpack_u8_s16(row));
+ }
+ for (; j < h; j++) {
+ dst[i * dstride + j] = src[i * sstride + j];
+ }
+ }
+}
+
+void SIMD_FUNC(copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride,
+ const uint16_t *src, int sstride,
+ int v, int h) {
+ int i, j;
+ for (i = 0; i < v; i++) {
+ for (j = 0; j < (h & ~0x7); j += 8) {
+ v128 row = v128_load_unaligned(&src[i * sstride + j]);
+ v128_store_unaligned(&dst[i * dstride + j], row);
+ }
+ for (; j < h; j++) {
+ dst[i * dstride + j] = src[i * sstride + j];
+ }
+ }
+}
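[Editor's note] Both copy_rect helpers use the same loop-peeling pattern: a SIMD body over the first h & ~7 columns (eight lanes per iteration) and a scalar tail for the remaining h % 8. A plain-C sketch of the widening variant, equivalent in effect though not in speed:

#include <stdint.h>

static void copy_rect8_8to16_scalar(uint16_t *dst, int dstride,
                                    const uint8_t *src, int sstride,
                                    int v, int h) {
  for (int i = 0; i < v; i++)
    for (int j = 0; j < h; j++)
      dst[i * dstride + j] = src[i * sstride + j];  // zero-extend u8 -> u16
}
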
diff --git a/third_party/aom/av1/common/od_dering_sse2.c b/third_party/aom/av1/common/cdef_block_sse2.c
index 8a2a62f6c..f3de763fa 100644
--- a/third_party/aom/av1/common/od_dering_sse2.c
+++ b/third_party/aom/av1/common/cdef_block_sse2.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse2
-#include "./od_dering_simd.h"
+#include "./cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/od_dering_sse4.c b/third_party/aom/av1/common/cdef_block_sse4.c
index 0769db9fd..27e9ff32e 100644
--- a/third_party/aom/av1/common/od_dering_sse4.c
+++ b/third_party/aom/av1/common/cdef_block_sse4.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse4_1
-#include "./od_dering_simd.h"
+#include "./cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/od_dering_ssse3.c b/third_party/aom/av1/common/cdef_block_ssse3.c
index 99df62b6b..863522199 100644
--- a/third_party/aom/av1/common/od_dering_ssse3.c
+++ b/third_party/aom/av1/common/cdef_block_ssse3.c
@@ -11,4 +11,4 @@
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_ssse3
-#include "./od_dering_simd.h"
+#include "./cdef_block_simd.h"
diff --git a/third_party/aom/av1/common/cdef_simd.h b/third_party/aom/av1/common/cdef_simd.h
deleted file mode 100644
index 2649099a2..000000000
--- a/third_party/aom/av1/common/cdef_simd.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#ifndef AV1_COMMON_CDEF_SIMD_H_
-#define AV1_COMMON_CDEF_SIMD_H_
-
-#include "aom_dsp/aom_simd.h"
-
-// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
-SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
- unsigned int adjdamp) {
- v128 diff = v128_sub_16(a, b);
- const v128 sign = v128_shr_n_s16(diff, 15);
- diff = v128_abs_s16(diff);
- const v128 s =
- v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp));
- return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
-}
-
-#endif // AV1_COMMON_CDEF_SIMD_H_
diff --git a/third_party/aom/av1/common/cfl.c b/third_party/aom/av1/common/cfl.c
index 7c88dd0c8..f9acfcbc9 100644
--- a/third_party/aom/av1/common/cfl.c
+++ b/third_party/aom/av1/common/cfl.c
@@ -13,117 +13,148 @@
#include "av1/common/common_data.h"
#include "av1/common/onyxc_int.h"
-#include "aom/internal/aom_codec_internal.h"
-
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) {
if (!((cm->subsampling_x == 0 && cm->subsampling_y == 0) ||
(cm->subsampling_x == 1 && cm->subsampling_y == 1))) {
aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Only 4:4:4 and 4:2:0 are currently supported by CfL");
}
- memset(&cfl->y_pix, 0, sizeof(uint8_t) * MAX_SB_SQUARE);
+ memset(&cfl->pred_buf_q3, 0, sizeof(cfl->pred_buf_q3));
cfl->subsampling_x = cm->subsampling_x;
cfl->subsampling_y = cm->subsampling_y;
cfl->are_parameters_computed = 0;
+ cfl->store_y = 0;
+#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+ cfl_clear_sub8x8_val(cfl);
+#endif // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
}
-// Load from the CfL pixel buffer into output
-static void cfl_load(CFL_CTX *cfl, int row, int col, int width, int height) {
- const int sub_x = cfl->subsampling_x;
- const int sub_y = cfl->subsampling_y;
- const int off_log2 = tx_size_wide_log2[0];
-
- // TODO(ltrudeau) convert to uint16 to add HBD support
- const uint8_t *y_pix;
- // TODO(ltrudeau) convert to uint16 to add HBD support
- uint8_t *output = cfl->y_down_pix;
-
- int pred_row_offset = 0;
- int output_row_offset = 0;
-
- // TODO(ltrudeau) should be faster to downsample when we store the values
- // TODO(ltrudeau) add support for 4:2:2
- if (sub_y == 0 && sub_x == 0) {
- y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << off_log2];
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- // In 4:4:4, pixels match 1 to 1
- output[output_row_offset + i] = y_pix[pred_row_offset + i];
- }
- pred_row_offset += MAX_SB_SIZE;
- output_row_offset += MAX_SB_SIZE;
- }
- } else if (sub_y == 1 && sub_x == 1) {
- y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << (off_log2 + sub_y)];
- for (int j = 0; j < height; j++) {
- for (int i = 0; i < width; i++) {
- int top_left = (pred_row_offset + i) << sub_y;
- int bot_left = top_left + MAX_SB_SIZE;
- // In 4:2:0, average pixels in 2x2 grid
- output[output_row_offset + i] = OD_SHR_ROUND(
- y_pix[top_left] + y_pix[top_left + 1] // Top row
- + y_pix[bot_left] + y_pix[bot_left + 1] // Bottom row
- ,
- 2);
- }
- pred_row_offset += MAX_SB_SIZE;
- output_row_offset += MAX_SB_SIZE;
- }
- } else {
- assert(0); // Unsupported chroma subsampling
- }
- // Due to frame boundary issues, it is possible that the total area of
- // covered by Chroma exceeds that of Luma. When this happens, we write over
- // the broken data by repeating the last columns and/or rows.
- //
- // Note that in order to manage the case where both rows and columns
- // overrun,
- // we apply rows first. This way, when the rows overrun the bottom of the
- // frame, the columns will be copied over them.
- const int uv_width = (col << off_log2) + width;
- const int uv_height = (row << off_log2) + height;
-
- const int diff_width = uv_width - (cfl->y_width >> sub_x);
- const int diff_height = uv_height - (cfl->y_height >> sub_y);
+// Due to frame boundary issues, it is possible that the total area covered by
+// chroma exceeds that of luma. When this happens, we fill the missing pixels by
+// repeating the last columns and/or rows.
+static INLINE void cfl_pad(CFL_CTX *cfl, int width, int height) {
+ const int diff_width = width - cfl->buf_width;
+ const int diff_height = height - cfl->buf_height;
if (diff_width > 0) {
- int last_pixel;
- output_row_offset = width - diff_width;
-
- for (int j = 0; j < height; j++) {
- last_pixel = output_row_offset - 1;
+ const int min_height = height - diff_height;
+ int16_t *pred_buf_q3 = cfl->pred_buf_q3 + (width - diff_width);
+ for (int j = 0; j < min_height; j++) {
+ const int last_pixel = pred_buf_q3[-1];
for (int i = 0; i < diff_width; i++) {
- output[output_row_offset + i] = output[last_pixel];
+ pred_buf_q3[i] = last_pixel;
}
- output_row_offset += MAX_SB_SIZE;
+ pred_buf_q3 += MAX_SB_SIZE;
}
+ cfl->buf_width = width;
}
-
if (diff_height > 0) {
- output_row_offset = (height - diff_height) * MAX_SB_SIZE;
- const int last_row_offset = output_row_offset - MAX_SB_SIZE;
-
+ int16_t *pred_buf_q3 =
+ cfl->pred_buf_q3 + ((height - diff_height) * MAX_SB_SIZE);
for (int j = 0; j < diff_height; j++) {
+ const int16_t *last_row_q3 = pred_buf_q3 - MAX_SB_SIZE;
for (int i = 0; i < width; i++) {
- output[output_row_offset + i] = output[last_row_offset + i];
+ pred_buf_q3[i] = last_row_q3[i];
}
- output_row_offset += MAX_SB_SIZE;
+ pred_buf_q3 += MAX_SB_SIZE;
}
+ cfl->buf_height = height;
}
}
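[Editor's note] Worked example of cfl_pad: with a 6x6 valid region padded to 8x8, each of the six valid rows first replicates its last pixel into columns 6..7; rows 6..7 are then copied wholesale from row 5, which already contains the padded columns, so the bottom-right corner is filled correctly without a separate case.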
-// CfL computes its own block-level DC_PRED. This is required to compute both
-// alpha_cb and alpha_cr before the prediction are computed.
-static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
+static void sum_above_row_lbd(const uint8_t *above_u, const uint8_t *above_v,
+ int width, int *out_sum_u, int *out_sum_v) {
+ int sum_u = 0;
+ int sum_v = 0;
+ for (int i = 0; i < width; i++) {
+ sum_u += above_u[i];
+ sum_v += above_v[i];
+ }
+ *out_sum_u += sum_u;
+ *out_sum_v += sum_v;
+}
+#if CONFIG_HIGHBITDEPTH
+static void sum_above_row_hbd(const uint16_t *above_u, const uint16_t *above_v,
+ int width, int *out_sum_u, int *out_sum_v) {
+ int sum_u = 0;
+ int sum_v = 0;
+ for (int i = 0; i < width; i++) {
+ sum_u += above_u[i];
+ sum_v += above_v[i];
+ }
+ *out_sum_u += sum_u;
+ *out_sum_v += sum_v;
+}
+#endif // CONFIG_HIGHBITDEPTH
+
+static void sum_above_row(const MACROBLOCKD *xd, int width, int *out_sum_u,
+ int *out_sum_v) {
const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U];
const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V];
+#if CONFIG_HIGHBITDEPTH
+ if (get_bitdepth_data_path_index(xd)) {
+ const uint16_t *above_u_16 =
+ CONVERT_TO_SHORTPTR(pd_u->dst.buf) - pd_u->dst.stride;
+ const uint16_t *above_v_16 =
+ CONVERT_TO_SHORTPTR(pd_v->dst.buf) - pd_v->dst.stride;
+ sum_above_row_hbd(above_u_16, above_v_16, width, out_sum_u, out_sum_v);
+ return;
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ const uint8_t *above_u = pd_u->dst.buf - pd_u->dst.stride;
+ const uint8_t *above_v = pd_v->dst.buf - pd_v->dst.stride;
+ sum_above_row_lbd(above_u, above_v, width, out_sum_u, out_sum_v);
+}
- const uint8_t *const dst_u = pd_u->dst.buf;
- const uint8_t *const dst_v = pd_v->dst.buf;
+static void sum_left_col_lbd(const uint8_t *left_u, int u_stride,
+ const uint8_t *left_v, int v_stride, int height,
+ int *out_sum_u, int *out_sum_v) {
+ int sum_u = 0;
+ int sum_v = 0;
+ for (int i = 0; i < height; i++) {
+ sum_u += left_u[i * u_stride];
+ sum_v += left_v[i * v_stride];
+ }
+ *out_sum_u += sum_u;
+ *out_sum_v += sum_v;
+}
+#if CONFIG_HIGHBITDEPTH
+static void sum_left_col_hbd(const uint16_t *left_u, int u_stride,
+ const uint16_t *left_v, int v_stride, int height,
+ int *out_sum_u, int *out_sum_v) {
+ int sum_u = 0;
+ int sum_v = 0;
+ for (int i = 0; i < height; i++) {
+ sum_u += left_u[i * u_stride];
+ sum_v += left_v[i * v_stride];
+ }
+ *out_sum_u += sum_u;
+ *out_sum_v += sum_v;
+}
+#endif // CONFIG_HIGHBITDEPTH
+static void sum_left_col(const MACROBLOCKD *xd, int height, int *out_sum_u,
+ int *out_sum_v) {
+ const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U];
+ const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V];
- const int dst_u_stride = pd_u->dst.stride;
- const int dst_v_stride = pd_v->dst.stride;
+#if CONFIG_HIGHBITDEPTH
+ if (get_bitdepth_data_path_index(xd)) {
+ const uint16_t *left_u_16 = CONVERT_TO_SHORTPTR(pd_u->dst.buf) - 1;
+ const uint16_t *left_v_16 = CONVERT_TO_SHORTPTR(pd_v->dst.buf) - 1;
+ sum_left_col_hbd(left_u_16, pd_u->dst.stride, left_v_16, pd_v->dst.stride,
+ height, out_sum_u, out_sum_v);
+ return;
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ const uint8_t *left_u = pd_u->dst.buf - 1;
+ const uint8_t *left_v = pd_v->dst.buf - 1;
+ sum_left_col_lbd(left_u, pd_u->dst.stride, left_v, pd_v->dst.stride, height,
+ out_sum_u, out_sum_v);
+}
+// CfL computes its own block-level DC_PRED. This is required to compute both
+// alpha_cb and alpha_cr before the predictions are computed.
+static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
CFL_CTX *const cfl = xd->cfl;
// Compute DC_PRED until block boundary. We can't assume the neighbor will use
@@ -138,14 +169,13 @@ static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
int sum_u = 0;
int sum_v = 0;
-// Match behavior of build_intra_predictors (reconintra.c) at superblock
+// Match behavior of build_intra_predictors_high (reconintra.c) at superblock
// boundaries:
-//
-// 127 127 127 .. 127 127 127 127 127 127
-// 129 A B .. Y Z
-// 129 C D .. W X
-// 129 E F .. U V
-// 129 G H .. S T T T T T
+// base-1 base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
+// base+1 A B .. Y Z
+// base+1 C D .. W X
+// base+1 E F .. U V
+// base+1 G H .. S T T T T T
// ..
#if CONFIG_CHROMA_SUB8X8
@@ -153,14 +183,11 @@ static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
#else
if (xd->up_available && xd->mb_to_right_edge >= 0) {
#endif
- // TODO(ltrudeau) replace this with DC_PRED assembly
- for (int i = 0; i < width; i++) {
- sum_u += dst_u[-dst_u_stride + i];
- sum_v += dst_v[-dst_v_stride + i];
- }
+ sum_above_row(xd, width, &sum_u, &sum_v);
} else {
- sum_u = width * 127;
- sum_v = width * 127;
+ const int base = 128 << (xd->bd - 8);
+ sum_u = width * (base - 1);
+ sum_v = width * (base - 1);
}
#if CONFIG_CHROMA_SUB8X8
@@ -168,13 +195,11 @@ static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
#else
if (xd->left_available && xd->mb_to_bottom_edge >= 0) {
#endif
- for (int i = 0; i < height; i++) {
- sum_u += dst_u[i * dst_u_stride - 1];
- sum_v += dst_v[i * dst_v_stride - 1];
- }
+ sum_left_col(xd, height, &sum_u, &sum_v);
} else {
- sum_u += height * 129;
- sum_v += height * 129;
+ const int base = 128 << (xd->bd - 8);
+ sum_u += height * (base + 1);
+ sum_v += height * (base + 1);
}
// TODO(ltrudeau) Because of max_block_wide and max_block_high, num_pel will
@@ -183,64 +208,103 @@ static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
cfl->dc_pred[CFL_PRED_V] = (sum_v + (num_pel >> 1)) / num_pel;
}
-static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
+static void cfl_subtract_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
const int width = cfl->uv_width;
const int height = cfl->uv_height;
const int tx_height = tx_size_high[tx_size];
const int tx_width = tx_size_wide[tx_size];
- const int stride = width >> tx_size_wide_log2[tx_size];
const int block_row_stride = MAX_SB_SIZE << tx_size_high_log2[tx_size];
const int num_pel_log2 =
(tx_size_high_log2[tx_size] + tx_size_wide_log2[tx_size]);
- // TODO(ltrudeau) Convert to uint16 for HBD support
- const uint8_t *y_pix = cfl->y_down_pix;
- // TODO(ltrudeau) Convert to uint16 for HBD support
- const uint8_t *t_y_pix;
- int *averages_q3 = cfl->y_averages_q3;
+ int16_t *pred_buf_q3 = cfl->pred_buf_q3;
- cfl_load(cfl, 0, 0, width, height);
+ cfl_pad(cfl, width, height);
- int a = 0;
for (int b_j = 0; b_j < height; b_j += tx_height) {
for (int b_i = 0; b_i < width; b_i += tx_width) {
- int sum = 0;
- t_y_pix = y_pix;
+ int sum_q3 = 0;
+ int16_t *tx_pred_buf_q3 = pred_buf_q3;
for (int t_j = 0; t_j < tx_height; t_j++) {
for (int t_i = b_i; t_i < b_i + tx_width; t_i++) {
- sum += t_y_pix[t_i];
+ sum_q3 += tx_pred_buf_q3[t_i];
}
- t_y_pix += MAX_SB_SIZE;
+ tx_pred_buf_q3 += MAX_SB_SIZE;
}
- averages_q3[a++] =
- ((sum << 3) + (1 << (num_pel_log2 - 1))) >> num_pel_log2;
-
+ int avg_q3 = (sum_q3 + (1 << (num_pel_log2 - 1))) >> num_pel_log2;
// Loss is never more than 1/2 (in Q3)
- assert(fabs((double)averages_q3[a - 1] -
- (sum / ((double)(1 << num_pel_log2))) * (1 << 3)) <= 0.5);
+ assert(fabs((double)avg_q3 - (sum_q3 / ((double)(1 << num_pel_log2)))) <=
+ 0.5);
+
+ tx_pred_buf_q3 = pred_buf_q3;
+ for (int t_j = 0; t_j < tx_height; t_j++) {
+ for (int t_i = b_i; t_i < b_i + tx_width; t_i++) {
+ tx_pred_buf_q3[t_i] -= avg_q3;
+ }
+
+ tx_pred_buf_q3 += MAX_SB_SIZE;
+ }
}
- assert(a % stride == 0);
- y_pix += block_row_stride;
+ pred_buf_q3 += block_row_stride;
}
-
- cfl->y_averages_stride = stride;
- assert(a <= MAX_NUM_TXB);
}
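[Editor's note] Numeric check of the Q3 rounding bound asserted above: for an 8x8 transform, num_pel_log2 = 6; with sum_q3 = 1000 the computed average is (1000 + 32) >> 6 = 16, while the exact value is 1000 / 64 = 15.625, an error of 0.375 Q3 steps, within the stated 1/2 bound.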
-static INLINE int cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
+static INLINE int cfl_idx_to_alpha(int alpha_idx, int joint_sign,
CFL_PRED_TYPE pred_type) {
- const int mag_idx = cfl_alpha_codes[alpha_idx][pred_type];
- const int abs_alpha_q3 = cfl_alpha_mags_q3[mag_idx];
- if (alpha_sign == CFL_SIGN_POS) {
- return abs_alpha_q3;
- } else {
- assert(abs_alpha_q3 != 0);
- assert(cfl_alpha_mags_q3[mag_idx + 1] == -abs_alpha_q3);
- return -abs_alpha_q3;
+ const int alpha_sign = (pred_type == CFL_PRED_U) ? CFL_SIGN_U(joint_sign)
+ : CFL_SIGN_V(joint_sign);
+ if (alpha_sign == CFL_SIGN_ZERO) return 0;
+ const int abs_alpha_q3 =
+ (pred_type == CFL_PRED_U) ? CFL_IDX_U(alpha_idx) : CFL_IDX_V(alpha_idx);
+ return (alpha_sign == CFL_SIGN_POS) ? abs_alpha_q3 + 1 : -abs_alpha_q3 - 1;
+}
+
+static void cfl_build_prediction_lbd(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int width, int height,
+ int alpha_q3, int dc_pred) {
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ dst[i] =
+ clip_pixel(get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]) + dc_pred);
+ }
+ dst += dst_stride;
+ pred_buf_q3 += MAX_SB_SIZE;
}
}
-// Predict the current transform block using CfL.
+#if CONFIG_HIGHBITDEPTH
+static void cfl_build_prediction_hbd(const int16_t *pred_buf_q3, uint16_t *dst,
+ int dst_stride, int width, int height,
+ int alpha_q3, int dc_pred, int bit_depth) {
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ dst[i] = clip_pixel_highbd(
+ get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]) + dc_pred, bit_depth);
+ }
+ dst += dst_stride;
+ pred_buf_q3 += MAX_SB_SIZE;
+ }
+}
+#endif // CONFIG_HIGHBITDEPTH
+
+static void cfl_build_prediction(const int16_t *pred_buf_q3, uint8_t *dst,
+ int dst_stride, int width, int height,
+ int alpha_q3, int dc_pred, int use_hbd,
+ int bit_depth) {
+#if CONFIG_HIGHBITDEPTH
+ if (use_hbd) {
+ uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
+ cfl_build_prediction_hbd(pred_buf_q3, dst_16, dst_stride, width, height,
+ alpha_q3, dc_pred, bit_depth);
+ return;
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ (void)use_hbd;
+ (void)bit_depth;
+ cfl_build_prediction_lbd(pred_buf_q3, dst, dst_stride, width, height,
+ alpha_q3, dc_pred);
+}
+
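[Editor's note] A scalar sketch of the per-pixel model applied by the builders above: the zero-mean luma is Q3 and alpha is Q3, so their product is Q6; it is rounded back to Q0 with sign-symmetric rounding (matching ROUND_POWER_OF_TWO_SIGNED as used by get_scaled_luma_q0) and added to the chroma DC before clipping. The function name is illustrative; 8-bit clipping shown.

static int cfl_predict_pixel_lbd(int alpha_q3, int luma_ac_q3, int dc_pred) {
  const int q6 = alpha_q3 * luma_ac_q3;
  // Round the magnitude, then restore the sign (Q6 -> Q0).
  const int q0 = q6 >= 0 ? (q6 + 32) >> 6 : -((-q6 + 32) >> 6);
  int p = dc_pred + q0;
  if (p < 0) p = 0;
  if (p > 255) p = 255;  // clip_pixel() for 8-bit
  return p;
}
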
void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
int row, int col, TX_SIZE tx_size, int plane) {
CFL_CTX *const cfl = xd->cfl;
@@ -249,74 +313,112 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
// CfL parameters must be computed before prediction can be done.
assert(cfl->are_parameters_computed == 1);
- const int width = tx_size_wide[tx_size];
- const int height = tx_size_high[tx_size];
- // TODO(ltrudeau) Convert to uint16 to support HBD
- const uint8_t *y_pix = cfl->y_down_pix;
+ const int16_t *pred_buf_q3 =
+ cfl->pred_buf_q3 + ((row * MAX_SB_SIZE + col) << tx_size_wide_log2[0]);
+ const int alpha_q3 =
+ cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, plane - 1);
- const int dc_pred = cfl->dc_pred[plane - 1];
- const int alpha_q3 = cfl_idx_to_alpha(
- mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1);
-
- const int avg_row =
- (row << tx_size_wide_log2[0]) >> tx_size_wide_log2[tx_size];
- const int avg_col =
- (col << tx_size_high_log2[0]) >> tx_size_high_log2[tx_size];
- const int avg_q3 =
- cfl->y_averages_q3[cfl->y_averages_stride * avg_row + avg_col];
+ cfl_build_prediction(pred_buf_q3, dst, dst_stride, tx_size_wide[tx_size],
+ tx_size_high[tx_size], alpha_q3, cfl->dc_pred[plane - 1],
+ get_bitdepth_data_path_index(xd), xd->bd);
+}
- cfl_load(cfl, row, col, width, height);
+static void cfl_luma_subsampling_420_lbd(const uint8_t *input, int input_stride,
+ int16_t *output_q3, int width,
+ int height) {
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
- // TODO(ltrudeau) add support for HBD.
- dst[i] =
- clip_pixel(get_scaled_luma_q0(alpha_q3, y_pix[i], avg_q3) + dc_pred);
+ int top = i << 1;
+ int bot = top + input_stride;
+ output_q3[i] = (input[top] + input[top + 1] + input[bot] + input[bot + 1])
+ << 1;
}
- dst += dst_stride;
- y_pix += MAX_SB_SIZE;
+ input += input_stride << 1;
+ output_q3 += MAX_SB_SIZE;
}
}
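[Editor's note] The "<< 1" above is what makes the output Q3: a 2x2 patch sums four pixels (4x the average), and doubling that gives 8x the average, i.e. the mean luma in Q3 with no rounding loss. For pixels {100, 102, 101, 101} the sum is 404 and 404 << 1 = 808 = 101.0 * 8. The 4:4:4 paths reach the same scale with "<< 3" on a single pixel.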
-void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
- int col, TX_SIZE tx_size, BLOCK_SIZE bsize) {
- const int tx_width = tx_size_wide[tx_size];
- const int tx_height = tx_size_high[tx_size];
- const int tx_off_log2 = tx_size_wide_log2[0];
+static void cfl_luma_subsampling_444_lbd(const uint8_t *input, int input_stride,
+ int16_t *output_q3, int width,
+ int height) {
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ output_q3[i] = input[i] << 3;
+ }
+ input += input_stride;
+ output_q3 += MAX_SB_SIZE;
+ }
+}
-#if CONFIG_CHROMA_SUB8X8
- if (bsize < BLOCK_8X8) {
- // Transform cannot be smaller than
- assert(tx_width >= 4);
- assert(tx_height >= 4);
-
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
-
- // For chroma_sub8x8, the CfL prediction for prediction blocks smaller than
- // 8X8 uses non chroma reference reconstructed luma pixels. To do so, we
- // combine the 4X4 non chroma reference into the CfL pixel buffers based on
- // their row and column index.
-
- // The following code is adapted from the is_chroma_reference() function.
- if ((cfl->mi_row &
- 0x01) // Increment the row index for odd indexed 4X4 blocks
- && (bh == 4) // But not for 4X8 blocks
- && cfl->subsampling_y) { // And only when chroma is subsampled
- assert(row == 0);
- row++;
+#if CONFIG_HIGHBITDEPTH
+static void cfl_luma_subsampling_420_hbd(const uint16_t *input,
+ int input_stride, int16_t *output_q3,
+ int width, int height) {
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ int top = i << 1;
+ int bot = top + input_stride;
+ output_q3[i] = (input[top] + input[top + 1] + input[bot] + input[bot + 1])
+ << 1;
}
+ input += input_stride << 1;
+ output_q3 += MAX_SB_SIZE;
+ }
+}
- if ((cfl->mi_col &
- 0x01) // Increment the col index for odd indexed 4X4 blocks
- && (bw == 4) // But not for 8X4 blocks
- && cfl->subsampling_x) { // And only when chroma is subsampled
- assert(col == 0);
- col++;
+static void cfl_luma_subsampling_444_hbd(const uint16_t *input,
+ int input_stride, int16_t *output_q3,
+ int width, int height) {
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ output_q3[i] = input[i] << 3;
}
+ input += input_stride;
+ output_q3 += MAX_SB_SIZE;
}
-#else
- (void)bsize;
-#endif
+}
+#endif // CONFIG_HIGHBITDEPTH
+
+static void cfl_luma_subsampling_420(const uint8_t *input, int input_stride,
+ int16_t *output_q3, int width, int height,
+ int use_hbd) {
+#if CONFIG_HIGHBITDEPTH
+ if (use_hbd) {
+ const uint16_t *input_16 = CONVERT_TO_SHORTPTR(input);
+ cfl_luma_subsampling_420_hbd(input_16, input_stride, output_q3, width,
+ height);
+ return;
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ (void)use_hbd;
+ cfl_luma_subsampling_420_lbd(input, input_stride, output_q3, width, height);
+}
+
+static void cfl_luma_subsampling_444(const uint8_t *input, int input_stride,
+ int16_t *output_q3, int width, int height,
+ int use_hbd) {
+#if CONFIG_HIGHBITDEPTH
+ if (use_hbd) {
+ uint16_t *input_16 = CONVERT_TO_SHORTPTR(input);
+ cfl_luma_subsampling_444_hbd(input_16, input_stride, output_q3, width,
+ height);
+ return;
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ (void)use_hbd;
+ cfl_luma_subsampling_444_lbd(input, input_stride, output_q3, width, height);
+}
+
+static INLINE void cfl_store(CFL_CTX *cfl, const uint8_t *input,
+ int input_stride, int row, int col, int width,
+ int height, int use_hbd) {
+ const int tx_off_log2 = tx_size_wide_log2[0];
+ const int sub_x = cfl->subsampling_x;
+ const int sub_y = cfl->subsampling_y;
+ const int store_row = row << (tx_off_log2 - sub_y);
+ const int store_col = col << (tx_off_log2 - sub_x);
+ const int store_height = height >> sub_y;
+ const int store_width = width >> sub_x;
// Invalidate current parameters
cfl->are_parameters_computed = 0;
@@ -325,29 +427,109 @@ void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
// can manage chroma overrun (e.g. when the chroma surface goes beyond the
// frame boundary)
if (col == 0 && row == 0) {
- cfl->y_width = tx_width;
- cfl->y_height = tx_height;
+ cfl->buf_width = store_width;
+ cfl->buf_height = store_height;
} else {
- cfl->y_width = OD_MAXI((col << tx_off_log2) + tx_width, cfl->y_width);
- cfl->y_height = OD_MAXI((row << tx_off_log2) + tx_height, cfl->y_height);
+ cfl->buf_width = OD_MAXI(store_col + store_width, cfl->buf_width);
+ cfl->buf_height = OD_MAXI(store_row + store_height, cfl->buf_height);
}
// Check that we will remain inside the pixel buffer.
- assert((row << tx_off_log2) + tx_height <= MAX_SB_SIZE);
- assert((col << tx_off_log2) + tx_width <= MAX_SB_SIZE);
+ assert(store_row + store_height <= MAX_SB_SIZE);
+ assert(store_col + store_width <= MAX_SB_SIZE);
// Store the input into the CfL pixel buffer
- uint8_t *y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << tx_off_log2];
+ int16_t *pred_buf_q3 =
+ cfl->pred_buf_q3 + (store_row * MAX_SB_SIZE + store_col);
- // TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store
- for (int j = 0; j < tx_height; j++) {
- for (int i = 0; i < tx_width; i++) {
- y_pix[i] = input[i];
+ if (sub_y == 0 && sub_x == 0) {
+ cfl_luma_subsampling_444(input, input_stride, pred_buf_q3, store_width,
+ store_height, use_hbd);
+ } else if (sub_y == 1 && sub_x == 1) {
+ cfl_luma_subsampling_420(input, input_stride, pred_buf_q3, store_width,
+ store_height, use_hbd);
+ } else {
+ // TODO(ltrudeau) add support for 4:2:2
+ assert(0); // Unsupported chroma subsampling
+ }
+}
+
+#if CONFIG_CHROMA_SUB8X8
+// Adjust the row and column of blocks smaller than 8X8, as chroma-referenced
+// and non-chroma-referenced blocks are stored together in the CfL buffer.
+static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int *row_out,
+ int *col_out) {
+ // Increment row index for bottom: 8x4, 16x4 or both bottom 4x4s.
+ if ((cfl->mi_row & 0x01) && cfl->subsampling_y) {
+ assert(*row_out == 0);
+ (*row_out)++;
+ }
+
+ // Increment col index for right: 4x8, 4x16 or both right 4x4s.
+ if ((cfl->mi_col & 0x01) && cfl->subsampling_x) {
+ assert(*col_out == 0);
+ (*col_out)++;
+ }
+}
+#if CONFIG_DEBUG
+static INLINE void sub8x8_set_val(CFL_CTX *cfl, int row, int col, int val_high,
+ int val_wide) {
+ for (int val_r = 0; val_r < val_high; val_r++) {
+ assert(row + val_r < CFL_SUB8X8_VAL_MI_SIZE);
+ int row_off = (row + val_r) * CFL_SUB8X8_VAL_MI_SIZE;
+ for (int val_c = 0; val_c < val_wide; val_c++) {
+ assert(col + val_c < CFL_SUB8X8_VAL_MI_SIZE);
+ assert(cfl->sub8x8_val[row_off + col + val_c] == 0);
+ cfl->sub8x8_val[row_off + col + val_c]++;
}
- y_pix += MAX_SB_SIZE;
- input += input_stride;
}
}
+#endif // CONFIG_DEBUG
+#endif // CONFIG_CHROMA_SUB8X8
+
+void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
+ BLOCK_SIZE bsize) {
+ CFL_CTX *const cfl = xd->cfl;
+ struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
+ uint8_t *dst =
+ &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
+ (void)bsize;
+#if CONFIG_CHROMA_SUB8X8
+
+ if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
+ // Only dimensions of size 4 can have an odd offset.
+ assert(!((col & 1) && tx_size_wide[tx_size] != 4));
+ assert(!((row & 1) && tx_size_high[tx_size] != 4));
+ sub8x8_adjust_offset(cfl, &row, &col);
+#if CONFIG_DEBUG
+ sub8x8_set_val(cfl, row, col, tx_size_high_unit[tx_size],
+ tx_size_wide_unit[tx_size]);
+#endif // CONFIG_DEBUG
+ }
+#endif
+ cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size_wide[tx_size],
+ tx_size_high[tx_size], get_bitdepth_data_path_index(xd));
+}
+
+void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
+ CFL_CTX *const cfl = xd->cfl;
+ struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
+ int row = 0;
+ int col = 0;
+#if CONFIG_CHROMA_SUB8X8
+ bsize = AOMMAX(BLOCK_4X4, bsize);
+ if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
+ sub8x8_adjust_offset(cfl, &row, &col);
+#if CONFIG_DEBUG
+ sub8x8_set_val(cfl, row, col, mi_size_high[bsize], mi_size_wide[bsize]);
+#endif // CONFIG_DEBUG
+ }
+#endif // CONFIG_CHROMA_SUB8X8
+ const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size);
+ const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);
+ cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, width, height,
+ get_bitdepth_data_path_index(xd));
+}
void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
CFL_CTX *const cfl = xd->cfl;
@@ -359,6 +541,16 @@ void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize = AOMMAX(
BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]));
+#if CONFIG_DEBUG
+ if (mbmi->sb_type < BLOCK_8X8) {
+ for (int val_r = 0; val_r < mi_size_high[mbmi->sb_type]; val_r++) {
+ for (int val_c = 0; val_c < mi_size_wide[mbmi->sb_type]; val_c++) {
+ assert(cfl->sub8x8_val[val_r * CFL_SUB8X8_VAL_MI_SIZE + val_c] == 1);
+ }
+ }
+ cfl_clear_sub8x8_val(cfl);
+ }
+#endif // CONFIG_DEBUG
#else
const BLOCK_SIZE plane_bsize =
get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]);
@@ -368,17 +560,10 @@ void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
cfl->uv_height =
max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
-#if CONFIG_DEBUG
- if (mbmi->sb_type >= BLOCK_8X8) {
- assert(cfl->y_width <= cfl->uv_width << cfl->subsampling_x);
- assert(cfl->y_height <= cfl->uv_height << cfl->subsampling_y);
- }
-#endif
+ assert(cfl->buf_width <= cfl->uv_width);
+ assert(cfl->buf_height <= cfl->uv_height);
- // Compute block-level DC_PRED for both chromatic planes.
- // DC_PRED replaces beta in the linear model.
cfl_dc_pred(xd, plane_bsize);
- // Compute transform-level average on reconstructed luma input.
- cfl_compute_averages(cfl, tx_size);
+ cfl_subtract_averages(cfl, tx_size);
cfl->are_parameters_computed = 1;
}
diff --git a/third_party/aom/av1/common/cfl.h b/third_party/aom/av1/common/cfl.h
index f2df4b22b..4ac0b401c 100644
--- a/third_party/aom/av1/common/cfl.h
+++ b/third_party/aom/av1/common/cfl.h
@@ -12,79 +12,20 @@
#ifndef AV1_COMMON_CFL_H_
#define AV1_COMMON_CFL_H_
-#include <assert.h>
+#include "av1/common/blockd.h"
-#include "av1/common/enums.h"
-
-// Forward declaration of AV1_COMMON, in order to avoid creating a cyclic
-// dependency by importing av1/common/onyxc_int.h
-typedef struct AV1Common AV1_COMMON;
-
-// Forward declaration of MACROBLOCK, in order to avoid creating a cyclic
-// dependency by importing av1/common/blockd.h
-typedef struct macroblockd MACROBLOCKD;
-
-typedef struct {
- // Pixel buffer containing the luma pixels used as prediction for chroma
- // TODO(ltrudeau) Convert to uint16 for HBD support
- uint8_t y_pix[MAX_SB_SQUARE];
-
- // Pixel buffer containing the downsampled luma pixels used as prediction for
- // chroma
- // TODO(ltrudeau) Convert to uint16 for HBD support
- uint8_t y_down_pix[MAX_SB_SQUARE];
-
- // Height and width of the luma prediction block currently in the pixel buffer
- int y_height, y_width;
-
- // Height and width of the chroma prediction block currently associated with
- // this context
- int uv_height, uv_width;
-
- // Transform level averages of the luma reconstructed values over the entire
- // prediction unit
- // Fixed point y_averages is Q12.3:
- // * Worst case division is 1/1024
- // * Max error will be 1/16th.
- // Note: 3 is chosen so that y_averages fits in 15 bits when 12 bit input is
- // used
- int y_averages_q3[MAX_NUM_TXB];
- int y_averages_stride;
-
- int are_parameters_computed;
-
- // Chroma subsampling
- int subsampling_x, subsampling_y;
-
- // Block level DC_PRED for each chromatic plane
- int dc_pred[CFL_PRED_PLANES];
-
- // The rate associated with each alpha codeword
- int costs[CFL_ALPHABET_SIZE];
-
- int mi_row, mi_col;
-} CFL_CTX;
-
-static const int cfl_alpha_mags_q3[CFL_MAGS_SIZE] = { 0, 1, -1, 2, -2, 4, -4 };
-
-static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = {
- // barrbrain's simple 1D quant ordered by subset 3 likelihood
- { 0, 0 }, { 1, 1 }, { 3, 0 }, { 3, 3 }, { 1, 0 }, { 3, 1 },
- { 5, 5 }, { 0, 1 }, { 5, 3 }, { 5, 0 }, { 3, 5 }, { 1, 3 },
- { 0, 3 }, { 5, 1 }, { 1, 5 }, { 0, 5 }
-};
-
-static INLINE int get_scaled_luma_q0(int alpha_q3, int y_pix, int avg_q3) {
- return (alpha_q3 * ((y_pix << 3) - avg_q3) + 32) >> 6;
+static INLINE int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
+ int scaled_luma_q6 = alpha_q3 * pred_buf_q3;
+ return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6);
}
-void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
-
void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
int row, int col, TX_SIZE tx_size, int plane);
-void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
- int col, TX_SIZE tx_size, BLOCK_SIZE bsize);
+void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size);
+
+void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
+ BLOCK_SIZE bsize);
void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size);
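[Editor's note] Worked example for get_scaled_luma_q0 above: with alpha_q3 = -5 (i.e. -0.625) and pred_buf_q3 = 48 (a luma deviation of +6.0), the Q6 product is -240; ROUND_POWER_OF_TWO_SIGNED(-240, 6) rounds the magnitude, (240 + 32) >> 6 = 4, and restores the sign, giving -4 against an exact value of -3.75.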
diff --git a/third_party/aom/av1/common/clpf_simd.h b/third_party/aom/av1/common/clpf_simd.h
index a615b5ed3..c7ffc569a 100644
--- a/third_party/aom/av1/common/clpf_simd.h
+++ b/third_party/aom/av1/common/clpf_simd.h
@@ -10,10 +10,20 @@
*/
#include "./av1_rtcd.h"
-#include "./cdef_simd.h"
#include "aom_ports/bitops.h"
#include "aom_ports/mem.h"
+// sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
+SIMD_INLINE v128 constrain16(v128 a, v128 b, unsigned int threshold,
+ unsigned int adjdamp) {
+ v128 diff = v128_sub_16(a, b);
+ const v128 sign = v128_shr_n_s16(diff, 15);
+ diff = v128_abs_s16(diff);
+ const v128 s =
+ v128_ssub_u16(v128_dup_16(threshold), v128_shr_u16(diff, adjdamp));
+ return v128_xor(v128_add_16(sign, v128_min_s16(diff, s)), sign);
+}
+
// sign(a - b) * min(abs(a - b), max(0, strength - (abs(a - b) >> adjdamp)))
SIMD_INLINE v128 constrain(v256 a, v256 b, unsigned int strength,
unsigned int adjdamp) {
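[Editor's note] A scalar model of the branchless constrain16 added above, under the assumption that v128_ssub_u16 saturates at zero and that the final add/xor pair is the usual two's-complement trick for re-applying a sign mask ((m + s) ^ s == -m when s == -1):

#include <stdint.h>

static int16_t constrain16_scalar(int16_t a, int16_t b, unsigned threshold,
                                  unsigned adjdamp) {
  const int diff = a - b;                     // v128_sub_16
  const int adiff = diff < 0 ? -diff : diff;  // v128_abs_s16
  int s = (int)threshold - (adiff >> adjdamp);
  if (s < 0) s = 0;                           // v128_ssub_u16 saturates at 0
  const int m = adiff < s ? adiff : s;        // v128_min_s16
  return (int16_t)(diff < 0 ? -m : m);        // (m + sign) ^ sign
}
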
diff --git a/third_party/aom/av1/common/common.h b/third_party/aom/av1/common/common.h
index 551055a76..8611b776f 100644
--- a/third_party/aom/av1/common/common.h
+++ b/third_party/aom/av1/common/common.h
@@ -50,10 +50,6 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
#define CHECK_MEM_ERROR(cm, lval, expr) \
AOM_CHECK_MEM_ERROR(&cm->error, lval, expr)
-// TODO(yaowu: validate the usage of these codes or develop new ones.)
-#define AV1_SYNC_CODE_0 0x49
-#define AV1_SYNC_CODE_1 0x83
-#define AV1_SYNC_CODE_2 0x43
#define AOM_FRAME_MARKER 0x2
diff --git a/third_party/aom/av1/common/common_data.h b/third_party/aom/av1/common/common_data.h
index f49c7335a..1a74fe76e 100644
--- a/third_party/aom/av1/common/common_data.h
+++ b/third_party/aom/av1/common/common_data.h
@@ -29,35 +29,93 @@ extern "C" {
// Log 2 conversion lookup tables for block width and height
static const uint8_t b_width_log2_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, 0, 0,
-#endif
- 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5) 0, 2, 1, 3
+ 0, 0,
+ 0,
+#endif
+ 0, 0,
+ 1, 1,
+ 1, 2,
+ 2, 2,
+ 3, 3,
+ 3, 4,
+ 4, IF_EXT_PARTITION(4, 5, 5) 0,
+ 2, 1,
+ 3, 2,
+ 4, IF_EXT_PARTITION(3, 5)
};
static const uint8_t b_height_log2_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, 0, 0,
-#endif
- 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5) 2, 0, 3, 1
+ 0, 0,
+ 0,
+#endif
+ 0, 1,
+ 0, 1,
+ 2, 1,
+ 2, 3,
+ 2, 3,
+ 4, 3,
+ 4, IF_EXT_PARTITION(5, 4, 5) 2,
+ 0, 3,
+ 1, 4,
+ 2, IF_EXT_PARTITION(5, 3)
};
// Log 2 conversion lookup tables for modeinfo width and height
static const uint8_t mi_width_log2_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CB4X4
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, 0, 0,
-#endif
- 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5) 0, 2, 1, 3
+ 0, 0,
+ 0,
+#endif
+ 0, 0,
+ 1, 1,
+ 1, 2,
+ 2, 2,
+ 3, 3,
+ 3, 4,
+ 4, IF_EXT_PARTITION(4, 5, 5) 0,
+ 2, 1,
+ 3, 2,
+ 4, IF_EXT_PARTITION(3, 5)
#else // CONFIG_CB4X4
- 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4) 0, 1, 0, 2
+ 0, 0,
+ 0, 0,
+ 0, 1,
+ 1, 1,
+ 2, 2,
+ 2, 3,
+ 3, IF_EXT_PARTITION(3, 4, 4) 0,
+ 1, 0,
+ 2, 1,
+ 3, IF_EXT_PARTITION(2, 4)
#endif
};
static const uint8_t mi_height_log2_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CB4X4
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, 0, 0,
-#endif
- 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5) 2, 0, 3, 1
+ 0, 0,
+ 0,
+#endif
+ 0, 1,
+ 0, 1,
+ 2, 1,
+ 2, 3,
+ 2, 3,
+ 4, 3,
+ 4, IF_EXT_PARTITION(5, 4, 5) 2,
+ 0, 3,
+ 1, 4,
+ 2, IF_EXT_PARTITION(5, 3)
#else // CONFIG_CB4X4
- 0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4) 1, 0, 2, 0
+ 0, 0,
+ 0, 0,
+ 1, 0,
+ 1, 2,
+ 1, 2,
+ 3, 2,
+ 3, IF_EXT_PARTITION(4, 3, 4) 1,
+ 0, 2,
+ 0, 3,
+ 1, IF_EXT_PARTITION(2, 4)
#endif
};
@@ -68,9 +126,10 @@ static const uint8_t mi_size_wide[BLOCK_SIZES_ALL] = {
1, 1, 1,
#endif
1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16,
- IF_EXT_PARTITION(16, 32, 32) 1, 4, 2, 8
+ IF_EXT_PARTITION(16, 32, 32) 1, 4, 2, 8, 4, 16, IF_EXT_PARTITION(8, 32)
#else // CONFIG_CB4X4
- 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16) 1, 2, 1, 4
+ 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16) 1, 2, 1, 4,
+ 2, 8, IF_EXT_PARTITION(4, 16)
#endif
};
static const uint8_t mi_size_high[BLOCK_SIZES_ALL] = {
@@ -79,9 +138,10 @@ static const uint8_t mi_size_high[BLOCK_SIZES_ALL] = {
1, 1, 1,
#endif
1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16,
- IF_EXT_PARTITION(32, 16, 32) 4, 1, 8, 2
+ IF_EXT_PARTITION(32, 16, 32) 4, 1, 8, 2, 16, 4, IF_EXT_PARTITION(32, 8)
#else // CONFIG_CB4X4
- 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16) 2, 1, 4, 1
+ 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16) 2, 1, 4, 1,
+ 8, 2, IF_EXT_PARTITION(16, 4)
#endif
};
/* clang-format on */
@@ -89,248 +149,171 @@ static const uint8_t mi_size_high[BLOCK_SIZES_ALL] = {
// Width/height lookup tables in units of various block sizes
static const uint8_t block_size_wide[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 2, 2, 4,
+ 2, 2,
+ 4,
#endif
- 4, 4, 8, 8, 8, 16, 16,
- 16, 32, 32, 32, 64, 64, IF_EXT_PARTITION(64, 128, 128) 4,
- 16, 8, 32
+ 4, 4,
+ 8, 8,
+ 8, 16,
+ 16, 16,
+ 32, 32,
+ 32, 64,
+ 64, IF_EXT_PARTITION(64, 128, 128) 4,
+ 16, 8,
+ 32, 16,
+ 64, IF_EXT_PARTITION(32, 128)
};
static const uint8_t block_size_high[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 2, 4, 2,
+ 2, 4,
+ 2,
#endif
- 4, 8, 4, 8, 16, 8, 16,
- 32, 16, 32, 64, 32, 64, IF_EXT_PARTITION(128, 64, 128) 16,
- 4, 32, 8
+ 4, 8,
+ 4, 8,
+ 16, 8,
+ 16, 32,
+ 16, 32,
+ 64, 32,
+ 64, IF_EXT_PARTITION(128, 64, 128) 16,
+ 4, 32,
+ 8, 64,
+ 16, IF_EXT_PARTITION(128, 32)
};
static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1, 1,
+ 1, 1,
+ 1,
#endif
- 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32) 1,
- 4, 2, 8
+ 1, 1,
+ 2, 2,
+ 2, 4,
+ 4, 4,
+ 8, 8,
+ 8, 16,
+ 16, IF_EXT_PARTITION(16, 32, 32) 1,
+ 4, 2,
+ 8, 4,
+ 16, IF_EXT_PARTITION(8, 32)
};
static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1, 1,
+ 1, 1,
+ 1,
#endif
- 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32) 4,
- 1, 8, 2
+ 1, 2,
+ 1, 2,
+ 4, 2,
+ 4, 8,
+ 4, 8,
+ 16, 8,
+ 16, IF_EXT_PARTITION(32, 16, 32) 4,
+ 1, 8,
+ 2, 16,
+ 4, IF_EXT_PARTITION(32, 8)
};
static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1, 1,
+ 1, 1,
+ 1,
#endif
- 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16) 1, 2, 1, 4
+ 1, 1,
+ 1, 1,
+ 1, 2,
+ 2, 2,
+ 4, 4,
+ 4, 8,
+ 8, IF_EXT_PARTITION(8, 16, 16) 1,
+ 2, 1,
+ 4, 2,
+ 8, IF_EXT_PARTITION(4, 16)
};
static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1, 1,
+ 1, 1,
+ 1,
#endif
- 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16) 2, 1, 4, 1
+ 1, 1,
+ 1, 1,
+ 2, 1,
+ 2, 4,
+ 2, 4,
+ 8, 4,
+ 8, IF_EXT_PARTITION(16, 8, 16) 2,
+ 1, 4,
+ 1, 8,
+ 2, IF_EXT_PARTITION(16, 4)
};
static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1, 1,
+ 1, 1,
+ 1,
#endif
- 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8) 1, 1, 1, 2
+ 1, 1,
+ 1, 1,
+ 1, 1,
+ 1, 1,
+ 2, 2,
+ 2, 4,
+ 4, IF_EXT_PARTITION(4, 8, 8) 1,
+ 1, 1,
+ 2, 2,
+ 4, IF_EXT_PARTITION(2, 8)
};
static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 1, 1, 1,
+ 1, 1,
+ 1,
#endif
- 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8) 1, 1, 2, 1
+ 1, 1,
+ 1, 1,
+ 1, 1,
+ 1, 2,
+ 1, 2,
+ 4, 2,
+ 4, IF_EXT_PARTITION(8, 4, 8) 1,
+ 1, 2,
+ 1, 4,
+ 2, IF_EXT_PARTITION(8, 2)
};
// AOMMIN(3, AOMMIN(b_width_log2(bsize), b_height_log2(bsize)))
static const uint8_t size_group_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 0, 0, 0,
-#endif
- 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3) 0, 0, 1, 1
+ 0, 0,
+ 0,
+#endif
+ 0, 0,
+ 0, 1,
+ 1, 1,
+ 2, 2,
+ 2, 3,
+ 3, 3,
+ 3, IF_EXT_PARTITION(3, 3, 3) 0,
+ 0, 1,
+ 1, 2,
+ 2, IF_EXT_PARTITION(3, 3)
};
static const uint8_t num_pels_log2_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- 2, 3, 3,
-#endif
- 4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14) 6,
- 6, 8, 8
+ 2, 3,
+ 3,
+#endif
+ 4, 5,
+ 5, 6,
+ 7, 7,
+ 8, 9,
+ 9, 10,
+ 11, 11,
+ 12, IF_EXT_PARTITION(13, 13, 14) 6,
+ 6, 8,
+ 8, 10,
+ 10, IF_EXT_PARTITION(12, 12)
};
/* clang-format off */
-static const PARTITION_TYPE
- partition_lookup[MAX_SB_SIZE_LOG2 - 1][BLOCK_SIZES_ALL] = {
- { // 4X4 ->
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif
- // 4X4
- PARTITION_NONE,
- // 4X8, 8X4, 8X8
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 8X16, 16X8, 16X16
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 16X32, 32X16, 32X32
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 32X64, 64X32, 64X64
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#if CONFIG_EXT_PARTITION
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif // CONFIG_EXT_PARTITION
- // 4x16, 16x4, 8x32
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 32x8
- PARTITION_INVALID
- }, { // 8X8 ->
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif
- // 4X4
- PARTITION_SPLIT,
- // 4X8, 8X4, 8X8
- PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
- // 8X16, 16X8, 16X16
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 16X32, 32X16, 32X32
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 32X64, 64X32, 64X64
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif // CONFIG_EXT_PARTITION
- // 4x16, 16x4, 8x32
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 32x8
- PARTITION_INVALID
- }, { // 16X16 ->
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif
- // 4X4
- PARTITION_SPLIT,
- // 4X8, 8X4, 8X8
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 8X16, 16X8, 16X16
- PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
- // 16X32, 32X16, 32X32
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 32X64, 64X32, 64X64
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif // CONFIG_EXT_PARTITION
-#if CONFIG_EXT_PARTITION_TYPES
- // 4x16, 16x4, 8x32
- PARTITION_VERT_4, PARTITION_HORZ_4, PARTITION_INVALID,
- // 32x8
- PARTITION_INVALID
-#else
- // 4x16, 16x4, 8x32
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 32x8
- PARTITION_INVALID
-#endif
- }, { // 32X32 ->
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif
- // 4X4
- PARTITION_SPLIT,
- // 4X8, 8X4, 8X8
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 8X16, 16X8, 16X16
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 16X32, 32X16, 32X32
- PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
- // 32X64, 64X32, 64X64
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif // CONFIG_EXT_PARTITION
-#if CONFIG_EXT_PARTITION_TYPES
- // 4x16, 16x4, 8x32
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT_4,
- // 32x8
- PARTITION_HORZ_4
-#else
- // 4x16, 16x4, 8x32
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 32x8
- PARTITION_INVALID
-#endif
- }, { // 64X64 ->
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif
- // 4X4
- PARTITION_SPLIT,
- // 4X8, 8X4, 8X8
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 8X16, 16X8, 16X16
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 16X32, 32X16, 32X32
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 32X64, 64X32, 64X64
- PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
-#if CONFIG_EXT_PARTITION
- // 64x128, 128x64, 128x128
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif // CONFIG_EXT_PARTITION
-#if CONFIG_EXT_PARTITION_TYPES
- // 4x16, 16x4, 8x32
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 32x8
- PARTITION_SPLIT
-#else
- // 4x16, 16x4, 8x32
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 32x8
- PARTITION_INVALID
-#endif // CONFIG_EXT_PARTITION_TYPES
-#if CONFIG_EXT_PARTITION
- }, { // 128x128 ->
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- // 2X2, 2X4, 4X2,
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
-#endif
- // 4X4
- PARTITION_SPLIT,
- // 4X8, 8X4, 8X8
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 8X16, 16X8, 16X16
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 16X32, 32X16, 32X32
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 32X64, 64X32, 64X64
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 64x128, 128x64, 128x128
- PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
-#if CONFIG_EXT_PARTITION_TYPES
- // 4x16, 16x4, 8x32
- PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
- // 32x8
- PARTITION_SPLIT
-#else
- // 4x16, 16x4, 8x32
- PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
- // 32x8
- PARTITION_INVALID
-#endif // CONFIG_EXT_PARTITION_TYPES
-#endif // CONFIG_EXT_PARTITION
- }
-};
-
#if CONFIG_EXT_PARTITION_TYPES
static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES_ALL] =
#else
@@ -358,8 +341,12 @@ static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
#endif // CONFIG_EXT_PARTITION
// 4X16, 16X4, 8X32
BLOCK_4X16, BLOCK_16X4, BLOCK_8X32,
- // 32X8,
- BLOCK_32X8,
+ // 32X8, 16X64, 64X16
+ BLOCK_32X8, BLOCK_16X64, BLOCK_64X16,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ BLOCK_32X128, BLOCK_128X32
+#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_HORZ
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
// 2X2, 2X4, 4X2,
@@ -384,8 +371,12 @@ static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
#endif // CONFIG_EXT_PARTITION
// 4X16, 16X4, 8X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8,
- BLOCK_INVALID,
+ // 32X8, 16X64, 64X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ BLOCK_INVALID, BLOCK_INVALID
+#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_VERT
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
// 2X2, 2X4, 4X2,
@@ -410,8 +401,12 @@ static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
#endif // CONFIG_EXT_PARTITION
// 4X16, 16X4, 8X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8,
- BLOCK_INVALID,
+ // 32X8, 16X64, 64X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ BLOCK_INVALID, BLOCK_INVALID
+#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_SPLIT
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
// 2X2, 2X4, 4X2,
@@ -433,8 +428,12 @@ static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
#endif // CONFIG_EXT_PARTITION
// 4X16, 16X4, 8X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8,
- BLOCK_INVALID,
+ // 32X8, 16X64, 64X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ BLOCK_INVALID, BLOCK_INVALID
+#endif // CONFIG_EXT_PARTITION
#if CONFIG_EXT_PARTITION_TYPES
}, { // PARTITION_HORZ_A
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
@@ -457,8 +456,12 @@ static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
#endif // CONFIG_EXT_PARTITION
// 4X16, 16X4, 8X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8,
- BLOCK_INVALID,
+ // 32X8, 16X64, 64X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ BLOCK_INVALID, BLOCK_INVALID
+#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_HORZ_B
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
// 2X2, 2X4, 4X2,
@@ -480,8 +483,12 @@ static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
#endif // CONFIG_EXT_PARTITION
// 4X16, 16X4, 8X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8,
- BLOCK_INVALID,
+ // 32X8, 16X64, 64X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ BLOCK_INVALID, BLOCK_INVALID
+#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_VERT_A
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
// 2X2, 2X4, 4X2,
@@ -503,8 +510,12 @@ static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
#endif // CONFIG_EXT_PARTITION
// 4X16, 16X4, 8X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8,
- BLOCK_INVALID,
+ // 32X8, 16X64, 64X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ BLOCK_INVALID, BLOCK_INVALID
+#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_VERT_B
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
// 2X2, 2X4, 4X2,
@@ -526,8 +537,12 @@ static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
#endif // CONFIG_EXT_PARTITION
// 4X16, 16X4, 8X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8,
- BLOCK_INVALID,
+ // 32X8, 16X64, 64X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ BLOCK_INVALID, BLOCK_INVALID
+#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_HORZ_4
#if CONFIG_CB4X4
// 2X2, 2X4, 4X2,
@@ -545,15 +560,19 @@ static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X8,
// 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X16,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X32,
#endif // CONFIG_EXT_PARTITION
// 4X16, 16X4, 8X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8,
- BLOCK_INVALID,
+ // 32X8, 16X64, 64X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ BLOCK_INVALID, BLOCK_INVALID
+#endif // CONFIG_EXT_PARTITION
}, { // PARTITION_VERT_4
#if CONFIG_CB4X4
// 2X2, 2X4, 4X2,
@@ -571,15 +590,19 @@ static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES_ALL] =
// 16X32, 32X16, 32X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X32,
// 32X64, 64X32, 64X64
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X64,
#if CONFIG_EXT_PARTITION
// 64x128, 128x64, 128x128
- BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X128,
#endif // CONFIG_EXT_PARTITION
// 4X16, 16X4, 8X32
BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
- // 32X8,
- BLOCK_INVALID,
+ // 32X8, 16X64, 64X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ BLOCK_INVALID, BLOCK_INVALID
+#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_EXT_PARTITION_TYPES
}
};
@@ -618,8 +641,12 @@ static const TX_SIZE max_txsize_lookup[BLOCK_SIZES_ALL] = {
#endif // CONFIG_TX64X64
// 4x16, 16x4, 8x32
TX_4X4, TX_4X4, TX_8X8,
- // 32x8
- TX_8X8
+ // 32x8, 16x64, 64x16
+ TX_8X8, TX_16X16, TX_16X16,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ TX_32X32, TX_32X32
+#endif // CONFIG_EXT_PARTITION
};
#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
@@ -638,9 +665,9 @@ static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES_ALL] = {
TX_8X16, TX_16X8, TX_16X16,
// 16X32, 32X16, 32X32
TX_16X32, TX_32X16, TX_32X32,
- // 32X64, 64X32,
- TX_32X32, TX_32X32,
#if CONFIG_TX64X64
+ // 32X64, 64X32,
+ TX_32X64, TX_64X32,
// 64X64
TX_64X64,
#if CONFIG_EXT_PARTITION
@@ -648,6 +675,8 @@ static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES_ALL] = {
TX_64X64, TX_64X64, TX_64X64,
#endif // CONFIG_EXT_PARTITION
#else
+ // 32X64, 64X32,
+ TX_32X32, TX_32X32,
// 64X64
TX_32X32,
#if CONFIG_EXT_PARTITION
@@ -659,13 +688,19 @@ static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES_ALL] = {
// 4x16, 16x4, 8x32
TX_4X16, TX_16X4, TX_8X32,
// 32x8
- TX_32X8
+ TX_32X8,
#else
// 4x16, 16x4, 8x32
TX_4X8, TX_8X4, TX_8X16,
// 32x8
- TX_16X8
+ TX_16X8,
#endif
+ // 16x64, 64x16
+ TX_16X32, TX_32X16,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ TX_32X32, TX_32X32
+#endif // CONFIG_EXT_PARTITION
};
#if CONFIG_RECT_TX_EXT
@@ -690,8 +725,12 @@ static const TX_SIZE quarter_txsize_lookup[BLOCK_SIZES_ALL] = {
#endif
// 4x16, 16x4, 8x32
TX_4X16, TX_16X4, TX_8X32,
- // 32x8
- TX_32X8
+ // 32x8, 16x64, 64x16
+ TX_32X8, TX_INVALID, TX_INVALID,
+#if CONFIG_EXT_PARTITION
+  // 32x128, 128x32
+ TX_INVALID, TX_INVALID
+#endif // CONFIG_EXT_PARTITION
};
#endif
#else
@@ -738,9 +777,9 @@ static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
// 16X32, 32X16, 32X32
TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
- // 32X64, 64X32,
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
#if CONFIG_TX64X64
+ // 32X64, 64X32,
+ TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
// 64X64
TX_64X64 - TX_8X8,
#if CONFIG_EXT_PARTITION
@@ -748,6 +787,8 @@ static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
TX_64X64 - TX_8X8, TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
#endif // CONFIG_EXT_PARTITION
#else
+ // 32X64, 64X32,
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
// 64X64
TX_32X32 - TX_8X8,
#if CONFIG_EXT_PARTITION
@@ -757,10 +798,14 @@ static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
#endif // CONFIG_TX64X64
// TODO(david.barker): Change these if we support rectangular transforms
// for 4:1 shaped partitions
- // 4x16, 16x4, 8x32
- INT32_MIN, INT32_MIN, TX_8X8 - TX_8X8,
- // 32x8
- TX_8X8 - TX_8X8
+ // 4x16, 16x4, 8x32
+ TX_8X8 - TX_8X8, TX_8X8 - TX_8X8, TX_8X8 - TX_8X8,
+ // 32x8, 16x64, 64x16
+ TX_8X8 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8
+#endif // CONFIG_EXT_PARTITION
};
#else
// Same as "max_txsize_lookup[bsize] - TX_8X8", invalid for bsize < 8X8
@@ -777,9 +822,9 @@ static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
TX_8X8 - TX_8X8, TX_8X8 - TX_8X8, TX_16X16 - TX_8X8,
// 16X32, 32X16, 32X32
TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_32X32 - TX_8X8,
- // 32X64, 64X32,
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
#if CONFIG_TX64X64
+ // 32X64, 64X32,
+ TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
// 64X64
TX_64X64 - TX_8X8,
#if CONFIG_EXT_PARTITION
@@ -787,6 +832,8 @@ static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
TX_64X64 - TX_8X8, TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
#endif // CONFIG_EXT_PARTITION
#else
+ // 32X64, 64X32,
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
// 64X64
TX_32X32 - TX_8X8,
#if CONFIG_EXT_PARTITION
@@ -794,10 +841,14 @@ static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES_ALL] = {
TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_TX64X64
- // 4x16, 16x4, 8x32
- INT32_MIN, INT32_MIN, TX_8X8 - TX_8X8,
- // 32x8
- TX_8X8 - TX_8X8
+ // 4x16, 16x4, 8x32
+ TX_8X8 - TX_8X8, TX_8X8 - TX_8X8, TX_8X8 - TX_8X8,
+  // 32x8, 16x64, 64x16
+ TX_8X8 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
+#if CONFIG_EXT_PARTITION
+ // 32x128, 128x32
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8
+#endif // CONFIG_EXT_PARTITION
};
#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
@@ -822,6 +873,10 @@ static const TX_SIZE sub_tx_size_map[TX_SIZES_ALL] = {
TX_8X8, // TX_16X8
TX_16X16, // TX_16X32
TX_16X16, // TX_32X16
+#if CONFIG_TX64X64
+ TX_32X32, // TX_32X64
+ TX_32X32, // TX_64X32
+#endif // CONFIG_TX64X64
TX_4X4, // TX_4X16
TX_4X4, // TX_16X4
TX_8X8, // TX_8X32
@@ -845,6 +900,10 @@ static const TX_SIZE txsize_horz_map[TX_SIZES_ALL] = {
TX_16X16, // TX_16X8
TX_16X16, // TX_16X32
TX_32X32, // TX_32X16
+#if CONFIG_TX64X64
+ TX_32X32, // TX_32X64
+ TX_64X64, // TX_64X32
+#endif // CONFIG_TX64X64
TX_4X4, // TX_4X16
TX_16X16, // TX_16X4
TX_8X8, // TX_8X32
@@ -868,6 +927,10 @@ static const TX_SIZE txsize_vert_map[TX_SIZES_ALL] = {
TX_8X8, // TX_16X8
TX_32X32, // TX_16X32
TX_16X16, // TX_32X16
+#if CONFIG_TX64X64
+ TX_64X64, // TX_32X64
+ TX_32X32, // TX_64X32
+#endif // CONFIG_TX64X64
TX_16X16, // TX_4X16
TX_4X4, // TX_16X4
TX_32X32, // TX_8X32
@@ -885,11 +948,15 @@ static const int tx_size_wide[TX_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2
2,
#endif
- 4, 8, 16, 32,
+ 4, 8, 16, 32,
#if CONFIG_TX64X64
64,
#endif // CONFIG_TX64X64
- 4, 8, 8, 16, 16, 32, 4, 16, 8, 32
+ 4, 8, 8, 16, 16, 32,
+#if CONFIG_TX64X64
+ 32, 64,
+#endif // CONFIG_TX64X64
+ 4, 16, 8, 32
};
#if CONFIG_CHROMA_2X2
@@ -903,44 +970,64 @@ static const int tx_size_high[TX_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2
2,
#endif
- 4, 8, 16, 32,
+ 4, 8, 16, 32,
#if CONFIG_TX64X64
64,
#endif // CONFIG_TX64X64
- 8, 4, 16, 8, 32, 16, 16, 4, 32, 8
+ 8, 4, 16, 8, 32, 16,
+#if CONFIG_TX64X64
+ 64, 32,
+#endif // CONFIG_TX64X64
+ 16, 4, 32, 8
};
// Transform block width in unit
static const int tx_size_wide_unit[TX_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2
- 1, 2, 4, 8, 16,
+ 1, 2, 4, 8, 16,
#if CONFIG_TX64X64
32,
#endif // CONFIG_TX64X64
- 2, 4, 4, 8, 8, 16, 2, 8, 4, 16
+ 2, 4, 4, 8, 8, 16,
+#if CONFIG_TX64X64
+ 16, 32,
+#endif // CONFIG_TX64X64
+ 2, 8, 4, 16
#else // CONFIG_CHROMA_2X2
- 1, 2, 4, 8,
+ 1, 2, 4, 8,
#if CONFIG_TX64X64
16,
#endif // CONFIG_TX64X64
- 1, 2, 2, 4, 4, 8, 1, 4, 2, 8
+ 1, 2, 2, 4, 4, 8,
+#if CONFIG_TX64X64
+ 8, 16,
+#endif // CONFIG_TX64X64
+ 1, 4, 2, 8
#endif // CONFIG_CHROMA_2X2
};
// Transform block height in unit
static const int tx_size_high_unit[TX_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2
- 1, 2, 4, 8, 16,
+ 1, 2, 4, 8, 16,
#if CONFIG_TX64X64
32,
#endif // CONFIG_TX64X64
- 4, 2, 8, 4, 16, 8, 8, 2, 16, 4
+ 4, 2, 8, 4, 16, 8,
+#if CONFIG_TX64X64
+ 32, 16,
+#endif // CONFIG_TX64X64
+ 8, 2, 16, 4
#else // CONFIG_CHROMA_2X2
1, 2, 4, 8,
#if CONFIG_TX64X64
16,
#endif // CONFIG_TX64X64
- 2, 1, 4, 2, 8, 4, 4, 1, 8, 2
+ 2, 1, 4, 2, 8, 4,
+#if CONFIG_TX64X64
+ 16, 8,
+#endif // CONFIG_TX64X64
+ 4, 1, 8, 2
#endif // CONFIG_CHROMA_2X2
};
@@ -953,7 +1040,11 @@ static const int tx_size_wide_log2[TX_SIZES_ALL] = {
#if CONFIG_TX64X64
6,
#endif // CONFIG_TX64X64
- 2, 3, 3, 4, 4, 5, 2, 4, 3, 5
+ 2, 3, 3, 4, 4, 5,
+#if CONFIG_TX64X64
+ 5, 6,
+#endif // CONFIG_TX64X64
+ 2, 4, 3, 5
};
// Transform block height in log2
@@ -965,7 +1056,11 @@ static const int tx_size_high_log2[TX_SIZES_ALL] = {
#if CONFIG_TX64X64
6,
#endif // CONFIG_TX64X64
- 3, 2, 4, 3, 5, 4, 4, 2, 5, 3
+ 3, 2, 4, 3, 5, 4,
+#if CONFIG_TX64X64
+ 6, 5,
+#endif // CONFIG_TX64X64
+ 4, 2, 5, 3
};
#define TX_UNIT_WIDE_LOG2 (MI_SIZE_LOG2 - tx_size_wide_log2[0])
@@ -975,11 +1070,15 @@ static const int tx_size_2d[TX_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2
4,
#endif
- 16, 64, 256, 1024,
+ 16, 64, 256, 1024,
#if CONFIG_TX64X64
4096,
#endif // CONFIG_TX64X64
- 32, 32, 128, 128, 512, 512, 64, 64, 256, 256
+ 32, 32, 128, 128, 512, 512,
+#if CONFIG_TX64X64
+ 2048, 2048,
+#endif // CONFIG_TX64X64
+ 64, 64, 256, 256
};
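
The tx_size_wide/high, *_unit, *_log2 and tx_size_2d arrays are redundant encodings of the same per-transform geometry, so the new TX_32X64/TX_64X32 entries can be cross-checked mechanically. A sanity sketch (illustrative only, assuming <assert.h> is available; the unit arrays count in multiples of the smallest transform dimension):

    static void check_tx_dim_tables(void) {
      int t;
      for (t = 0; t < TX_SIZES_ALL; ++t) {
        assert(tx_size_wide[t] == 1 << tx_size_wide_log2[t]);
        assert(tx_size_high[t] == 1 << tx_size_high_log2[t]);
        // e.g. TX_32X64: 2048 == 32 * 64
        assert(tx_size_2d[t] == tx_size_wide[t] * tx_size_high[t]);
        assert(tx_size_wide_unit[t] == tx_size_wide[t] >> tx_size_wide_log2[0]);
        assert(tx_size_high_unit[t] == tx_size_high[t] >> tx_size_high_log2[0]);
      }
    }
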
static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
@@ -999,6 +1098,10 @@ static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
BLOCK_16X8, // TX_16X8
BLOCK_16X32, // TX_16X32
BLOCK_32X16, // TX_32X16
+#if CONFIG_TX64X64
+ BLOCK_32X64, // TX_32X64
+ BLOCK_64X32, // TX_64X32
+#endif // CONFIG_TX64X64
BLOCK_4X16, // TX_4X16
BLOCK_16X4, // TX_16X4
BLOCK_8X32, // TX_8X32
@@ -1022,6 +1125,10 @@ static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
TX_8X8, // TX_16X8
TX_16X16, // TX_16X32
TX_16X16, // TX_32X16
+#if CONFIG_TX64X64
+ TX_32X32, // TX_32X64
+ TX_32X32, // TX_64X32
+#endif // CONFIG_TX64X64
TX_4X4, // TX_4X16
TX_4X4, // TX_16X4
TX_8X8, // TX_8X32
@@ -1045,6 +1152,10 @@ static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
TX_16X16, // TX_16X8
TX_32X32, // TX_16X32
TX_32X32, // TX_32X16
+#if CONFIG_TX64X64
+ TX_64X64, // TX_32X64
+ TX_64X64, // TX_64X32
+#endif // CONFIG_TX64X64
TX_16X16, // TX_4X16
TX_16X16, // TX_16X4
TX_32X32, // TX_8X32
@@ -1100,10 +1211,16 @@ static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES_ALL][2][2] = {
{ { BLOCK_128X64, BLOCK_INVALID }, { BLOCK_64X64, BLOCK_64X32 } },
{ { BLOCK_128X128, BLOCK_128X64 }, { BLOCK_64X128, BLOCK_64X64 } },
#endif // CONFIG_EXT_PARTITION
- { { BLOCK_4X16, BLOCK_4X8 }, { BLOCK_INVALID, BLOCK_INVALID } },
- { { BLOCK_16X4, BLOCK_INVALID }, { BLOCK_8X4, BLOCK_INVALID } },
+ { { BLOCK_4X16, BLOCK_4X8 }, { BLOCK_INVALID, BLOCK_4X8 } },
+ { { BLOCK_16X4, BLOCK_INVALID }, { BLOCK_8X4, BLOCK_8X4 } },
{ { BLOCK_8X32, BLOCK_8X16 }, { BLOCK_INVALID, BLOCK_4X16 } },
{ { BLOCK_32X8, BLOCK_INVALID }, { BLOCK_16X8, BLOCK_16X4 } },
+ { { BLOCK_16X64, BLOCK_16X32 }, { BLOCK_INVALID, BLOCK_8X32 } },
+ { { BLOCK_64X16, BLOCK_INVALID }, { BLOCK_32X16, BLOCK_32X8 } },
+#if CONFIG_EXT_PARTITION
+ { { BLOCK_32X128, BLOCK_32X64 }, { BLOCK_INVALID, BLOCK_16X64 } },
+ { { BLOCK_128X32, BLOCK_INVALID }, { BLOCK_64X32, BLOCK_64X16 } },
+#endif // CONFIG_EXT_PARTITION
};
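
ss_size_lookup is indexed as [bsize][subsampling_x][subsampling_y] and yields the block size a chroma plane sees; the new 4:1 rows follow the same pattern, with BLOCK_INVALID wherever halving one dimension would give a shape outside BLOCK_SIZES_ALL. A usage sketch (the wrapper name is hypothetical; av1 reads the flags from the plane descriptor):

    static BLOCK_SIZE plane_block_size(BLOCK_SIZE bsize, int ss_x, int ss_y) {
      // 4:2:0 chroma: ss_x == ss_y == 1, so BLOCK_16X64 -> BLOCK_8X32
      return ss_size_lookup[bsize][ss_x][ss_y];
    }
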
static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
@@ -1123,6 +1240,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
+#if CONFIG_TX64X64
+ { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
+ { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
+#endif // CONFIG_TX64X64
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
@@ -1147,13 +1268,17 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
+#if CONFIG_TX64X64
+ { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
+ { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
+#endif // CONFIG_TX64X64
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
},
{
- // BLOCK_2X4
+ // BLOCK_4X2
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
@@ -1165,6 +1290,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
+#if CONFIG_TX64X64
+ { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
+ { { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
+#endif // CONFIG_TX64X64
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
{ { TX_2X2, TX_2X2 }, { TX_2X2, TX_2X2 } },
@@ -1175,7 +1304,7 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
},
#elif CONFIG_CHROMA_SUB8X8
{
- // BLOCK_2X2
+ // BLOCK_2x2
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -1186,6 +1315,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#if CONFIG_TX64X64
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -1209,13 +1342,17 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#if CONFIG_TX64X64
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
},
{
- // BLOCK_2X4
+ // BLOCK_4X2
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -1226,6 +1363,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#if CONFIG_TX64X64
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -1252,6 +1393,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#if CONFIG_TX64X64
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -1284,6 +1429,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#if CONFIG_TX64X64
+ { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -1313,6 +1462,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#if CONFIG_TX64X64
+ { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -1336,6 +1489,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_8X8, TX_8X4 }, { TX_4X8, TX_4X4 } },
{ { TX_8X8, TX_8X4 }, { TX_4X8, TX_4X4 } },
{ { TX_8X8, TX_8X4 }, { TX_4X8, TX_4X4 } },
+#if CONFIG_TX64X64
+ { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
{ { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X4 }, { TX_4X8, TX_4X4 } },
@@ -1351,7 +1508,7 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
{ { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
#if CONFIG_TX64X64
- { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
+ { { TX_8X16, TX_8X8 }, { TX_4X4, TX_4X4 } },
#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
{ { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
@@ -1359,6 +1516,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_8X16, TX_8X8 }, { TX_4X8, TX_4X8 } },
{ { TX_8X16, TX_8X8 }, { TX_4X8, TX_4X8 } },
{ { TX_8X16, TX_8X8 }, { TX_4X8, TX_4X8 } },
+#if CONFIG_TX64X64
+ { { TX_8X16, TX_8X8 }, { TX_4X4, TX_4X4 } },
+ { { TX_8X16, TX_8X8 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
{ { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X16, TX_8X8 }, { TX_4X16, TX_4X8 } },
@@ -1374,7 +1535,7 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
{ { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
#if CONFIG_TX64X64
- { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
+ { { TX_16X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
{ { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
@@ -1382,6 +1543,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_8X4 }, { TX_8X8, TX_8X4 } }, // used
{ { TX_16X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
{ { TX_16X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
+#if CONFIG_TX64X64
+ { { TX_16X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
+ { { TX_16X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
{ { TX_16X4, TX_16X4 }, { TX_8X4, TX_8X4 } },
{ { TX_8X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
@@ -1405,6 +1570,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_16X8 }, { TX_8X16, TX_8X8 } },
{ { TX_16X16, TX_16X8 }, { TX_8X16, TX_8X8 } },
+#if CONFIG_TX64X64
+ { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
+ { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
{ { TX_16X4, TX_16X4 }, { TX_8X4, TX_8X4 } },
{ { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
@@ -1420,7 +1589,7 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
{ { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
#if CONFIG_TX64X64
- { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
+ { { TX_16X32, TX_16X16 }, { TX_8X8, TX_8X8 } },
#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
{ { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
@@ -1428,6 +1597,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
{ { TX_16X32, TX_16X16 }, { TX_8X16, TX_8X16 } }, // used
{ { TX_16X32, TX_16X16 }, { TX_8X16, TX_8X16 } },
+#if CONFIG_TX64X64
+ { { TX_16X32, TX_16X16 }, { TX_8X8, TX_8X8 } },
+ { { TX_16X32, TX_16X16 }, { TX_8X8, TX_8X8 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X16 }, { TX_4X16, TX_4X16 } },
{ { TX_16X4, TX_16X4 }, { TX_8X4, TX_8X4 } },
{ { TX_8X32, TX_8X16 }, { TX_8X32, TX_8X16 } },
@@ -1443,7 +1616,7 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
{ { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
#if CONFIG_TX64X64
- { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
+ { { TX_32X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
{ { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
@@ -1451,6 +1624,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
{ { TX_32X16, TX_16X8 }, { TX_16X16, TX_16X8 } },
{ { TX_32X16, TX_16X8 }, { TX_16X16, TX_16X8 } }, // used
+#if CONFIG_TX64X64
+ { { TX_32X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
+ { { TX_32X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
{ { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
{ { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
@@ -1474,6 +1651,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
{ { TX_16X32, TX_16X16 }, { TX_16X32, TX_16X16 } },
{ { TX_32X16, TX_32X16 }, { TX_16X16, TX_16X16 } },
+#if CONFIG_TX64X64
+ { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } },
+ { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
{ { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
{ { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
@@ -1497,6 +1678,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
{ { TX_16X32, TX_16X32 }, { TX_16X16, TX_16X16 } },
{ { TX_32X16, TX_32X16 }, { TX_16X16, TX_16X16 } },
+#if CONFIG_TX64X64
+ { { TX_32X64, TX_32X32 }, { TX_16X16, TX_16X16 } },
+ { { TX_32X32, TX_32X32 }, { TX_16X16, TX_16X16 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
{ { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
{ { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
@@ -1520,6 +1705,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
{ { TX_16X32, TX_16X16 }, { TX_16X32, TX_16X16 } },
{ { TX_32X16, TX_16X16 }, { TX_32X16, TX_16X16 } },
+#if CONFIG_TX64X64
+ { { TX_32X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
+ { { TX_64X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
{ { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
{ { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
@@ -1543,6 +1732,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
{ { TX_16X32, TX_16X32 }, { TX_16X32, TX_16X32 } },
{ { TX_32X16, TX_32X16 }, { TX_32X16, TX_16X16 } },
+#if CONFIG_TX64X64
+ { { TX_32X64, TX_32X32 }, { TX_16X16, TX_16X16 } },
+ { { TX_64X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
{ { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
{ { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
@@ -1559,7 +1752,7 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
{ { TX_32X32, TX_32X32 }, { TX_32X32, TX_32X32 } },
#if CONFIG_TX64X64
- { { TX_64X64, TX_64X64 }, { TX_32X32, TX_32X32 } },
+ { { TX_64X64, TX_32X32 }, { TX_32X32, TX_32X32 } },
#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
{ { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
@@ -1567,6 +1760,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
{ { TX_16X32, TX_16X32 }, { TX_16X32, TX_16X32 } },
{ { TX_32X16, TX_32X16 }, { TX_32X16, TX_32X16 } },
+#if CONFIG_TX64X64
+ { { TX_32X64, TX_32X32 }, { TX_16X16, TX_16X16 } },
+ { { TX_64X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
+#endif // CONFIG_TX64X64
{ { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
{ { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
{ { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
@@ -1582,7 +1779,7 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
{ { TX_32X32, TX_32X32 }, { TX_32X32, TX_32X32 } },
#if CONFIG_TX64X64
- { { TX_64X64, TX_32X32 }, { TX_64X64, TX_32X32 } },
+ { { TX_64X64, TX_32X32 }, { TX_32X32, TX_32X32 } },
#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
{ { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
@@ -1590,6 +1787,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
{ { TX_16X32, TX_16X32 }, { TX_16X32, TX_16X32 } },
{ { TX_32X16, TX_32X16 }, { TX_32X16, TX_32X16 } },
+#if CONFIG_TX64X64
+ { { TX_32X64, TX_32X32 }, { TX_16X16, TX_16X16 } },
+ { { TX_64X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
{ { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
{ { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
@@ -1605,7 +1806,7 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
{ { TX_32X32, TX_32X32 }, { TX_32X32, TX_32X32 } },
#if CONFIG_TX64X64
- { { TX_64X64, TX_64X64 }, { TX_64X64, TX_64X64 } },
+ { { TX_64X64, TX_32X32 }, { TX_32X32, TX_32X32 } },
#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
{ { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
@@ -1613,6 +1814,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
{ { TX_16X32, TX_16X32 }, { TX_16X32, TX_16X32 } },
{ { TX_32X16, TX_32X16 }, { TX_32X16, TX_32X16 } },
+#if CONFIG_TX64X64
+ { { TX_32X64, TX_32X32 }, { TX_16X16, TX_16X16 } },
+ { { TX_64X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
{ { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
{ { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
@@ -1639,6 +1844,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
{ { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
{ { TX_4X8, TX_4X8 }, { TX_4X4, TX_4X4 } },
+#if CONFIG_TX64X64
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X8 }, { TX_4X4, TX_4X4 } },
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_4X16, TX_4X8 }, { TX_4X4, TX_4X4 } },
@@ -1664,6 +1873,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
{ { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
{ { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
+#if CONFIG_TX64X64
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
{ { TX_16X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
{ { TX_8X4, TX_4X4 }, { TX_8X4, TX_4X4 } },
@@ -1687,6 +1900,10 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_8X8, TX_8X8 }, { TX_4X8, TX_4X8 } },
{ { TX_8X16, TX_8X16 }, { TX_4X8, TX_4X8 } },
{ { TX_8X16, TX_8X16 }, { TX_4X8, TX_4X8 } },
+#if CONFIG_TX64X64
+ { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
+ { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X16, TX_4X16 }, { TX_4X16, TX_4X16 } },
{ { TX_8X4, TX_8X4 }, { TX_4X4, TX_4X4 } },
{ { TX_8X32, TX_8X16 }, { TX_4X16, TX_4X16 } },
@@ -1710,11 +1927,117 @@ static const TX_SIZE uv_txsize_lookup[BLOCK_SIZES_ALL][TX_SIZES_ALL][2][2] = {
{ { TX_16X8, TX_8X4 }, { TX_16X8, TX_8X4 } },
{ { TX_16X8, TX_8X4 }, { TX_16X8, TX_8X4 } },
{ { TX_16X8, TX_8X4 }, { TX_16X8, TX_8X4 } },
+#if CONFIG_TX64X64
+ { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
+ { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
+#endif // CONFIG_TX64X64
{ { TX_4X8, TX_4X4 }, { TX_4X8, TX_4X4 } },
{ { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
{ { TX_8X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
{ { TX_32X8, TX_16X4 }, { TX_16X8, TX_16X4 } },
- }
+ },
+ {
+// BLOCK_16X64
+#if CONFIG_CHROMA_2X2
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
+ { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
+ { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
+#if CONFIG_TX64X64
+ { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
+#endif
+ { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
+ { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
+ { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
+ { { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
+ { { TX_16X32, TX_16X32 }, { TX_8X16, TX_8X16 } },
+ { { TX_16X16, TX_16X16 }, { TX_8X16, TX_8X16 } },
+#if CONFIG_TX64X64
+ { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
+ { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
+#endif
+ { { TX_4X16, TX_4X16 }, { TX_4X16, TX_4X16 } },
+ { { TX_16X4, TX_16X4 }, { TX_8X4, TX_8X4 } },
+ { { TX_8X32, TX_8X32 }, { TX_8X32, TX_8X32 } },
+ { { TX_16X8, TX_16X8 }, { TX_8X8, TX_8X8 } },
+ },
+ {
+// BLOCK_64X16
+#if CONFIG_CHROMA_2X2
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
+ { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
+ { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
+#if CONFIG_TX64X64
+ { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
+#endif
+ { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
+ { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
+ { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
+ { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
+ { { TX_16X16, TX_16X8 }, { TX_16X16, TX_16X8 } },
+ { { TX_32X16, TX_16X8 }, { TX_32X16, TX_16X8 } },
+#if CONFIG_TX64X64
+ { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
+ { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
+#endif
+ { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
+ { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
+ { { TX_8X16, TX_8X8 }, { TX_8X16, TX_8X8 } },
+ { { TX_32X8, TX_32X8 }, { TX_32X8, TX_32X8 } },
+ },
+#if CONFIG_EXT_PARTITION
+ {
+// BLOCK_32X128
+#if CONFIG_CHROMA_2X2
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
+ { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
+ { { TX_32X32, TX_32X32 }, { TX_16X16, TX_16X16 } },
+#if CONFIG_TX64X64
+ { { TX_32X32, TX_32X32 }, { TX_16X16, TX_16X16 } },
+#endif
+ { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
+ { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
+ { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
+ { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
+ { { TX_16X32, TX_16X32 }, { TX_16X32, TX_16X32 } },
+ { { TX_32X16, TX_32X16 }, { TX_16X16, TX_16X16 } },
+ { { TX_4X16, TX_4X16 }, { TX_4X16, TX_4X16 } },
+ { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
+ { { TX_8X32, TX_8X32 }, { TX_8X32, TX_8X32 } },
+ { { TX_32X8, TX_32X8 }, { TX_16X8, TX_16X8 } },
+ },
+ {
+// BLOCK_128X32
+#if CONFIG_CHROMA_2X2
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
+ { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
+ { { TX_32X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
+#if CONFIG_TX64X64
+ { { TX_32X32, TX_16X16 }, { TX_32X32, TX_16X16 } },
+#endif
+ { { TX_4X8, TX_4X8 }, { TX_4X8, TX_4X8 } },
+ { { TX_8X4, TX_8X4 }, { TX_8X4, TX_8X4 } },
+ { { TX_8X16, TX_8X16 }, { TX_8X16, TX_8X16 } },
+ { { TX_16X8, TX_16X8 }, { TX_16X8, TX_16X8 } },
+ { { TX_16X32, TX_16X16 }, { TX_16X32, TX_16X16 } },
+ { { TX_32X16, TX_32X16 }, { TX_32X16, TX_32X16 } },
+ { { TX_4X16, TX_4X16 }, { TX_4X16, TX_4X16 } },
+ { { TX_16X4, TX_16X4 }, { TX_16X4, TX_16X4 } },
+ { { TX_8X32, TX_8X16 }, { TX_8X32, TX_8X16 } },
+ { { TX_32X8, TX_32X8 }, { TX_32X8, TX_32X8 } },
+ },
+#endif
};
// Generates 4 bit field in which each bit set to 1 represents
@@ -1748,10 +2071,14 @@ static const struct {
{ 0, 16 }, // 128X64- {0b00000, 0b10000}
{ 0, 0 }, // 128X128-{0b00000, 0b00000}
- { 31, 28 }, // 4X16 - {0b11111, 0b11100}
- { 28, 31 }, // 16X4 - {0b11100, 0b11111}
- { 30, 24 }, // 8X32 - {0b11110, 0b11000}
- { 24, 30 }, // 32X8 - {0b11000, 0b11110}
+ { 31, 28 }, // 4X16 - {0b11111, 0b11100}
+ { 28, 31 }, // 16X4 - {0b11100, 0b11111}
+ { 30, 24 }, // 8X32 - {0b11110, 0b11000}
+ { 24, 30 }, // 32X8 - {0b11000, 0b11110}
+ { 28, 16 }, // 16X64 - {0b11100, 0b10000}
+ { 16, 28 }, // 64X16 - {0b10000, 0b11100}
+ { 24, 0 }, // 32X128- {0b11000, 0b00000}
+ { 0, 24 }, // 128X32- {0b00000, 0b11000}
#else
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
{ 15, 15 }, // 2X2 - {0b1111, 0b1111}
@@ -1774,12 +2101,24 @@ static const struct {
{ 15, 12 }, // 4X16 - {0b1111, 0b1100}
{ 12, 15 }, // 16X4 - {0b1100, 0b1111}
- { 8, 14 }, // 8X32 - {0b1110, 0b1000}
- { 14, 8 }, // 32X8 - {0b1000, 0b1110}
+ { 8, 14 }, // 8X32 - {0b1110, 0b1000}
+ { 14, 8 }, // 32X8 - {0b1000, 0b1110}
+ { 12, 0 }, // 16X64- {0b1100, 0b0000}
+ { 0, 12 }, // 64X16- {0b0000, 0b1100}
#endif // CONFIG_EXT_PARTITION
};
/* clang-format on */
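
In both variants of the table above, the { above, left } words encode, one bit per partition level, how far the block's width and height sit below the superblock size: smaller dimensions keep more of the high bits set, which is what the partition-context derivation counts. An illustrative closed form (not in the patch) that reproduces every entry, where num_bits is 5 with CONFIG_EXT_PARTITION and 4 otherwise:

    /* e.g. num_bits == 5: 16X64 -> { mask(4), mask(6) } == { 28, 16 };
     * num_bits == 4: 16X64 -> { mask(4), mask(6) } == { 12, 0 }. */
    static int partition_ctx_mask(int dim_log2, int num_bits) {
      const int all = (1 << num_bits) - 1;
      return all & ~((1 << (dim_log2 - 2)) - 1);  // keep the top bits only
    }
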
+#if CONFIG_KF_CTX
+static const int intra_mode_context[INTRA_MODES] = {
+ 0, 1, 2, 3, 4, 4, 4, 4, 3, 0,
+#if CONFIG_SMOOTH_HV
+ 1, 2,
+#endif
+ 0,
+};
+#endif
+
#if CONFIG_SUPERTX
static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = {
// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
@@ -1831,8 +2170,25 @@ static const ADAPT_OVERLAP_BLOCK adapt_overlap_block_lookup[BLOCK_SIZES_ALL] = {
ADAPT_OVERLAP_BLOCK_INVALID,
#endif // CONFIG_EXT_PARTITION
ADAPT_OVERLAP_BLOCK_INVALID, ADAPT_OVERLAP_BLOCK_INVALID,
+ ADAPT_OVERLAP_BLOCK_INVALID, ADAPT_OVERLAP_BLOCK_INVALID,
+ ADAPT_OVERLAP_BLOCK_INVALID, ADAPT_OVERLAP_BLOCK_INVALID,
+#if CONFIG_EXT_PARTITION
ADAPT_OVERLAP_BLOCK_INVALID, ADAPT_OVERLAP_BLOCK_INVALID
+#endif // CONFIG_EXT_PARTITION
};
+
+static const BLOCK_SIZE bsize_2_sqr_bsize[BLOCK_SIZES] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
+ BLOCK_2X2, BLOCK_2X2, BLOCK_2X2,
+#endif
+ BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8,
+ BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
+ BLOCK_32X32, BLOCK_32X32, BLOCK_64X64,
+#if CONFIG_EXT_PARTITION
+ BLOCK_64X64, BLOCK_64X64,
+#endif
+};
+
#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#if CONFIG_ADAPT_SCAN
diff --git a/third_party/aom/av1/common/convolve.c b/third_party/aom/av1/common/convolve.c
index 54ad7555c..5476f59a6 100644
--- a/third_party/aom/av1/common/convolve.c
+++ b/third_party/aom/av1/common/convolve.c
@@ -14,6 +14,7 @@
#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
+#include "av1/common/blockd.h"
#include "av1/common/convolve.h"
#include "av1/common/filter.h"
#include "av1/common/onyxc_int.h"
@@ -360,11 +361,75 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
}
CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- dst[y * dst_stride + x] += res;
+ if (conv_params->do_average)
+ dst[y * dst_stride + x] += res;
+ else
+ dst[y * dst_stride + x] = res;
+ }
+ }
+}
+
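+ The do_average switch added here lets one kernel serve both halves of a
+ compound prediction: the first reference writes the CONV_BUF_TYPE buffer,
+ the second accumulates into it, and the separate rounding pass later
+ normalises the sum. A minimal sketch, assuming integer-pel motion (both
+ subpel offsets zero) and a hypothetical wrapper name:
+
+     static void compound_predict_2d(const uint8_t *ref0, const uint8_t *ref1,
+                                     int src_stride, CONV_BUF_TYPE *dst,
+                                     int dst_stride, int w, int h,
+                                     InterpFilterParams *fx,
+                                     InterpFilterParams *fy,
+                                     ConvolveParams *conv_params) {
+       conv_params->do_average = 0;  // ref 0: dst[] = res
+       av1_convolve_2d_c(ref0, src_stride, dst, dst_stride, w, h, fx, fy,
+                         0, 0, conv_params);
+       conv_params->do_average = 1;  // ref 1: dst[] += res
+       av1_convolve_2d_c(ref1, src_stride, dst, dst_stride, w, h, fx, fy,
+                         0, 0, conv_params);
+     }
+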
+void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
+ CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_qn, const int x_step_qn,
+ const int subpel_y_qn, const int y_step_qn,
+ ConvolveParams *conv_params) {
+ int x, y, k;
+ uint8_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+ int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
+ filter_params_y->taps;
+ int im_stride = w;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+
+ // horizontal filter
+ const uint8_t *src_horiz = src - fo_vert * src_stride;
+ for (y = 0; y < im_h; ++y) {
+ int x_qn = subpel_x_qn;
+ for (x = 0; x < w; ++x, x_qn += x_step_qn) {
+ const uint8_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
+ const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(x_filter_idx < SUBPEL_SHIFTS);
+ const int16_t *x_filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
+ int sum = 0;
+ for (k = 0; k < filter_params_x->taps; ++k)
+ sum += x_filter[k] * src_x[k - fo_horiz];
+ im_block[y * im_stride + x] =
+ clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0));
+ }
+ src_horiz += src_stride;
+ }
+
+ // vertical filter
+ const uint8_t *src_vert = im_block + fo_vert * im_stride;
+ for (x = 0; x < w; ++x) {
+ int y_qn = subpel_y_qn;
+ for (y = 0; y < h; ++y, y_qn += y_step_qn) {
+ const uint8_t *const src_y =
+ &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
+ const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(y_filter_idx < SUBPEL_SHIFTS);
+ const int16_t *y_filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
+ CONV_BUF_TYPE sum = 0;
+ for (k = 0; k < filter_params_y->taps; ++k) {
+ sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
+ }
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
+ if (conv_params->do_average)
+ dst[y * dst_stride + x] += res;
+ else
+ dst[y * dst_stride + x] = res;
}
+ src_vert++;
}
}
+
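+ Each output column of the scaled path advances the source position in
+ fixed-point steps of 1/(1 << SCALE_SUBPEL_BITS) pel; the shift extracts
+ the integer source column and the masked remainder selects one of
+ SUBPEL_SHIFTS filter phases. A worked example (illustrative, assuming
+ SCALE_SUBPEL_BITS == 10 and SUBPEL_BITS == 4, so SCALE_EXTRA_BITS == 6):
+
+     /* Downscaling 2:1 gives x_step_qn == 2048. At output x == 3 with
+      * subpel_x_qn == 512:
+      *   x_qn       = 512 + 3 * 2048     = 6656
+      *   src column = 6656 >> 10         = 6
+      *   phase      = (6656 & 1023) >> 6 = 8   (of SUBPEL_SHIFTS == 16)
+      * so the tap window sits halfway between source columns 6 and 7. */
+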
#else
+
/* When convolve-round is enabled and compound-round is disabled, we use a
high-precision convolve filter.
Note: For notes on hardware implementations, including the required
@@ -416,60 +481,93 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
((1 << (offset_bits - conv_params->round_1)) +
(1 << (offset_bits - conv_params->round_1 - 1)));
- dst[y * dst_stride + x] += res;
+ if (conv_params->do_average)
+ dst[y * dst_stride + x] += res;
+ else
+ dst[y * dst_stride + x] = res;
}
}
}
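
The bias terms in this high-precision path keep every intermediate sum non-negative (convenient for SIMD); the final subtraction removes exactly what the two stages added. Worked arithmetic (illustrative, for bd == 8, FILTER_BITS == 7 and example rounds round_0 == 5, round_1 == 0):

    /*   horizontal bias          : 1 << (8 + 7 - 1) = 1 << 14
     *   surviving round_0 == 5   : (1 << 14) >> 5    = 1 << 9 per pixel
     *   offset_bits              : 8 + 2 * 7 - 5     = 17
     *   vertical bias            : 1 << 17
     *   subtracted after round_1 : (1 << 17) + (1 << 16)
     * The 1 << 16 term is the horizontal bias scaled by the vertical
     * filter's unity DC gain: (1 << 9) * (1 << FILTER_BITS) = 1 << 16. */
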
-#endif
-static INLINE void transpose_uint8(uint8_t *dst, int dst_stride,
- const uint8_t *src, int src_stride, int w,
- int h) {
- int r, c;
- for (r = 0; r < h; ++r)
- for (c = 0; c < w; ++c)
- dst[c * (dst_stride) + r] = src[r * (src_stride) + c];
-}
+void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
+ CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_qn, const int x_step_qn,
+ const int subpel_y_qn, const int y_step_qn,
+ ConvolveParams *conv_params) {
+ int x, y, k;
+ int32_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+ int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
+ filter_params_y->taps;
+ int im_stride = w;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int bd = 8;
-static INLINE void transpose_int32(int32_t *dst, int dst_stride,
- const int32_t *src, int src_stride, int w,
- int h) {
- int r, c;
- for (r = 0; r < h; ++r)
- for (c = 0; c < w; ++c)
- dst[c * (dst_stride) + r] = src[r * (src_stride) + c];
+ // horizontal filter
+ const uint8_t *src_horiz = src - fo_vert * src_stride;
+ for (y = 0; y < im_h; ++y) {
+ int x_qn = subpel_x_qn;
+ for (x = 0; x < w; ++x, x_qn += x_step_qn) {
+ const uint8_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
+ const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(x_filter_idx < SUBPEL_SHIFTS);
+ const int16_t *x_filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
+ int32_t sum = (1 << (bd + FILTER_BITS - 1));
+ for (k = 0; k < filter_params_x->taps; ++k) {
+ sum += x_filter[k] * src_x[k - fo_horiz];
+ }
+ assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
+ im_block[y * im_stride + x] =
+ ROUND_POWER_OF_TWO(sum, conv_params->round_0);
+ }
+ src_horiz += src_stride;
+ }
+
+ // vertical filter
+ int32_t *src_vert = im_block + fo_vert * im_stride;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ for (x = 0; x < w; ++x) {
+ int y_qn = subpel_y_qn;
+ for (y = 0; y < h; ++y, y_qn += y_step_qn) {
+ const int32_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
+ const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(y_filter_idx < SUBPEL_SHIFTS);
+ const int16_t *y_filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
+ CONV_BUF_TYPE sum = 1 << offset_bits;
+ for (k = 0; k < filter_params_y->taps; ++k) {
+ sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
+ }
+ assert(0 <= sum && sum < (1 << (offset_bits + 2)));
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
+ ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ if (conv_params->do_average)
+ dst[y * dst_stride + x] += res;
+ else
+ dst[y * dst_stride + x] = res;
+ }
+ src_vert++;
+ }
}
+#endif // CONFIG_COMPOUND_ROUND
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
- const InterpFilter *interp_filter,
- const int subpel_x_q4, int x_step_q4,
- const int subpel_y_q4, int y_step_q4,
- ConvolveParams *conv_params) {
+ InterpFilters interp_filters, const int subpel_x_q4,
+ int x_step_q4, const int subpel_y_q4, int y_step_q4,
+ int scaled, ConvolveParams *conv_params) {
(void)x_step_q4;
(void)y_step_q4;
(void)dst;
(void)dst_stride;
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params_x =
- av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
- InterpFilterParams filter_params_y =
- av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
-
-#if USE_EXTRA_FILTER
- if (filter_params_x.interp_filter == MULTITAP_SHARP &&
- filter_params_y.interp_filter == MULTITAP_SHARP) {
- // Avoid two directions both using 12-tap filter.
- // This will reduce hardware implementation cost.
- filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
- }
-#endif // USE_EXTRA_FILTER
-#else
- InterpFilterParams filter_params_x =
- av1_get_interp_filter_params(*interp_filter);
- InterpFilterParams filter_params_y =
- av1_get_interp_filter_params(*interp_filter);
-#endif
+
+ InterpFilterParams filter_params_x, filter_params_y;
+ av1_get_convolve_filter_params(interp_filters, 1, &filter_params_x,
+ &filter_params_y);
if (filter_params_y.taps < filter_params_x.taps) {
uint8_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
@@ -487,27 +585,32 @@ void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
conv_params->dst_stride, w, h);
// horizontal and vertical parameters are swapped because of the transpose
- av1_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert, tr_src_stride,
- tr_dst, tr_dst_stride, h, w, &filter_params_y,
- &filter_params_x, subpel_y_q4, subpel_x_q4, conv_params);
+ if (scaled)
+ av1_convolve_2d_scale(tr_src + fo_horiz * tr_src_stride + fo_vert,
+ tr_src_stride, tr_dst, tr_dst_stride, h, w,
+ &filter_params_y, &filter_params_x, subpel_y_q4,
+ y_step_q4, subpel_x_q4, x_step_q4, conv_params);
+ else
+ av1_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
+ tr_src_stride, tr_dst, tr_dst_stride, h, w,
+ &filter_params_y, &filter_params_x, subpel_y_q4,
+ subpel_x_q4, conv_params);
transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,
tr_dst_stride, h, w);
} else {
- av1_convolve_2d(src, src_stride, conv_params->dst, conv_params->dst_stride,
- w, h, &filter_params_x, &filter_params_y, subpel_x_q4,
- subpel_y_q4, conv_params);
+ if (scaled)
+ av1_convolve_2d_scale(src, src_stride, conv_params->dst,
+ conv_params->dst_stride, w, h, &filter_params_x,
+ &filter_params_y, subpel_x_q4, x_step_q4,
+ subpel_y_q4, y_step_q4, conv_params);
+ else
+ av1_convolve_2d(src, src_stride, conv_params->dst,
+ conv_params->dst_stride, w, h, &filter_params_x,
+ &filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
}
}
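
The transpose path keeps the longer filter in a single orientation so fewer kernel variants are needed; the argument swap is just the coordinate change. The identity being relied on, stated informally:

    /* Convolving the rows of a transposed image equals convolving the
     * columns of the original, i.e.
     *   transpose(conv2d(transpose(src), fy, fx)) == conv2d(src, fx, fy),
     * hence filter_params_y/filter_params_x and the subpel/step terms
     * trade places in the transposed calls above. */
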
#if CONFIG_HIGHBITDEPTH
-static INLINE void transpose_uint16(uint16_t *dst, int dst_stride,
- const uint16_t *src, int src_stride, int w,
- int h) {
- int r, c;
- for (r = 0; r < h; ++r)
- for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
-}
-
void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride,
uint8_t *dst8, int dst_stride, int w, int h,
int bits, int bd) {
@@ -561,11 +664,76 @@ void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
}
CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
- dst[y * dst_stride + x] += res;
+ if (conv_params->do_average)
+ dst[y * dst_stride + x] += res;
+ else
+ dst[y * dst_stride + x] = res;
+ }
+ }
+}
+
+void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
+ CONV_BUF_TYPE *dst, int dst_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_qn, const int x_step_qn,
+ const int subpel_y_qn, const int y_step_qn,
+ ConvolveParams *conv_params, int bd) {
+ int x, y, k;
+ uint16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+ int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
+ filter_params_y->taps;
+ int im_stride = w;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+ (void)bd;
+
+ // horizontal filter
+ const uint16_t *src_horiz = src - fo_vert * src_stride;
+ for (y = 0; y < im_h; ++y) {
+ int x_qn = subpel_x_qn;
+ for (x = 0; x < w; ++x, x_qn += x_step_qn) {
+ const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
+ const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(x_filter_idx < SUBPEL_SHIFTS);
+ const int16_t *x_filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
+ int sum = 0;
+ for (k = 0; k < filter_params_x->taps; ++k)
+ sum += x_filter[k] * src_x[k - fo_horiz];
+ im_block[y * im_stride + x] =
+ clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0));
+ }
+ src_horiz += src_stride;
+ }
+
+ // vertical filter
+ uint16_t *src_vert = im_block + fo_vert * im_stride;
+ for (x = 0; x < w; ++x) {
+ int y_qn = subpel_y_qn;
+ for (y = 0; y < h; ++y, y_qn += y_step_qn) {
+ const uint16_t *const src_y =
+ &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
+ const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(y_filter_idx < SUBPEL_SHIFTS);
+ const int16_t *y_filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
+ CONV_BUF_TYPE sum = 0;
+ for (k = 0; k < filter_params_y->taps; ++k) {
+ sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
+ }
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
+ if (conv_params->do_average)
+ dst[y * dst_stride + x] += res;
+ else
+ dst[y * dst_stride + x] = res;
}
+ src_vert++;
}
}
+
#else
+
void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
@@ -611,42 +779,95 @@ void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
((1 << (offset_bits - conv_params->round_1)) +
(1 << (offset_bits - conv_params->round_1 - 1)));
- dst[y * dst_stride + x] += res;
+ if (conv_params->do_average)
+ dst[y * dst_stride + x] += res;
+ else
+ dst[y * dst_stride + x] = res;
}
}
}
-#endif
+
+void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
+ CONV_BUF_TYPE *dst, int dst_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_qn, const int x_step_qn,
+ const int subpel_y_qn, const int y_step_qn,
+ ConvolveParams *conv_params, int bd) {
+ int x, y, k;
+ int32_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+ int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
+ filter_params_y->taps;
+ int im_stride = w;
+ const int fo_vert = filter_params_y->taps / 2 - 1;
+ const int fo_horiz = filter_params_x->taps / 2 - 1;
+
+ // horizontal filter
+ const uint16_t *src_horiz = src - fo_vert * src_stride;
+ for (y = 0; y < im_h; ++y) {
+ int x_qn = subpel_x_qn;
+ for (x = 0; x < w; ++x, x_qn += x_step_qn) {
+ const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
+ const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(x_filter_idx < SUBPEL_SHIFTS);
+ const int16_t *x_filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
+ int32_t sum = (1 << (bd + FILTER_BITS - 1));
+ for (k = 0; k < filter_params_x->taps; ++k) {
+ sum += x_filter[k] * src_x[k - fo_horiz];
+ }
+ assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
+ im_block[y * im_stride + x] =
+ ROUND_POWER_OF_TWO(sum, conv_params->round_0);
+ }
+ src_horiz += src_stride;
+ }
+
+ // vertical filter
+ int32_t *src_vert = im_block + fo_vert * im_stride;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ for (x = 0; x < w; ++x) {
+ int y_qn = subpel_y_qn;
+ for (y = 0; y < h; ++y, y_qn += y_step_qn) {
+ const int32_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
+ const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(y_filter_idx < SUBPEL_SHIFTS);
+ const int16_t *y_filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
+ CONV_BUF_TYPE sum = 1 << offset_bits;
+ for (k = 0; k < filter_params_y->taps; ++k) {
+ sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
+ }
+ assert(0 <= sum && sum < (1 << (offset_bits + 2)));
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
+ ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ if (conv_params->do_average)
+ dst[y * dst_stride + x] += res;
+ else
+ dst[y * dst_stride + x] = res;
+ }
+ src_vert++;
+ }
+}
+#endif // CONFIG_COMPOUND_ROUND
void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilter *interp_filter,
+ InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4,
- ConvolveParams *conv_params, int bd) {
+ int scaled, ConvolveParams *conv_params,
+ int bd) {
(void)x_step_q4;
(void)y_step_q4;
(void)dst;
(void)dst_stride;
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params_x =
- av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
- InterpFilterParams filter_params_y =
- av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
-
-#if USE_EXTRA_FILTER
- if (filter_params_x.interp_filter == MULTITAP_SHARP &&
- filter_params_y.interp_filter == MULTITAP_SHARP) {
- // Avoid two directions both using 12-tap filter.
- // This will reduce hardware implementation cost.
- filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
- }
-#endif
-#else
- InterpFilterParams filter_params_x =
- av1_get_interp_filter_params(*interp_filter);
- InterpFilterParams filter_params_y =
- av1_get_interp_filter_params(*interp_filter);
-#endif
+
+ InterpFilterParams filter_params_x, filter_params_y;
+ av1_get_convolve_filter_params(interp_filters, 1, &filter_params_x,
+ &filter_params_y);
+
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
if (filter_params_y.taps < filter_params_x.taps) {
uint16_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
@@ -664,17 +885,29 @@ void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
conv_params->dst_stride, w, h);
// horizontal and vertical parameters are swapped because of the transpose
- av1_highbd_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
- tr_src_stride, tr_dst, tr_dst_stride, h, w,
- &filter_params_y, &filter_params_x, subpel_y_q4,
- subpel_x_q4, conv_params, bd);
+ if (scaled)
+ av1_highbd_convolve_2d_scale(
+ tr_src + fo_horiz * tr_src_stride + fo_vert, tr_src_stride, tr_dst,
+ tr_dst_stride, h, w, &filter_params_y, &filter_params_x, subpel_y_q4,
+ y_step_q4, subpel_x_q4, x_step_q4, conv_params, bd);
+ else
+ av1_highbd_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
+ tr_src_stride, tr_dst, tr_dst_stride, h, w,
+ &filter_params_y, &filter_params_x, subpel_y_q4,
+ subpel_x_q4, conv_params, bd);
transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,
tr_dst_stride, h, w);
} else {
- av1_highbd_convolve_2d(src, src_stride, conv_params->dst,
- conv_params->dst_stride, w, h, &filter_params_x,
- &filter_params_y, subpel_x_q4, subpel_y_q4,
- conv_params, bd);
+ if (scaled)
+ av1_highbd_convolve_2d_scale(
+ src, src_stride, conv_params->dst, conv_params->dst_stride, w, h,
+ &filter_params_x, &filter_params_y, subpel_x_q4, x_step_q4,
+ subpel_y_q4, y_step_q4, conv_params, bd);
+ else
+ av1_highbd_convolve_2d(src, src_stride, conv_params->dst,
+ conv_params->dst_stride, w, h, &filter_params_x,
+ &filter_params_y, subpel_x_q4, subpel_y_q4,
+ conv_params, bd);
}
}
#endif // CONFIG_HIGHBITDEPTH
@@ -689,11 +922,7 @@ typedef void (*ConvolveFunc)(const uint8_t *src, int src_stride, uint8_t *dst,
static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ const InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4,
ConvolveParams *conv_params,
@@ -701,16 +930,11 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
ConvolveFunc convolve_vert) {
int ignore_horiz = x_step_q4 == SUBPEL_SHIFTS && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == SUBPEL_SHIFTS && subpel_y_q4 == 0;
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params_x =
- av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
- InterpFilterParams filter_params_y =
- av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
- InterpFilterParams filter_params;
-#else
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter);
-#endif
+
+ InterpFilterParams filter_params_x, filter_params_y;
+ av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
+ &filter_params_y);
+
assert(conv_params->round == CONVOLVE_OPT_ROUND);
assert(w <= MAX_BLOCK_WIDTH);
@@ -721,18 +945,12 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
if (ignore_horiz && ignore_vert) {
convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
} else if (ignore_vert) {
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_x;
-#endif
- assert(filter_params.taps <= MAX_FILTER_TAP);
- convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
+ assert(filter_params_x.taps <= MAX_FILTER_TAP);
+ convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params_x,
subpel_x_q4, x_step_q4, conv_params);
} else if (ignore_horiz) {
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_y;
-#endif
- assert(filter_params.taps <= MAX_FILTER_TAP);
- convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
+ assert(filter_params_y.taps <= MAX_FILTER_TAP);
+ convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params_y,
subpel_y_q4, y_step_q4, conv_params);
} else {
// temp's size is set to a 256 aligned value to facilitate SIMD
@@ -743,12 +961,7 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
int filter_size;
#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- if (interp_filter[0 + 2 * conv_params->ref] == MULTITAP_SHARP &&
- interp_filter[1 + 2 * conv_params->ref] == MULTITAP_SHARP) {
- // Avoid two directions both using 12-tap filter.
- // This will reduce hardware implementation cost.
- filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
- }
+ av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
// we do filter with fewer taps first to reduce hardware implementation
// complexity
@@ -759,22 +972,21 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
temp_conv_params.ref = 0;
temp_conv_params.do_average = 0;
temp_conv_params.round = CONVOLVE_OPT_ROUND;
- filter_params = filter_params_y;
filter_size = filter_params_x.taps;
intermediate_width =
(((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
assert(intermediate_width <= max_intermediate_size);
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_y.taps <= MAX_FILTER_TAP);
convolve_vert(src - (filter_size / 2 - 1), src_stride, temp, temp_stride,
- intermediate_width, h, filter_params, subpel_y_q4,
+ intermediate_width, h, filter_params_y, subpel_y_q4,
y_step_q4, &temp_conv_params);
- filter_params = filter_params_x;
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_x.taps <= MAX_FILTER_TAP);
convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst, dst_stride,
- w, h, filter_params, subpel_x_q4, x_step_q4, conv_params);
+ w, h, filter_params_x, subpel_x_q4, x_step_q4,
+ conv_params);
} else
#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
{
@@ -784,30 +996,22 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
temp_conv_params.ref = 0;
temp_conv_params.do_average = 0;
temp_conv_params.round = CONVOLVE_OPT_ROUND;
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_x;
filter_size = filter_params_y.taps;
-#else
- filter_size = filter_params.taps;
-#endif
intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
assert(intermediate_height <= max_intermediate_size);
(void)max_intermediate_size;
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_x.taps <= MAX_FILTER_TAP);
convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
- temp_stride, w, intermediate_height, filter_params,
+ temp_stride, w, intermediate_height, filter_params_x,
subpel_x_q4, x_step_q4, &temp_conv_params);
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_y;
-#endif
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_y.taps <= MAX_FILTER_TAP);
convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
- dst, dst_stride, w, h, filter_params, subpel_y_q4,
+ dst, dst_stride, w, h, filter_params_y, subpel_y_q4,
y_step_q4, conv_params);
}
}
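
intermediate_height covers every source row the second (vertical) pass will read after the horizontal prefilter: the full-pel advance of the last output row plus the filter footprint. Worked numbers (illustrative; SUBPEL_BITS == 4, so unscaled motion has y_step_q4 == 16):

    /* h == 32, 8-tap filter, subpel_y_q4 == 7:
     *   intermediate_height = (((32 - 1) * 16 + 7) >> 4) + 8
     *                       = (503 >> 4) + 8 = 31 + 8 = 39 rows. */
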
@@ -815,11 +1019,7 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
static void convolve_scale_helper(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ const InterpFilters interp_filters,
const int subpel_x_qn, int x_step_qn,
const int subpel_y_qn, int y_step_qn,
ConvolveParams *conv_params,
@@ -827,16 +1027,11 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
ConvolveFunc convolve_vert) {
int ignore_horiz = x_step_qn == SCALE_SUBPEL_SHIFTS && subpel_x_qn == 0;
int ignore_vert = y_step_qn == SCALE_SUBPEL_SHIFTS && subpel_y_qn == 0;
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params_x =
- av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
- InterpFilterParams filter_params_y =
- av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
- InterpFilterParams filter_params;
-#else
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter);
-#endif
+
+ InterpFilterParams filter_params_x, filter_params_y;
+ av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
+ &filter_params_y);
+
assert(conv_params->round == CONVOLVE_OPT_ROUND);
assert(w <= MAX_BLOCK_WIDTH);
@@ -847,18 +1042,12 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
if (ignore_horiz && ignore_vert) {
convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
} else if (ignore_vert) {
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_x;
-#endif
- assert(filter_params.taps <= MAX_FILTER_TAP);
- convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
+ assert(filter_params_x.taps <= MAX_FILTER_TAP);
+ convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params_x,
subpel_x_qn, x_step_qn, conv_params);
} else if (ignore_horiz) {
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_y;
-#endif
- assert(filter_params.taps <= MAX_FILTER_TAP);
- convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
+ assert(filter_params_y.taps <= MAX_FILTER_TAP);
+ convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params_y,
subpel_y_qn, y_step_qn, conv_params);
} else {
// temp's size is set to a 256 aligned value to facilitate SIMD
@@ -869,12 +1058,7 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
int filter_size;
#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- if (interp_filter[0 + 2 * conv_params->ref] == MULTITAP_SHARP &&
- interp_filter[1 + 2 * conv_params->ref] == MULTITAP_SHARP) {
- // Avoid two directions both using 12-tap filter.
- // This will reduce hardware implementation cost.
- filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
- }
+ av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
// we do filter with fewer taps first to reduce hardware implementation
// complexity
@@ -885,23 +1069,22 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
temp_conv_params.ref = 0;
temp_conv_params.do_average = 0;
temp_conv_params.round = CONVOLVE_OPT_ROUND;
- filter_params = filter_params_y;
filter_size = filter_params_x.taps;
intermediate_width =
(((w - 1) * x_step_qn + subpel_x_qn) >> SCALE_SUBPEL_BITS) +
filter_size;
assert(intermediate_width <= max_intermediate_size);
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_y.taps <= MAX_FILTER_TAP);
convolve_vert(src - (filter_size / 2 - 1), src_stride, temp, temp_stride,
- intermediate_width, h, filter_params, subpel_y_qn,
+ intermediate_width, h, filter_params_y, subpel_y_qn,
y_step_qn, &temp_conv_params);
- filter_params = filter_params_x;
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_x.taps <= MAX_FILTER_TAP);
convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst, dst_stride,
- w, h, filter_params, subpel_x_qn, x_step_qn, conv_params);
+ w, h, filter_params_x, subpel_x_qn, x_step_qn,
+ conv_params);
} else {
#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
int intermediate_height;
@@ -910,31 +1093,23 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
temp_conv_params.ref = 0;
temp_conv_params.do_average = 0;
temp_conv_params.round = CONVOLVE_OPT_ROUND;
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_x;
filter_size = filter_params_y.taps;
-#else
- filter_size = filter_params.taps;
-#endif
intermediate_height =
(((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
filter_size;
assert(intermediate_height <= max_intermediate_size);
(void)max_intermediate_size;
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_x.taps <= MAX_FILTER_TAP);
convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
- temp_stride, w, intermediate_height, filter_params,
+ temp_stride, w, intermediate_height, filter_params_x,
subpel_x_qn, x_step_qn, &temp_conv_params);
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_y;
-#endif
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_y.taps <= MAX_FILTER_TAP);
convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
- dst, dst_stride, w, h, filter_params, subpel_y_qn,
+ dst, dst_stride, w, h, filter_params_y, subpel_y_qn,
y_step_qn, conv_params);
#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
}
@@ -943,44 +1118,29 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
}
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ int dst_stride, int w, int h, InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
int y_step_q4, ConvolveParams *conv_params) {
- convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filter,
+ convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filters,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, conv_params,
av1_convolve_horiz_facade, av1_convolve_vert_facade);
}
void av1_convolve_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ int dst_stride, int w, int h, InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
int y_step_q4, ConvolveParams *conv_params) {
- convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filter,
+ convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filters,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, conv_params,
av1_convolve_horiz_facade_c, av1_convolve_vert_facade_c);
}
void av1_convolve_scale(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
- const int subpel_x_qn, int x_step_qn,
- const int subpel_y_qn, int y_step_qn,
+ InterpFilters interp_filters, const int subpel_x_qn,
+ int x_step_qn, const int subpel_y_qn, int y_step_qn,
ConvolveParams *conv_params) {
- convolve_scale_helper(src, src_stride, dst, dst_stride, w, h, interp_filter,
+ convolve_scale_helper(src, src_stride, dst, dst_stride, w, h, interp_filters,
subpel_x_qn, x_step_qn, subpel_y_qn, y_step_qn,
conv_params, av1_convolve_horiz_facade_scale,
av1_convolve_vert_facade_scale);
@@ -1242,14 +1402,9 @@ void av1_highbd_convolve_vert_facade_scale(
void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
- const int subpel_x_q4, int x_step_q4,
- const int subpel_y_q4, int y_step_q4, int ref_idx,
- int bd) {
+ InterpFilters interp_filters, const int subpel_x_q4,
+ int x_step_q4, const int subpel_y_q4, int y_step_q4,
+ int ref_idx, int bd) {
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
int ignore_horiz = x_step_q4 == SUBPEL_SHIFTS && subpel_x_q4 == 0;
@@ -1262,27 +1417,20 @@ void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
if (ignore_horiz && ignore_vert) {
highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
- } else if (ignore_vert) {
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
-#else
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter);
-#endif
+ return;
+ }
+
+ InterpFilterParams filter_params_x, filter_params_y;
+ av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
+ &filter_params_y);
+
+ if (ignore_vert) {
av1_highbd_convolve_horiz_facade(src8, src_stride, dst8, dst_stride, w, h,
- filter_params, subpel_x_q4, x_step_q4,
+ filter_params_x, subpel_x_q4, x_step_q4,
ref_idx, bd);
} else if (ignore_horiz) {
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
-#else
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter);
-#endif
av1_highbd_convolve_vert_facade(src8, src_stride, dst8, dst_stride, w, h,
- filter_params, subpel_y_q4, y_step_q4,
+ filter_params_y, subpel_y_q4, y_step_q4,
ref_idx, bd);
} else {
// temp's size is set to a 256 aligned value to facilitate SIMD
@@ -1293,54 +1441,37 @@ void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
uint8_t *temp8 = CONVERT_TO_BYTEPTR(temp);
int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
int filter_size;
- InterpFilterParams filter_params;
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params_x =
- av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
- InterpFilterParams filter_params_y =
- av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
-#endif
#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- if (interp_filter[0 + 2 * ref_idx] == MULTITAP_SHARP &&
- interp_filter[1 + 2 * ref_idx] == MULTITAP_SHARP) {
- // Avoid two directions both using 12-tap filter.
- // This will reduce hardware implementation cost.
- filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
- }
+ av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
+
if (filter_params_y.taps < filter_params_x.taps) {
int intermediate_width;
int temp_stride = max_intermediate_size;
- filter_params = filter_params_y;
filter_size = filter_params_x.taps;
intermediate_width =
(((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
assert(intermediate_width <= max_intermediate_size);
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_y.taps <= MAX_FILTER_TAP);
- av1_highbd_convolve_vert_facade(
- src8 - (filter_size / 2 - 1), src_stride, temp8, temp_stride,
- intermediate_width, h, filter_params, subpel_y_q4, y_step_q4, 0, bd);
+ av1_highbd_convolve_vert_facade(src8 - (filter_size / 2 - 1), src_stride,
+ temp8, temp_stride, intermediate_width, h,
+ filter_params_y, subpel_y_q4, y_step_q4,
+ 0, bd);
- filter_params = filter_params_x;
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_x.taps <= MAX_FILTER_TAP);
av1_highbd_convolve_horiz_facade(
temp8 + (filter_size / 2 - 1), temp_stride, dst8, dst_stride, w, h,
- filter_params, subpel_x_q4, x_step_q4, ref_idx, bd);
+ filter_params_x, subpel_x_q4, x_step_q4, ref_idx, bd);
} else
#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
{
int intermediate_height;
int temp_stride = MAX_SB_SIZE;
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_x;
filter_size = filter_params_y.taps;
-#else
- filter_params = av1_get_interp_filter_params(interp_filter);
- filter_size = filter_params.taps;
-#endif
+
intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
assert(intermediate_height <= max_intermediate_size);
@@ -1348,29 +1479,23 @@ void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
av1_highbd_convolve_horiz_facade(
src8 - src_stride * (filter_size / 2 - 1), src_stride, temp8,
- temp_stride, w, intermediate_height, filter_params, subpel_x_q4,
+ temp_stride, w, intermediate_height, filter_params_x, subpel_x_q4,
x_step_q4, 0, bd);
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_y;
-#endif
- filter_size = filter_params.taps;
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ filter_size = filter_params_y.taps;
+ assert(filter_params_y.taps <= MAX_FILTER_TAP);
av1_highbd_convolve_vert_facade(
temp8 + temp_stride * (filter_size / 2 - 1), temp_stride, dst8,
- dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, ref_idx, bd);
+ dst_stride, w, h, filter_params_y, subpel_y_q4, y_step_q4, ref_idx,
+ bd);
}
}
}
void av1_highbd_convolve_scale(const uint8_t *src8, int src_stride,
uint8_t *dst8, int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ InterpFilters interp_filters,
const int subpel_x_qn, int x_step_qn,
const int subpel_y_qn, int y_step_qn,
int ref_idx, int bd) {
@@ -1386,27 +1511,20 @@ void av1_highbd_convolve_scale(const uint8_t *src8, int src_stride,
if (ignore_horiz && ignore_vert) {
highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
- } else if (ignore_vert) {
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
-#else
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter);
-#endif
+ return;
+ }
+
+ InterpFilterParams filter_params_x, filter_params_y;
+ av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
+ &filter_params_y);
+
+ if (ignore_vert) {
av1_highbd_convolve_horiz_facade_scale(src8, src_stride, dst8, dst_stride,
- w, h, filter_params, subpel_x_qn,
+ w, h, filter_params_x, subpel_x_qn,
x_step_qn, ref_idx, bd);
} else if (ignore_horiz) {
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
-#else
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter);
-#endif
av1_highbd_convolve_vert_facade_scale(src8, src_stride, dst8, dst_stride, w,
- h, filter_params, subpel_y_qn,
+ h, filter_params_y, subpel_y_qn,
y_step_qn, ref_idx, bd);
} else {
// temp's size is set to a 256 aligned value to facilitate SIMD
@@ -1417,54 +1535,36 @@ void av1_highbd_convolve_scale(const uint8_t *src8, int src_stride,
uint8_t *temp8 = CONVERT_TO_BYTEPTR(temp);
int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
int filter_size;
- InterpFilterParams filter_params;
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params_x =
- av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
- InterpFilterParams filter_params_y =
- av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
-#endif
#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
- if (interp_filter[0 + 2 * ref_idx] == MULTITAP_SHARP &&
- interp_filter[1 + 2 * ref_idx] == MULTITAP_SHARP) {
- // Avoid two directions both using 12-tap filter.
- // This will reduce hardware implementation cost.
- filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
- }
+ av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
+
if (filter_params_y.taps < filter_params_x.taps) {
int intermediate_width;
int temp_stride = max_intermediate_size;
- filter_params = filter_params_y;
filter_size = filter_params_x.taps;
intermediate_width =
(((w - 1) * x_step_qn + subpel_x_qn) >> SCALE_SUBPEL_BITS) +
filter_size;
assert(intermediate_width <= max_intermediate_size);
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_y.taps <= MAX_FILTER_TAP);
av1_highbd_convolve_vert_facade_scale(
src8 - (filter_size / 2 - 1), src_stride, temp8, temp_stride,
- intermediate_width, h, filter_params, subpel_y_qn, y_step_qn, 0, bd);
+ intermediate_width, h, filter_params_y, subpel_y_qn, y_step_qn, 0,
+ bd);
- filter_params = filter_params_x;
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ assert(filter_params_x.taps <= MAX_FILTER_TAP);
av1_highbd_convolve_horiz_facade_scale(
temp8 + (filter_size / 2 - 1), temp_stride, dst8, dst_stride, w, h,
- filter_params, subpel_x_qn, x_step_qn, ref_idx, bd);
+ filter_params_x, subpel_x_qn, x_step_qn, ref_idx, bd);
} else {
#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
int intermediate_height;
int temp_stride = MAX_SB_SIZE;
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_x;
filter_size = filter_params_y.taps;
-#else
- filter_params = av1_get_interp_filter_params(interp_filter);
- filter_size = filter_params.taps;
-#endif
intermediate_height =
(((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
filter_size;
@@ -1473,18 +1573,16 @@ void av1_highbd_convolve_scale(const uint8_t *src8, int src_stride,
av1_highbd_convolve_horiz_facade_scale(
src8 - src_stride * (filter_size / 2 - 1), src_stride, temp8,
- temp_stride, w, intermediate_height, filter_params, subpel_x_qn,
+ temp_stride, w, intermediate_height, filter_params_x, subpel_x_qn,
x_step_qn, 0, bd);
-#if CONFIG_DUAL_FILTER
- filter_params = filter_params_y;
-#endif
- filter_size = filter_params.taps;
- assert(filter_params.taps <= MAX_FILTER_TAP);
+ filter_size = filter_params_y.taps;
+ assert(filter_params_y.taps <= MAX_FILTER_TAP);
av1_highbd_convolve_vert_facade_scale(
temp8 + temp_stride * (filter_size / 2 - 1), temp_stride, dst8,
- dst_stride, w, h, filter_params, subpel_y_qn, y_step_qn, ref_idx, bd);
+ dst_stride, w, h, filter_params_y, subpel_y_qn, y_step_qn, ref_idx,
+ bd);
#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
}
#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
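
For reference, the intermediate-extent arithmetic used by the two-pass paths above can be checked in isolation. A standalone sketch (not part of the patch; SUBPEL_BITS == 4 matches the q4 fixed-point naming, and the block height and tap count are illustrative):

    #include <assert.h>

    int main(void) {
      const int subpel_bits = 4;            /* q4 fixed point (SUBPEL_BITS) */
      const int step_q4 = 1 << subpel_bits; /* 16: unscaled prediction */
      const int h = 32, taps = 8;           /* illustrative values */
      int subpel_y_q4;
      for (subpel_y_q4 = 0; subpel_y_q4 < step_q4; subpel_y_q4++) {
        const int intermediate_height =
            (((h - 1) * step_q4 + subpel_y_q4) >> subpel_bits) + taps;
        /* Without scaling this is always h + taps - 1: h output rows plus
           the taps - 1 extra source rows the vertical filter reaches. */
        assert(intermediate_height == h + taps - 1);
      }
      return 0;
    }
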
diff --git a/third_party/aom/av1/common/convolve.h b/third_party/aom/av1/common/convolve.h
index 462733946..c43f649e0 100644
--- a/third_party/aom/av1/common/convolve.h
+++ b/third_party/aom/av1/common/convolve.h
@@ -47,15 +47,49 @@ static INLINE ConvolveParams get_conv_params(int ref, int do_average,
conv_params.do_post_rounding = 0;
return conv_params;
}
+
+#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
+static INLINE void av1_convolve_filter_params_fixup_1212(
+ const InterpFilterParams *params_x, InterpFilterParams *params_y) {
+ if (params_x->interp_filter == MULTITAP_SHARP &&
+ params_y->interp_filter == MULTITAP_SHARP) {
+ // Avoid two directions both using 12-tap filter.
+ // This will reduce hardware implementation cost.
+ *params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
+ }
+}
+#endif
+
+static INLINE void av1_get_convolve_filter_params(
+ InterpFilters interp_filters, int avoid_1212, InterpFilterParams *params_x,
+ InterpFilterParams *params_y) {
+#if CONFIG_DUAL_FILTER
+ InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 1);
+ InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
+#else
+ InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 0);
+ InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
+#endif
+
+ *params_x = av1_get_interp_filter_params(filter_x);
+ *params_y = av1_get_interp_filter_params(filter_y);
+
+ if (avoid_1212) {
+#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
+    av1_convolve_filter_params_fixup_1212(params_x, params_y);
+#endif
+ }
+}
+
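
For callers, the new helper replaces the per-direction #if CONFIG_DUAL_FILTER lookups with a single call. A minimal usage sketch (hypothetical caller, compiles only inside the tree; the InterpFilters value would come from the block's mode info, which this patch does not show):

    #include <assert.h>
    #include "av1/common/convolve.h"

    static void example_get_params(InterpFilters filters) {
      InterpFilterParams params_x, params_y;
      /* avoid_1212 = 1 requests the 12-tap fixup above (a no-op unless
         CONFIG_DUAL_FILTER && USE_EXTRA_FILTER). */
      av1_get_convolve_filter_params(filters, 1, &params_x, &params_y);
      assert(params_x.taps <= MAX_FILTER_TAP);
      assert(params_y.taps <= MAX_FILTER_TAP);
    }
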
struct AV1Common;
void av1_convolve_init(struct AV1Common *cm);
+
#if CONFIG_CONVOLVE_ROUND
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
- const InterpFilter *interp_filter,
- const int subpel_x_q4, int x_step_q4,
- const int subpel_y_q4, int y_step_q4,
- ConvolveParams *conv_params);
+ InterpFilters interp_filters, const int subpel_x_q4,
+ int x_step_q4, const int subpel_y_q4, int y_step_q4,
+ int scaled, ConvolveParams *conv_params);
static INLINE ConvolveParams get_conv_params_no_round(int ref, int do_average,
int plane, int32_t *dst,
@@ -80,63 +114,42 @@ static INLINE ConvolveParams get_conv_params_no_round(int ref, int do_average,
#if CONFIG_HIGHBITDEPTH
void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
- const InterpFilter *interp_filter,
+ InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4,
- ConvolveParams *conv_params, int bd);
+ int scaled, ConvolveParams *conv_params,
+ int bd);
#endif
#endif // CONFIG_CONVOLVE_ROUND
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ int dst_stride, int w, int h, InterpFilters interp_filters,
const int subpel_x, int xstep, const int subpel_y, int ystep,
ConvolveParams *conv_params);
void av1_convolve_c(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ int dst_stride, int w, int h, InterpFilters interp_filters,
const int subpel_x, int xstep, const int subpel_y,
int ystep, ConvolveParams *conv_params);
void av1_convolve_scale(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
- const int subpel_x, int xstep, const int subpel_y,
- int ystep, ConvolveParams *conv_params);
+ InterpFilters interp_filters, const int subpel_x,
+ int xstep, const int subpel_y, int ystep,
+ ConvolveParams *conv_params);
#if CONFIG_HIGHBITDEPTH
void av1_highbd_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
- const int subpel_x, int xstep, const int subpel_y,
- int ystep, int avg, int bd);
+ InterpFilters interp_filters, const int subpel_x,
+ int xstep, const int subpel_y, int ystep, int avg,
+ int bd);
void av1_highbd_convolve_scale(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif // CONFIG_DUAL_FILTER
- const int subpel_x, int xstep,
- const int subpel_y, int ystep, int avg, int bd);
+ InterpFilters interp_filters, const int subpel_x,
+ int xstep, const int subpel_y, int ystep,
+ int avg, int bd);
#endif // CONFIG_HIGHBITDEPTH
#ifdef __cplusplus
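
The recurring change across these prototypes is collapsing the #if CONFIG_DUAL_FILTER pointer-vs-scalar parameter into a single InterpFilters word. The packing itself is defined elsewhere in the tree; a toy stand-in for what av1_extract_interp_filter is used for above (hypothetical layout, illustration only):

    /* Hypothetical packing: two filter ids in one word. Direction 1 is the
       x filter and 0 the y filter under CONFIG_DUAL_FILTER, matching
       av1_get_convolve_filter_params above; without dual filters both
       directions read the same id. */
    typedef unsigned int ToyInterpFilters;

    static ToyInterpFilters toy_make_filters(int y_filter, int x_filter) {
      return (unsigned int)y_filter | ((unsigned int)x_filter << 16);
    }

    static int toy_extract_filter(ToyInterpFilters filters, int x_dir) {
      return (int)((filters >> (x_dir ? 16 : 0)) & 0xffffu);
    }
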
diff --git a/third_party/aom/av1/common/daala_tx.c b/third_party/aom/av1/common/daala_tx.c
index 31f03de53..e5b2372e3 100644
--- a/third_party/aom/av1/common/daala_tx.c
+++ b/third_party/aom/av1/common/daala_tx.c
@@ -166,6 +166,87 @@
} \
while (0)
+#define OD_FDST_4(q0, q2, q1, q3) \
+ /* Embedded 4-point orthonormal Type-IV fDST. */ \
+ do { \
+ int q0h; \
+ int q1h; \
+ /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(q1, 13573, 16384, 190); \
+ q2 += (q1*13573 + 16384) >> 15; \
+ /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
+ OD_DCT_OVERFLOW_CHECK(q2, 5793, 4096, 191); \
+ q1 -= (q2*5793 + 4096) >> 13; \
+ /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(q1, 3393, 4096, 192); \
+ q2 += (q1*3393 + 4096) >> 13; \
+ q0 += q2; \
+ q0h = OD_DCT_RSHIFT(q0, 1); \
+ q2 = q0h - q2; \
+ q1 += q3; \
+ q1h = OD_DCT_RSHIFT(q1, 1); \
+ q3 -= q1h; \
+ /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~=
+ 0.524455699240090 */ \
+ OD_DCT_OVERFLOW_CHECK(q1, 537, 512, 193); \
+ q2 -= (q1*537 + 512) >> 10; \
+ /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
+ OD_DCT_OVERFLOW_CHECK(q2, 1609, 1024, 194); \
+ q1 += (q2*1609 + 1024) >> 11; \
+ /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~=
+ 0.223847182092655 */ \
+ OD_DCT_OVERFLOW_CHECK(q1, 7335, 16384, 195); \
+ q2 += (q1*7335 + 16384) >> 15; \
+ /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~=
+ 0.6215036383171189 */ \
+ OD_DCT_OVERFLOW_CHECK(q0, 5091, 4096, 196); \
+ q3 += (q0*5091 + 4096) >> 13; \
+ /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
+ OD_DCT_OVERFLOW_CHECK(q3, 5681, 2048, 197); \
+ q0 -= (q3*5681 + 2048) >> 12; \
+ /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~=
+ 0.52204745462729 */ \
+ OD_DCT_OVERFLOW_CHECK(q0, 4277, 4096, 198); \
+ q3 += (q0*4277 + 4096) >> 13; \
+ } \
+ while (0)
+
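
Each tan/sin/tan triple in these kernels is a fixed-point lifting implementation of a plane rotation. For reference, the standard three-shear factorization the constants approximate (notation mine, not from the source):

    \begin{pmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{pmatrix} =
    \begin{pmatrix} 1 & -\tan(\theta/2) \\ 0 & 1 \end{pmatrix}
    \begin{pmatrix} 1 & 0 \\ \sin\theta & 1 \end{pmatrix}
    \begin{pmatrix} 1 & -\tan(\theta/2) \\ 0 & 1 \end{pmatrix}

Each shear adds a rounded multiple of one coordinate to the other, which is why the integer rounding never breaks invertibility.
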
+#define OD_IDST_4(q0, q2, q1, q3) \
+ /* Embedded 4-point orthonormal Type-IV iDST. */ \
+ do { \
+ int q0h; \
+ int q2h; \
+ /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~=
+ 0.52204745462729 */ \
+ q3 -= (q0*4277 + 4096) >> 13; \
+ /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \
+ q0 += (q3*5681 + 2048) >> 12; \
+ /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~=
+ 0.6215036383171189 */ \
+ q3 -= (q0*5091 + 4096) >> 13; \
+ /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~=
+ 0.223847182092655 */ \
+ q1 -= (q2*7335 + 16384) >> 15; \
+ /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \
+ q2 -= (q1*1609 + 1024) >> 11; \
+ /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~=
+ 0.524455699240090 */ \
+ q1 += (q2*537 + 512) >> 10; \
+ q2h = OD_DCT_RSHIFT(q2, 1); \
+ q3 += q2h; \
+ q2 -= q3; \
+ q0h = OD_DCT_RSHIFT(q0, 1); \
+ q1 = q0h - q1; \
+ q0 -= q1; \
+ /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+ q1 -= (q2*3393 + 4096) >> 13; \
+ /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
+ q2 += (q1*5793 + 4096) >> 13; \
+ /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+ q1 -= (q2*13573 + 16384) >> 15; \
+ } \
+ while (0)
+
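
Because each lifting step updates one variable using only the other, the inverse simply replays the forward steps in reverse order with opposite signs, and the rounding cancels bit-exactly. A self-contained demonstration using the Pi/8-rotation constants from the 4-point kernels above (assumes arithmetic right shift, as the macros themselves do):

    #include <stdio.h>

    static void fwd_rot(int *a, int *b) {
      *b += (*a * 13573 + 16384) >> 15; /* ~tan(Pi/8) */
      *a -= (*b * 5793 + 4096) >> 13;   /* ~sin(Pi/4) */
      *b += (*a * 3393 + 4096) >> 13;   /* ~tan(Pi/8) */
    }

    static void inv_rot(int *a, int *b) {
      *b -= (*a * 3393 + 4096) >> 13;
      *a += (*b * 5793 + 4096) >> 13;
      *b -= (*a * 13573 + 16384) >> 15;
    }

    int main(void) {
      int a = 100, b = -37;
      fwd_rot(&a, &b);
      inv_rot(&a, &b);
      printf("%d %d\n", a, b); /* prints "100 -37": exact round trip */
      return 0;
    }
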
#define OD_FDST_4_ASYM(t0, t0h, t2, t1, t3) \
/* Embedded 4-point asymmetric Type-IV fDST. */ \
do { \
@@ -277,7 +358,43 @@
} \
while (0)
-#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
+#define OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \
+ /* Embedded 8-point asymmetric Type-II fDCT. */ \
+ do { \
+ r0 += r7h; \
+ r7 = r0 - r7; \
+ r1 = r6h - r1; \
+ r6 -= r1; \
+ r2 += r5h; \
+ r5 = r2 - r5; \
+ r3 = r4h - r3; \
+ r4 -= r3; \
+ OD_FDCT_4(r0, r4, r2, r6); \
+ OD_FDST_4(r7, r3, r5, r1); \
+ } \
+ while (0)
+
+#define OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h) \
+ /* Embedded 8-point asymmetric Type-II iDCT. */ \
+ do { \
+ OD_IDST_4(r7, r5, r6, r4); \
+ OD_IDCT_4(r0, r2, r1, r3); \
+ r7 = r0 - r7; \
+ r7h = OD_DCT_RSHIFT(r7, 1); \
+ r0 -= r7h; \
+ r1 += r6; \
+ r1h = OD_DCT_RSHIFT(r1, 1); \
+ r6 = r1h - r6; \
+ r5 = r2 - r5; \
+ r5h = OD_DCT_RSHIFT(r5, 1); \
+ r2 -= r5h; \
+ r3 += r4; \
+ r3h = OD_DCT_RSHIFT(r3, 1); \
+ r4 = r3h - r4; \
+ } \
+ while (0)
+
+#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \
/* Embedded 8-point orthonormal Type-IV fDST. */ \
do { \
int t0h; \
@@ -446,6 +563,2561 @@
} \
while (0)
+/* TODO: Rewrite this so that t0h can be passed in. */
+#define OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
+ /* Embedded 8-point asymmetric Type-IV fDST. */ \
+ do { \
+ int t0h; \
+ int t2h; \
+ int t5h; \
+ int t7h; \
+ /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
+ OD_DCT_OVERFLOW_CHECK(t1, 1035, 1024, 199); \
+ t6 += (t1*1035 + 1024) >> 11; \
+ /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 3675, 2048, 200); \
+ t1 -= (t6*3675 + 2048) >> 12; \
+ /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
+ OD_DCT_OVERFLOW_CHECK(t1, 851, 4096, 201); \
+ t6 -= (t1*851 + 4096) >> 13; \
+ /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
+ OD_DCT_OVERFLOW_CHECK(t2, 4379, 4096, 202); \
+ t5 += (t2*4379 + 4096) >> 13; \
+ /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
+ OD_DCT_OVERFLOW_CHECK(t5, 10217, 4096, 203); \
+ t2 -= (t5*10217 + 4096) >> 13; \
+ /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
+ OD_DCT_OVERFLOW_CHECK(t2, 4379, 8192, 204); \
+ t5 += (t2*4379 + 8192) >> 14; \
+ /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \
+ OD_DCT_OVERFLOW_CHECK(t3, 12905, 8192, 205); \
+ t4 += (t3*12905 + 8192) >> 14; \
+ /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
+ OD_DCT_OVERFLOW_CHECK(t4, 3363, 4096, 206); \
+ t3 -= (t4*3363 + 4096) >> 13; \
+ /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
+ OD_DCT_OVERFLOW_CHECK(t3, 3525, 2048, 207); \
+ t4 -= (t3*3525 + 2048) >> 12; \
+ /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
+ OD_DCT_OVERFLOW_CHECK(t0, 5417, 4096, 208); \
+ t7 += (t0*5417 + 4096) >> 13; \
+ /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
+ OD_DCT_OVERFLOW_CHECK(t7, 5765, 2048, 209); \
+ t0 -= (t7*5765 + 2048) >> 12; \
+ /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
+ OD_DCT_OVERFLOW_CHECK(t0, 2507, 2048, 210); \
+ t7 += (t0*2507 + 2048) >> 12; \
+ t0 += t1; \
+ t0h = OD_DCT_RSHIFT(t0, 1); \
+ t1 -= t0h; \
+ t2 -= t3; \
+ t2h = OD_DCT_RSHIFT(t2, 1); \
+ t3 += t2h; \
+ t5 -= t4; \
+ t5h = OD_DCT_RSHIFT(t5, 1); \
+ t4 += t5h; \
+ t7 += t6; \
+ t7h = OD_DCT_RSHIFT(t7, 1); \
+ t6 = t7h - t6; \
+ t4 = t7h - t4; \
+ t7 -= t4; \
+ t1 += t5h; \
+ t5 = t1 - t5; \
+ t6 += t2h; \
+ t2 = t6 - t2; \
+ t3 -= t0h; \
+ t0 += t3; \
+ /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 211); \
+ t1 += (t6*3259 + 8192) >> 14; \
+ /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
+ OD_DCT_OVERFLOW_CHECK(t1, 3135, 4096, 212); \
+ t6 -= (t1*3135 + 4096) >> 13; \
+ /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 213); \
+ t1 += (t6*3259 + 8192) >> 14; \
+ /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
+ OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 214); \
+ t5 += (t2*2737 + 2048) >> 12; \
+ /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ OD_DCT_OVERFLOW_CHECK(t5, 473, 256, 215); \
+ t2 -= (t5*473 + 256) >> 9; \
+ /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
+ OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 216); \
+ t5 += (t2*2737 + 2048) >> 12; \
+ /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 217); \
+ t3 += (t4*3393 + 4096) >> 13; \
+ /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
+ OD_DCT_OVERFLOW_CHECK(t3, 5793, 4096, 218); \
+ t4 -= (t3*5793 + 4096) >> 13; \
+ /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 219); \
+ t3 += (t4*3393 + 4096) >> 13; \
+ } \
+ while (0)
+
+#define OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \
+ /* Embedded 8-point asymmetric Type-IV iDST. */ \
+ do { \
+ int t0h; \
+ int t2h; \
+ int t5h__; \
+ int t7h__; \
+ /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+ t6 -= (t1*3393 + 4096) >> 13; \
+ /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
+ t1 += (t6*5793 + 4096) >> 13; \
+ /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+ t6 -= (t1*3393 + 4096) >> 13; \
+ /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
+ t5 -= (t2*2737 + 2048) >> 12; \
+ /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ t2 += (t5*473 + 256) >> 9; \
+ /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
+ t5 -= (t2*2737 + 2048) >> 12; \
+ /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
+ t4 -= (t3*3259 + 8192) >> 14; \
+ /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
+ t3 += (t4*3135 + 4096) >> 13; \
+ /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
+ t4 -= (t3*3259 + 8192) >> 14; \
+ t0 -= t6; \
+ t0h = OD_DCT_RSHIFT(t0, 1); \
+ t6 += t0h; \
+ t2 = t3 - t2; \
+ t2h = OD_DCT_RSHIFT(t2, 1); \
+ t3 -= t2h; \
+ t5 = t4 - t5; \
+ t5h__ = OD_DCT_RSHIFT(t5, 1); \
+ t4 -= t5h__; \
+ t7 += t1; \
+ t7h__ = OD_DCT_RSHIFT(t7, 1); \
+ t1 = t7h__ - t1; \
+ t3 = t7h__ - t3; \
+ t7 -= t3; \
+ t1 -= t5h__; \
+ t5 += t1; \
+ t6 -= t2h; \
+ t2 += t6; \
+ t4 += t0h; \
+ t0 -= t4; \
+ /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \
+ t7 -= (t0*2507 + 2048) >> 12; \
+ /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \
+ t0 += (t7*5765 + 2048) >> 12; \
+ /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \
+ t7 -= (t0*5417 + 4096) >> 13; \
+ /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \
+ t1 += (t6*3525 + 2048) >> 12; \
+ /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \
+ t6 += (t1*3363 + 4096) >> 13; \
+    /* 12905/16384 ~= (1/Sqrt[2] - Cos[3*Pi/32]/2)/Sin[3*Pi/32] */ \
+ t1 -= (t6*12905 + 8192) >> 14; \
+ /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \
+ t5 -= (t2*4379 + 8192) >> 14; \
+ /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \
+ t2 += (t5*10217 + 4096) >> 13; \
+ /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \
+ t5 -= (t2*4379 + 4096) >> 13; \
+ /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \
+ t3 += (t4*851 + 4096) >> 13; \
+ /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \
+ t4 += (t3*3675 + 2048) >> 12; \
+ /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \
+ t3 -= (t4*1035 + 1024) >> 11; \
+ } \
+ while (0)
+
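
The sum/halved-difference pairs used throughout (t0 += t1; t0h = OD_DCT_RSHIFT(t0, 1); t1 -= t0h; and variants) apply the same idea at the butterfly level: keep the exact sum plus a half-compensated difference, so the pair grows by only one bit yet stays losslessly invertible. A stripped-down sketch (plain arithmetic shift standing in for OD_DCT_RSHIFT; the kernels vary the signs per lane):

    /* Forward: (a, b) -> (s, d) with s = a + b and d = (s >> 1) - b. */
    static void half_butterfly_fwd(int *a, int *b) {
      *a += *b;
      *b = (*a >> 1) - *b;
    }

    /* Inverse: s >> 1 is recomputed from the stored s, so whatever the
       shift rounded to cancels and (a, b) come back exactly. */
    static void half_butterfly_inv(int *a, int *b) {
      *b = (*a >> 1) - *b;
      *a -= *b;
    }
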
+#define OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
+ s1, s9, s5, sd, s3, sb, s7, sf) \
+ /* Embedded 16-point orthonormal Type-II fDCT. */ \
+ do { \
+ int s8h; \
+ int sah; \
+ int sch; \
+ int seh; \
+ int sfh; \
+ sf = s0 - sf; \
+ sfh = OD_DCT_RSHIFT(sf, 1); \
+ s0 -= sfh; \
+ se += s1; \
+ seh = OD_DCT_RSHIFT(se, 1); \
+ s1 = seh - s1; \
+ sd = s2 - sd; \
+ s2 -= OD_DCT_RSHIFT(sd, 1); \
+ sc += s3; \
+ sch = OD_DCT_RSHIFT(sc, 1); \
+ s3 = sch - s3; \
+ sb = s4 - sb; \
+ s4 -= OD_DCT_RSHIFT(sb, 1); \
+ sa += s5; \
+ sah = OD_DCT_RSHIFT(sa, 1); \
+ s5 = sah - s5; \
+ s9 = s6 - s9; \
+ s6 -= OD_DCT_RSHIFT(s9, 1); \
+ s8 += s7; \
+ s8h = OD_DCT_RSHIFT(s8, 1); \
+ s7 = s8h - s7; \
+ OD_FDCT_8_ASYM(s0, s8, s8h, s4, sc, sch, s2, sa, sah, s6, se, seh); \
+ OD_FDST_8_ASYM(sf, s7, sb, s3, sd, s5, s9, s1); \
+ } \
+ while (0)
+
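
OD_FDCT_16 makes the recursive structure explicit: mirror-image sums and differences feed a half-size asymmetric DCT (even coefficients) and a half-size asymmetric DST (odd coefficients). In exact arithmetic this is the classical even/odd split of the DCT-II (continuous-math statement, ignoring the fixed-point scaling):

    X_{2r}   = \sum_{n=0}^{N/2-1} (x_n + x_{N-1-n}) \cos\frac{\pi(2n+1)r}{N}
    X_{2r+1} = \sum_{n=0}^{N/2-1} (x_n - x_{N-1-n}) \cos\frac{\pi(2n+1)(2r+1)}{2N}

The pre-halved arguments (s8h, sah, sch, seh) simply let the *_ASYM kernels reuse shifts the caller has already computed.
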
+#define OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \
+ s1, s9, s5, sd, s3, sb, s7, sf) \
+ /* Embedded 16-point orthonormal Type-II iDCT. */ \
+ do { \
+ int s1h; \
+ int s3h; \
+ int s5h; \
+ int s7h; \
+ int sfh; \
+ OD_IDST_8_ASYM(sf, sb, sd, s9, se, sa, sc, s8); \
+ OD_IDCT_8_ASYM(s0, s4, s2, s6, s1, s1h, s5, s5h, s3, s3h, s7, s7h); \
+ sfh = OD_DCT_RSHIFT(sf, 1); \
+ s0 += sfh; \
+ sf = s0 - sf; \
+ se = s1h - se; \
+ s1 -= se; \
+ s2 += OD_DCT_RSHIFT(sd, 1); \
+ sd = s2 - sd; \
+ sc = s3h - sc; \
+ s3 -= sc; \
+ s4 += OD_DCT_RSHIFT(sb, 1); \
+ sb = s4 - sb; \
+ sa = s5h - sa; \
+ s5 -= sa; \
+ s6 += OD_DCT_RSHIFT(s9, 1); \
+ s9 = s6 - s9; \
+ s8 = s7h - s8; \
+ s7 -= s8; \
+ } \
+ while (0)
+
+#define OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh, \
+ t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh) \
+ /* Embedded 16-point asymmetric Type-II fDCT. */ \
+ do { \
+ t0 += tfh; \
+ tf = t0 - tf; \
+ t1 -= teh; \
+ te += t1; \
+ t2 += tdh; \
+ td = t2 - td; \
+ t3 -= tch; \
+ tc += t3; \
+ t4 += tbh; \
+ tb = t4 - tb; \
+ t5 -= tah; \
+ ta += t5; \
+ t6 += t9h; \
+ t9 = t6 - t9; \
+ t7 -= t8h; \
+ t8 += t7; \
+ OD_FDCT_8(t0, t8, t4, tc, t2, ta, t6, te); \
+ OD_FDST_8(tf, t7, tb, t3, td, t5, t9, t1); \
+ } \
+ while (0)
+
+#define OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
+ t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh) \
+ /* Embedded 16-point asymmetric Type-II iDCT. */ \
+ do { \
+ OD_IDST_8(tf, tb, td, t9, te, ta, tc, t8); \
+ OD_IDCT_8(t0, t4, t2, t6, t1, t5, t3, t7); \
+ t1 -= te; \
+ t1h = OD_DCT_RSHIFT(t1, 1); \
+ te += t1h; \
+ t9 = t6 - t9; \
+ t9h = OD_DCT_RSHIFT(t9, 1); \
+ t6 -= t9h; \
+ t5 -= ta; \
+ t5h = OD_DCT_RSHIFT(t5, 1); \
+ ta += t5h; \
+ td = t2 - td; \
+ tdh = OD_DCT_RSHIFT(td, 1); \
+ t2 -= tdh; \
+ t3 -= tc; \
+ t3h = OD_DCT_RSHIFT(t3, 1); \
+ tc += t3h; \
+ tb = t4 - tb; \
+ tbh = OD_DCT_RSHIFT(tb, 1); \
+ t4 -= tbh; \
+ t7 -= t8; \
+ t7h = OD_DCT_RSHIFT(t7, 1); \
+ t8 += t7h; \
+ tf = t0 - tf; \
+ tfh = OD_DCT_RSHIFT(tf, 1); \
+ t0 -= tfh; \
+ } \
+ while (0)
+
+#define OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
+ s1, s9, s5, sd, s3, sb, s7, sf) \
+ /* Embedded 16-point orthonormal Type-IV fDST. */ \
+ do { \
+ int s0h; \
+ int s2h; \
+ int sdh; \
+ int sfh; \
+ /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+    OD_DCT_OVERFLOW_CHECK(se, 13573, 16384, 220); \
+ s1 += (se*13573 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
+ OD_DCT_OVERFLOW_CHECK(s1, 11585, 8192, 221); \
+ se -= (s1*11585 + 8192) >> 14; \
+ /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+    OD_DCT_OVERFLOW_CHECK(se, 13573, 16384, 222); \
+ s1 += (se*13573 + 16384) >> 15; \
+ /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
+ OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 223); \
+ sd += (s2*21895 + 16384) >> 15; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+    OD_DCT_OVERFLOW_CHECK(sd, 15137, 8192, 224); \
+ s2 -= (sd*15137 + 8192) >> 14; \
+ /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
+ OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 225); \
+ sd += (s2*21895 + 16384) >> 15; \
+ /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
+ OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 226); \
+ sc += (s3*3259 + 8192) >> 14; \
+ /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
+ OD_DCT_OVERFLOW_CHECK(sc, 3135, 4096, 227); \
+ s3 -= (sc*3135 + 4096) >> 13; \
+ /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
+ OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 228); \
+ sc += (s3*3259 + 8192) >> 14; \
+ /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 229); \
+ sa += (s5*13573 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \
+ OD_DCT_OVERFLOW_CHECK(sa, 11585, 8192, 230); \
+ s5 -= (sa*11585 + 8192) >> 14; \
+ /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 231); \
+ sa += (s5*13573 + 16384) >> 15; \
+ /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 232); \
+ s6 += (s9*13573 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
+ OD_DCT_OVERFLOW_CHECK(s6, 11585, 8192, 233); \
+ s9 -= (s6*11585 + 8192) >> 14; \
+ /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 234); \
+ s6 += (s9*13573 + 16384) >> 15; \
+ sf += se; \
+ sfh = OD_DCT_RSHIFT(sf, 1); \
+ se = sfh - se; \
+ s0 += s1; \
+ s0h = OD_DCT_RSHIFT(s0, 1); \
+ s1 = s0h - s1; \
+ s2 = s3 - s2; \
+ s2h = OD_DCT_RSHIFT(s2, 1); \
+ s3 -= s2h; \
+ sd -= sc; \
+ sdh = OD_DCT_RSHIFT(sd, 1); \
+ sc += sdh; \
+ sa = s4 - sa; \
+ s4 -= OD_DCT_RSHIFT(sa, 1); \
+ s5 += sb; \
+ sb = OD_DCT_RSHIFT(s5, 1) - sb; \
+ s8 += s6; \
+ s6 -= OD_DCT_RSHIFT(s8, 1); \
+ s7 = s9 - s7; \
+ s9 -= OD_DCT_RSHIFT(s7, 1); \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 235); \
+ s4 += (sb*6723 + 4096) >> 13; \
+ /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
+ OD_DCT_OVERFLOW_CHECK(s4, 16069, 8192, 236); \
+ sb -= (s4*16069 + 8192) >> 14; \
+    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 237); \
+ s4 += (sb*6723 + 4096) >> 13; \
+    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
+ OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 238); \
+ sa += (s5*8757 + 8192) >> 14; \
+ /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
+ OD_DCT_OVERFLOW_CHECK(sa, 6811, 4096, 239); \
+ s5 -= (sa*6811 + 4096) >> 13; \
+ /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
+ OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 240); \
+ sa += (s5*8757 + 8192) >> 14; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 241); \
+ s6 += (s9*2485 + 4096) >> 13; \
+ /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
+ OD_DCT_OVERFLOW_CHECK(s6, 4551, 4096, 242); \
+ s9 -= (s6*4551 + 4096) >> 13; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 243); \
+ s6 += (s9*2485 + 4096) >> 13; \
+ /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
+ OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 244); \
+ s7 += (s8*3227 + 16384) >> 15; \
+ /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
+ OD_DCT_OVERFLOW_CHECK(s7, 6393, 16384, 245); \
+ s8 -= (s7*6393 + 16384) >> 15; \
+ /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
+ OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 246); \
+ s7 += (s8*3227 + 16384) >> 15; \
+ s1 -= s2h; \
+ s2 += s1; \
+ se += sdh; \
+ sd = se - sd; \
+ s3 += sfh; \
+ sf -= s3; \
+ sc = s0h - sc; \
+ s0 -= sc; \
+ sb += OD_DCT_RSHIFT(s8, 1); \
+ s8 = sb - s8; \
+ s4 += OD_DCT_RSHIFT(s7, 1); \
+ s7 -= s4; \
+ s6 += OD_DCT_RSHIFT(s5, 1); \
+ s5 = s6 - s5; \
+ s9 -= OD_DCT_RSHIFT(sa, 1); \
+ sa += s9; \
+ s8 += s0; \
+ s0 -= OD_DCT_RSHIFT(s8, 1); \
+ sf += s7; \
+ s7 = OD_DCT_RSHIFT(sf, 1) - s7; \
+ s1 -= s6; \
+ s6 += OD_DCT_RSHIFT(s1, 1); \
+ s9 += se; \
+ se = OD_DCT_RSHIFT(s9, 1) - se; \
+ s2 += sa; \
+ sa = OD_DCT_RSHIFT(s2, 1) - sa; \
+ s5 += sd; \
+ sd -= OD_DCT_RSHIFT(s5, 1); \
+ s4 = sc - s4; \
+ sc -= OD_DCT_RSHIFT(s4, 1); \
+ s3 -= sb; \
+ sb += OD_DCT_RSHIFT(s3, 1); \
+ /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(sf, 2799, 2048, 247); \
+ s0 -= (sf*2799 + 2048) >> 12; \
+ /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s0, 2893, 1024, 248); \
+ sf += (s0*2893 + 1024) >> 11; \
+ /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(sf, 5397, 4096, 249); \
+ s0 -= (sf*5397 + 4096) >> 13; \
+ /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s1, 41, 32, 250); \
+ se += (s1*41 + 32) >> 6; \
+ /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(se, 2865, 1024, 251); \
+ s1 -= (se*2865 + 1024) >> 11; \
+ /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s1, 4641, 4096, 252); \
+ se += (s1*4641 + 4096) >> 13; \
+ /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s2, 2473, 2048, 253); \
+ sd += (s2*2473 + 2048) >> 12; \
+ /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(sd, 5619, 2048, 254); \
+ s2 -= (sd*5619 + 2048) >> 12; \
+ /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s2, 7839, 8192, 255); \
+ sd += (s2*7839 + 8192) >> 14; \
+ /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s3, 5747, 4096, 256); \
+ sc -= (s3*5747 + 4096) >> 13; \
+    /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(sc, 3903, 4096, 257); \
+ s3 += (sc*3903 + 4096) >> 13; \
+ /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s3, 5701, 4096, 258); \
+ sc += (s3*5701 + 4096) >> 13; \
+ /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s4, 4471, 4096, 259); \
+ sb += (s4*4471 + 4096) >> 13; \
+ /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(sb, 1309, 512, 260); \
+ s4 -= (sb*1309 + 512) >> 10; \
+ /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s4, 5067, 8192, 261); \
+ sb += (s4*5067 + 8192) >> 14; \
+ /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s5, 2217, 2048, 262); \
+ sa -= (s5*2217 + 2048) >> 12; \
+ /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] ~= 0.72705107329128 */ \
+ OD_DCT_OVERFLOW_CHECK(sa, 1489, 1024, 263); \
+ s5 += (sa*1489 + 1024) >> 11; \
+ /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s5, 75, 128, 264); \
+ sa += (s5*75 + 128) >> 8; \
+ /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s9, 2087, 2048, 265); \
+ s6 -= (s9*2087 + 2048) >> 12; \
+ /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s6, 4653, 2048, 266); \
+ s9 += (s6*4653 + 2048) >> 12; \
+ /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s9, 4545, 16384, 267); \
+ s6 -= (s9*4545 + 16384) >> 15; \
+ /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s8, 2053, 2048, 268); \
+ s7 += (s8*2053 + 2048) >> 12; \
+ /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s7, 1945, 1024, 269); \
+ s8 -= (s7*1945 + 1024) >> 11; \
+ /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
+ OD_DCT_OVERFLOW_CHECK(s8, 1651, 16384, 270); \
+ s7 -= (s8*1651 + 16384) >> 15; \
+ } \
+ while (0)
+
+#define OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, \
+ s1, s9, s5, sd, s3, sb, s7, sf) \
+ /* Embedded 16-point orthonormal Type-IV iDST. */ \
+ do { \
+ int s0h; \
+ int s4h; \
+ int sbh; \
+ int sfh; \
+ /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \
+ se += (s1*1651 + 16384) >> 15; \
+ /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \
+ s1 += (se*1945 + 1024) >> 11; \
+ /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \
+ se -= (s1*2053 + 2048) >> 12; \
+ /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \
+ s6 += (s9*4545 + 16384) >> 15; \
+    /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \
+ s9 -= (s6*4653 + 2048) >> 12; \
+ /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \
+ s6 += (s9*2087 + 2048) >> 12; \
+ /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \
+ s5 -= (sa*75 + 128) >> 8; \
+ /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] */ \
+ sa -= (s5*1489 + 1024) >> 11; \
+ /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \
+ s5 += (sa*2217 + 2048) >> 12; \
+ /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \
+ sd -= (s2*5067 + 8192) >> 14; \
+ /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \
+ s2 += (sd*1309 + 512) >> 10; \
+ /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \
+ sd -= (s2*4471 + 4096) >> 13; \
+ /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \
+ s3 -= (sc*5701 + 4096) >> 13; \
+ /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \
+ sc -= (s3*3903 + 4096) >> 13; \
+ /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \
+ s3 += (sc*5747 + 4096) >> 13; \
+ /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \
+ sb -= (s4*7839 + 8192) >> 14; \
+ /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \
+ s4 += (sb*5619 + 2048) >> 12; \
+ /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \
+ sb -= (s4*2473 + 2048) >> 12; \
+ /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \
+ s7 -= (s8*4641 + 4096) >> 13; \
+ /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \
+ s8 += (s7*2865 + 1024) >> 11; \
+ /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \
+ s7 -= (s8*41 + 32) >> 6; \
+ /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \
+ s0 += (sf*5397 + 4096) >> 13; \
+ /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \
+ sf -= (s0*2893 + 1024) >> 11; \
+ /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \
+ s0 += (sf*2799 + 2048) >> 12; \
+ sd -= OD_DCT_RSHIFT(sc, 1); \
+ sc += sd; \
+ s3 += OD_DCT_RSHIFT(s2, 1); \
+ s2 = s3 - s2; \
+ sb += OD_DCT_RSHIFT(sa, 1); \
+ sa -= sb; \
+ s5 = OD_DCT_RSHIFT(s4, 1) - s5; \
+ s4 -= s5; \
+ s7 = OD_DCT_RSHIFT(s9, 1) - s7; \
+ s9 -= s7; \
+ s6 -= OD_DCT_RSHIFT(s8, 1); \
+ s8 += s6; \
+ se = OD_DCT_RSHIFT(sf, 1) - se; \
+ sf -= se; \
+ s0 += OD_DCT_RSHIFT(s1, 1); \
+ s1 -= s0; \
+ s5 -= s9; \
+ s9 += OD_DCT_RSHIFT(s5, 1); \
+ sa = s6 - sa; \
+ s6 -= OD_DCT_RSHIFT(sa, 1); \
+ se += s2; \
+ s2 -= OD_DCT_RSHIFT(se, 1); \
+ s1 = sd - s1; \
+ sd -= OD_DCT_RSHIFT(s1, 1); \
+ s0 += s3; \
+ s0h = OD_DCT_RSHIFT(s0, 1); \
+ s3 = s0h - s3; \
+ sf += sc; \
+ sfh = OD_DCT_RSHIFT(sf, 1); \
+ sc -= sfh; \
+ sb = s7 - sb; \
+ sbh = OD_DCT_RSHIFT(sb, 1); \
+ s7 -= sbh; \
+ s4 -= s8; \
+ s4h = OD_DCT_RSHIFT(s4, 1); \
+ s8 += s4h; \
+ /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
+ se -= (s1*3227 + 16384) >> 15; \
+ /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
+ s1 += (se*6393 + 16384) >> 15; \
+ /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
+ se -= (s1*3227 + 16384) >> 15; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ s6 -= (s9*2485 + 4096) >> 13; \
+ /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
+ s9 += (s6*4551 + 4096) >> 13; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ s6 -= (s9*2485 + 4096) >> 13; \
+ /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
+ s5 -= (sa*8757 + 8192) >> 14; \
+ /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
+ sa += (s5*6811 + 4096) >> 13; \
+    /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
+ s5 -= (sa*8757 + 8192) >> 14; \
+    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ s2 -= (sd*6723 + 4096) >> 13; \
+ /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
+ sd += (s2*16069 + 8192) >> 14; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ s2 -= (sd*6723 + 4096) >> 13; \
+ s9 += OD_DCT_RSHIFT(se, 1); \
+ se = s9 - se; \
+ s6 += OD_DCT_RSHIFT(s1, 1); \
+ s1 -= s6; \
+ sd = OD_DCT_RSHIFT(sa, 1) - sd; \
+ sa -= sd; \
+ s2 += OD_DCT_RSHIFT(s5, 1); \
+ s5 = s2 - s5; \
+ s3 -= sbh; \
+ sb += s3; \
+ sc += s4h; \
+ s4 = sc - s4; \
+ s8 = s0h - s8; \
+ s0 -= s8; \
+ s7 = sfh - s7; \
+ sf -= s7; \
+ /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
+ s6 -= (s9*13573 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
+ s9 += (s6*11585 + 8192) >> 14; \
+ /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
+ s6 -= (s9*13573 + 16384) >> 15; \
+ /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
+ s5 -= (sa*13573 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
+ sa += (s5*11585 + 8192) >> 14; \
+ /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
+ s5 -= (sa*13573 + 16384) >> 15; \
+ /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
+ s3 -= (sc*3259 + 8192) >> 14; \
+ /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \
+ sc += (s3*3135 + 4096) >> 13; \
+ /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \
+ s3 -= (sc*3259 + 8192) >> 14; \
+ /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
+ sb -= (s4*21895 + 16384) >> 15; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ s4 += (sb*15137 + 8192) >> 14; \
+ /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \
+ sb -= (s4*21895 + 16384) >> 15; \
+ /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
+ s8 -= (s7*13573 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \
+ s7 += (s8*11585 + 8192) >> 14; \
+ /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \
+ s8 -= (s7*13573 + 16384) >> 15; \
+ } \
+ while (0)
+
+/* TODO: rewrite this to match OD_FDST_16. */
+#define OD_FDST_16_ASYM(t0, t0h, t8, t4, t4h, tc, t2, ta, t6, te, \
+ t1, t9, t5, td, t3, tb, t7, t7h, tf) \
+ /* Embedded 16-point asymmetric Type-IV fDST. */ \
+ do { \
+ int t2h; \
+ int t3h; \
+ int t6h; \
+ int t8h; \
+ int t9h; \
+ int tch; \
+ int tdh; \
+ /* TODO: Can we move these into another operation */ \
+ t8 = -t8; \
+ t9 = -t9; \
+ ta = -ta; \
+ tb = -tb; \
+ td = -td; \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 136); \
+ t1 -= (te*13573 + 8192) >> 14; \
+ /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
+ OD_DCT_OVERFLOW_CHECK(t1, 11585, 16384, 137); \
+ te += (t1*11585 + 16384) >> 15; \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 138); \
+ t1 -= (te*13573 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ OD_DCT_OVERFLOW_CHECK(td, 4161, 8192, 139); \
+ t2 += (td*4161 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ OD_DCT_OVERFLOW_CHECK(t2, 15137, 8192, 140); \
+ td -= (t2*15137 + 8192) >> 14; \
+ /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ OD_DCT_OVERFLOW_CHECK(td, 14341, 8192, 141); \
+ t2 += (td*14341 + 8192) >> 14; \
+ /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ OD_DCT_OVERFLOW_CHECK(t3, 14341, 8192, 142); \
+ tc -= (t3*14341 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ OD_DCT_OVERFLOW_CHECK(tc, 15137, 8192, 143); \
+ t3 += (tc*15137 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ OD_DCT_OVERFLOW_CHECK(t3, 4161, 8192, 144); \
+ tc -= (t3*4161 + 8192) >> 14; \
+ te = t0h - te; \
+ t0 -= te; \
+ tf = OD_DCT_RSHIFT(t1, 1) - tf; \
+ t1 -= tf; \
+ /* TODO: Can we move this into another operation */ \
+ tc = -tc; \
+ t2 = OD_DCT_RSHIFT(tc, 1) - t2; \
+ tc -= t2; \
+ t3 = OD_DCT_RSHIFT(td, 1) - t3; \
+ td = t3 - td; \
+ /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 7489, 4096, 145); \
+ t9 -= (t6*7489 + 4096) >> 13; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
+ OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 146); \
+ t6 += (t9*11585 + 8192) >> 14; \
+ /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 147); \
+ t9 += (t6*19195 + 16384) >> 15; \
+ t8 += OD_DCT_RSHIFT(t9, 1); \
+ t9 -= t8; \
+ t6 = t7h - t6; \
+ t7 -= t6; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 148); \
+ t8 += (t7*6723 + 4096) >> 13; \
+ /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
+ OD_DCT_OVERFLOW_CHECK(t8, 16069, 8192, 149); \
+ t7 -= (t8*16069 + 8192) >> 14; \
+    /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 150); \
+ t8 += (t7*6723 + 4096) >> 13; \
+    /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 151); \
+ t9 += (t6*17515 + 16384) >> 15; \
+ /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
+ OD_DCT_OVERFLOW_CHECK(t9, 13623, 8192, 152); \
+ t6 -= (t9*13623 + 8192) >> 14; \
+ /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 153); \
+ t9 += (t6*17515 + 16384) >> 15; \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 154); \
+ t5 += (ta*13573 + 8192) >> 14; \
+ /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
+ OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 155); \
+ ta -= (t5*11585 + 16384) >> 15; \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 156); \
+ t5 += (ta*13573 + 8192) >> 14; \
+ tb += OD_DCT_RSHIFT(t5, 1); \
+ t5 = tb - t5; \
+ ta += t4h; \
+ t4 -= ta; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 157); \
+ ta += (t5*2485 + 4096) >> 13; \
+ /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
+ OD_DCT_OVERFLOW_CHECK(ta, 18205, 16384, 158); \
+ t5 -= (ta*18205 + 16384) >> 15; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 159); \
+ ta += (t5*2485 + 4096) >> 13; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 160); \
+ tb -= (t4*6723 + 4096) >> 13; \
+ /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
+ OD_DCT_OVERFLOW_CHECK(tb, 16069, 8192, 161); \
+ t4 += (tb*16069 + 8192) >> 14; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 162); \
+ tb -= (t4*6723 + 4096) >> 13; \
+ /* TODO: Can we move this into another operation */ \
+ t5 = -t5; \
+ tc -= tf; \
+ tch = OD_DCT_RSHIFT(tc, 1); \
+ tf += tch; \
+ t3 += t0; \
+ t3h = OD_DCT_RSHIFT(t3, 1); \
+ t0 -= t3h; \
+ td -= t1; \
+ tdh = OD_DCT_RSHIFT(td, 1); \
+ t1 += tdh; \
+ t2 += te; \
+ t2h = OD_DCT_RSHIFT(t2, 1); \
+ te -= t2h; \
+ t8 += t4; \
+ t8h = OD_DCT_RSHIFT(t8, 1); \
+ t4 = t8h - t4; \
+ t7 = tb - t7; \
+ t7h = OD_DCT_RSHIFT(t7, 1); \
+ tb = t7h - tb; \
+ t6 -= ta; \
+ t6h = OD_DCT_RSHIFT(t6, 1); \
+ ta += t6h; \
+ t9 = t5 - t9; \
+ t9h = OD_DCT_RSHIFT(t9, 1); \
+ t5 -= t9h; \
+ t0 -= t7h; \
+ t7 += t0; \
+ tf += t8h; \
+ t8 -= tf; \
+ te -= t6h; \
+ t6 += te; \
+ t1 += t9h; \
+ t9 -= t1; \
+ tb -= tch; \
+ tc += tb; \
+ t4 += t3h; \
+ t3 -= t4; \
+ ta -= tdh; \
+ td += ta; \
+ t5 = t2h - t5; \
+ t2 -= t5; \
+ /* TODO: Can we move these into another operation */ \
+ t8 = -t8; \
+ t9 = -t9; \
+ ta = -ta; \
+ tb = -tb; \
+ tc = -tc; \
+ td = -td; \
+ tf = -tf; \
+ /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
+ OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 163); \
+ t0 -= (tf*7799 + 4096) >> 13; \
+ /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
+ OD_DCT_OVERFLOW_CHECK(t0, 4091, 2048, 164); \
+ tf += (t0*4091 + 2048) >> 12; \
+ /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
+ OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 165); \
+ t0 -= (tf*7799 + 4096) >> 13; \
+ /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
+ OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 166); \
+ t1 += (te*2417 + 16384) >> 15; \
+ /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
+ OD_DCT_OVERFLOW_CHECK(t1, 601, 2048, 167); \
+ te -= (t1*601 + 2048) >> 12; \
+ /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
+ OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 168); \
+ t1 += (te*2417 + 16384) >> 15; \
+ /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
+ OD_DCT_OVERFLOW_CHECK(t8, 14525, 16384, 169); \
+ t7 -= (t8*14525 + 16384) >> 15; \
+ /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
+ OD_DCT_OVERFLOW_CHECK(t7, 3035, 2048, 170); \
+ t8 += (t7*3035 + 2048) >> 12; \
+ /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
+ OD_DCT_OVERFLOW_CHECK(t8, 7263, 8192, 171); \
+ t7 -= (t8*7263 + 8192) >> 14; \
+ /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
+ OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 172); \
+ t2 -= (td*6393 + 4096) >> 13; \
+ /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
+ OD_DCT_OVERFLOW_CHECK(t2, 3973, 2048, 173); \
+ td += (t2*3973 + 2048) >> 12; \
+ /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
+ OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 174); \
+ t2 -= (td*6393 + 4096) >> 13; \
+ /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
+ OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 175); \
+ t5 -= (ta*9281 + 8192) >> 14; \
+ /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
+ OD_DCT_OVERFLOW_CHECK(t5, 7027, 4096, 176); \
+ ta += (t5*7027 + 4096) >> 13; \
+ /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
+ OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 177); \
+ t5 -= (ta*9281 + 8192) >> 14; \
+ /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
+ OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 178); \
+ t3 -= (tc*11539 + 8192) >> 14; \
+ /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
+ OD_DCT_OVERFLOW_CHECK(t3, 7713, 4096, 179); \
+ tc += (t3*7713 + 4096) >> 13; \
+ /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
+ OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 180); \
+ t3 -= (tc*11539 + 8192) >> 14; \
+ /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
+ OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 181); \
+ t4 -= (tb*10375 + 8192) >> 14; \
+ /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
+ OD_DCT_OVERFLOW_CHECK(t4, 7405, 4096, 182); \
+ tb += (t4*7405 + 4096) >> 13; \
+ /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
+ OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 183); \
+ t4 -= (tb*10375 + 8192) >> 14; \
+ /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
+ OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 184); \
+ t6 -= (t9*8247 + 8192) >> 14; \
+ /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 1645, 1024, 185); \
+ t9 += (t6*1645 + 1024) >> 11; \
+ /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
+ OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 186); \
+ t6 -= (t9*8247 + 8192) >> 14; \
+ } \
+ while (0)
+
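Every rotation in the macros above is factored into three integer lifting shears (tan of the half angle, then sin, then tan again), each with an explicit rounding offset, so the transform stays exactly invertible in integer arithmetic. Below is a minimal sketch of that pattern, not part of the patch itself; the helper names are hypothetical, and the constants shown are the Pi/4 pair also used in these macros.

/* Forward: three shears approximate a rotation by Pi/4. */
static void od_sketch_rot_fwd(int *x, int *y) {
  *x -= (*y*13573 + 16384) >> 15; /* 13573/32768 ~= Tan[Pi/8] */
  *y += (*x*11585 + 8192) >> 14;  /* 11585/16384 ~= Sin[Pi/4] */
  *x -= (*y*13573 + 16384) >> 15; /* 13573/32768 ~= Tan[Pi/8] */
}
/* Inverse: undo the shears in reverse order with flipped signs.  Each
   step reproduces the forward rounding exactly, which is why the iDST
   macros mirror their fDST twins step for step. */
static void od_sketch_rot_inv(int *x, int *y) {
  *x += (*y*13573 + 16384) >> 15;
  *y -= (*x*11585 + 8192) >> 14;
  *x += (*y*13573 + 16384) >> 15;
}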
+#define OD_IDST_16_ASYM(t0, t0h, t8, t4, tc, t2, t2h, ta, t6, te, teh, \
+ t1, t9, t5, td, t3, tb, t7, tf) \
+ /* Embedded 16-point asymmetric Type-IV iDST. */ \
+ do { \
+ int t1h_; \
+ int t3h_; \
+ int t4h; \
+ int t6h; \
+ int t9h_; \
+ int tbh_; \
+ int tch; \
+ /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
+ t6 += (t9*8247 + 8192) >> 14; \
+ /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \
+ t9 -= (t6*1645 + 1024) >> 11; \
+ /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \
+ t6 += (t9*8247 + 8192) >> 14; \
+ /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
+ t2 += (td*10375 + 8192) >> 14; \
+ /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \
+ td -= (t2*7405 + 4096) >> 13; \
+ /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \
+ t2 += (td*10375 + 8192) >> 14; \
+ /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
+ tc += (t3*11539 + 8192) >> 14; \
+ /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \
+ t3 -= (tc*7713 + 4096) >> 13; \
+ /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \
+ tc += (t3*11539 + 8192) >> 14; \
+ /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
+ ta += (t5*9281 + 8192) >> 14; \
+ /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \
+ t5 -= (ta*7027 + 4096) >> 13; \
+ /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \
+ ta += (t5*9281 + 8192) >> 14; \
+ /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
+ t4 += (tb*6393 + 4096) >> 13; \
+ /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \
+ tb -= (t4*3973 + 2048) >> 12; \
+ /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \
+ t4 += (tb*6393 + 4096) >> 13; \
+ /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
+ te += (t1*7263 + 8192) >> 14; \
+ /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \
+ t1 -= (te*3035 + 2048) >> 12; \
+ /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \
+ te += (t1*14525 + 16384) >> 15; \
+ /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
+ t8 -= (t7*2417 + 16384) >> 15; \
+ /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \
+ t7 += (t8*601 + 2048) >> 12; \
+ /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \
+ t8 -= (t7*2417 + 16384) >> 15; \
+ /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
+ t0 += (tf*7799 + 4096) >> 13; \
+ /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \
+ tf -= (t0*4091 + 2048) >> 12; \
+ /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \
+ t0 += (tf*7799 + 4096) >> 13; \
+ /* TODO: Can we move these into another operation? */ \
+ t1 = -t1; \
+ t3 = -t3; \
+ t5 = -t5; \
+ t9 = -t9; \
+ tb = -tb; \
+ td = -td; \
+ tf = -tf; \
+ t4 += ta; \
+ t4h = OD_DCT_RSHIFT(t4, 1); \
+ ta = t4h - ta; \
+ tb -= t5; \
+ tbh_ = OD_DCT_RSHIFT(tb, 1); \
+ t5 += tbh_; \
+ tc += t2; \
+ tch = OD_DCT_RSHIFT(tc, 1); \
+ t2 -= tch; \
+ t3 -= td; \
+ t3h_ = OD_DCT_RSHIFT(t3, 1); \
+ td += t3h_; \
+ t9 += t8; \
+ t9h_ = OD_DCT_RSHIFT(t9, 1); \
+ t8 -= t9h_; \
+ t6 -= t7; \
+ t6h = OD_DCT_RSHIFT(t6, 1); \
+ t7 += t6h; \
+ t1 += tf; \
+ t1h_ = OD_DCT_RSHIFT(t1, 1); \
+ tf -= t1h_; \
+ te -= t0; \
+ teh = OD_DCT_RSHIFT(te, 1); \
+ t0 += teh; \
+ ta += t9h_; \
+ t9 = ta - t9; \
+ t5 -= t6h; \
+ t6 += t5; \
+ td = teh - td; \
+ te = td - te; \
+ t2 = t1h_ - t2; \
+ t1 -= t2; \
+ t7 += t4h; \
+ t4 -= t7; \
+ t8 -= tbh_; \
+ tb += t8; \
+ t0 += tch; \
+ tc -= t0; \
+ tf -= t3h_; \
+ t3 += tf; \
+ /* TODO: Can we move this into another operation? */ \
+ ta = -ta; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ td += (t2*6723 + 4096) >> 13; \
+ /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
+ t2 -= (td*16069 + 8192) >> 14; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ td += (t2*6723 + 4096) >> 13; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ t5 -= (ta*2485 + 4096) >> 13; \
+ /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
+ ta += (t5*18205 + 16384) >> 15; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ t5 -= (ta*2485 + 4096) >> 13; \
+ t2 += t5; \
+ t2h = OD_DCT_RSHIFT(t2, 1); \
+ t5 -= t2h; \
+ ta = td - ta; \
+ td -= OD_DCT_RSHIFT(ta, 1); \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ ta -= (t5*13573 + 8192) >> 14; \
+ /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
+ t5 += (ta*11585 + 16384) >> 15; \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ ta -= (t5*13573 + 8192) >> 14; \
+ /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
+ t9 -= (t6*17515 + 16384) >> 15; \
+ /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \
+ t6 += (t9*13623 + 8192) >> 14; \
+ /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
+ t9 -= (t6*17515 + 16384) >> 15; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ t1 -= (te*6723 + 4096) >> 13; \
+ /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \
+ te += (t1*16069 + 8192) >> 14; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \
+ t1 -= (te*6723 + 4096) >> 13; \
+ te += t6; \
+ teh = OD_DCT_RSHIFT(te, 1); \
+ t6 = teh - t6; \
+ t9 += t1; \
+ t1 -= OD_DCT_RSHIFT(t9, 1); \
+ /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
+ t9 -= (t6*19195 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
+ t6 -= (t9*11585 + 8192) >> 14; \
+ /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
+ t9 += (t6*7489 + 4096) >> 13; \
+ tb = tc - tb; \
+ tc = OD_DCT_RSHIFT(tb, 1) - tc; \
+ t3 += t4; \
+ t4 = OD_DCT_RSHIFT(t3, 1) - t4; \
+ /* TODO: Can we move this into another operation? */ \
+ t3 = -t3; \
+ t8 += tf; \
+ tf = OD_DCT_RSHIFT(t8, 1) - tf; \
+ t0 += t7; \
+ t0h = OD_DCT_RSHIFT(t0, 1); \
+ t7 = t0h - t7; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ t3 += (tc*4161 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ tc -= (t3*15137 + 8192) >> 14; \
+ /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ t3 += (tc*14341 + 8192) >> 14; \
+ /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ t4 -= (tb*14341 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ tb += (t4*15137 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ t4 -= (tb*4161 + 8192) >> 14; \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ t8 += (t7*13573 + 8192) >> 14; \
+ /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
+ t7 -= (t8*11585 + 16384) >> 15; \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ t8 += (t7*13573 + 8192) >> 14; \
+ /* TODO: Can we move these into another operation? */ \
+ t1 = -t1; \
+ t5 = -t5; \
+ t9 = -t9; \
+ tb = -tb; \
+ td = -td; \
+ } \
+ while (0)
+
+#define OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
+ te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
+ /* Embedded 32-point orthonormal Type-II fDCT. */ \
+ do { \
+ int tgh; \
+ int thh; \
+ int tih; \
+ int tkh; \
+ int tmh; \
+ int tnh; \
+ int toh; \
+ int tqh; \
+ int tsh; \
+ int tuh; \
+ int tvh; \
+ tv = t0 - tv; \
+ tvh = OD_DCT_RSHIFT(tv, 1); \
+ t0 -= tvh; \
+ tu += t1; \
+ tuh = OD_DCT_RSHIFT(tu, 1); \
+ t1 = tuh - t1; \
+ tt = t2 - tt; \
+ t2 -= OD_DCT_RSHIFT(tt, 1); \
+ ts += t3; \
+ tsh = OD_DCT_RSHIFT(ts, 1); \
+ t3 = tsh - t3; \
+ tr = t4 - tr; \
+ t4 -= OD_DCT_RSHIFT(tr, 1); \
+ tq += t5; \
+ tqh = OD_DCT_RSHIFT(tq, 1); \
+ t5 = tqh - t5; \
+ tp = t6 - tp; \
+ t6 -= OD_DCT_RSHIFT(tp, 1); \
+ to += t7; \
+ toh = OD_DCT_RSHIFT(to, 1); \
+ t7 = toh - t7; \
+ tn = t8 - tn; \
+ tnh = OD_DCT_RSHIFT(tn, 1); \
+ t8 -= tnh; \
+ tm += t9; \
+ tmh = OD_DCT_RSHIFT(tm, 1); \
+ t9 = tmh - t9; \
+ tl = ta - tl; \
+ ta -= OD_DCT_RSHIFT(tl, 1); \
+ tk += tb; \
+ tkh = OD_DCT_RSHIFT(tk, 1); \
+ tb = tkh - tb; \
+ tj = tc - tj; \
+ tc -= OD_DCT_RSHIFT(tj, 1); \
+ ti += td; \
+ tih = OD_DCT_RSHIFT(ti, 1); \
+ td = tih - td; \
+ th = te - th; \
+ thh = OD_DCT_RSHIFT(th, 1); \
+ te -= thh; \
+ tg += tf; \
+ tgh = OD_DCT_RSHIFT(tg, 1); \
+ tf = tgh - tf; \
+ OD_FDCT_16_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
+ t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh); \
+ OD_FDST_16_ASYM(tv, tvh, tf, tn, tnh, t7, tr, tb, tj, t3, \
+ tt, td, tl, t5, tp, t9, th, thh, t1); \
+ } \
+ while (0)
+
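The 32-point fDCT above is purely structural: each input pair is split into a difference that feeds OD_FDST_16_ASYM and a compensated sum that feeds OD_FDCT_16_ASYM, with the half values (tvh, tuh, and so on) computed once and passed down so the asymmetric kernels need not re-shift. A minimal sketch of one split butterfly follows, not part of the patch; SKETCH_RSHIFT is a plain arithmetic shift standing in for OD_DCT_RSHIFT (defined earlier in this file) only to keep the sketch self-contained.

#define SKETCH_RSHIFT(a, b) ((a) >> (b))
/* One split butterfly from the top of OD_FDCT_32, e.g. t0/tv/tvh. */
static void od_sketch_split(int *even, int *odd, int *odd_half) {
  *odd = *even - *odd;                /* difference: odd (fDST) half */
  *odd_half = SKETCH_RSHIFT(*odd, 1); /* cached half for the callee */
  *even -= *odd_half;                 /* compensated sum: even (fDCT) half */
}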
+#define OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \
+ te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
+ /* Embedded 32-point orthonormal Type-II iDCT. */ \
+ do { \
+ int t1h; \
+ int t3h; \
+ int t5h; \
+ int t7h; \
+ int t9h; \
+ int tbh; \
+ int tdh; \
+ int tfh; \
+ int thh; \
+ int tth; \
+ int tvh; \
+ OD_IDST_16_ASYM(tv, tvh, tn, tr, tj, tt, tth, tl, tp, th, thh, \
+ tu, tm, tq, ti, ts, tk, to, tg); \
+ OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \
+ t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh); \
+ tu = t1h - tu; \
+ t1 -= tu; \
+ te += thh; \
+ th = te - th; \
+ tm = t9h - tm; \
+ t9 -= tm; \
+ t6 += OD_DCT_RSHIFT(tp, 1); \
+ tp = t6 - tp; \
+ tq = t5h - tq; \
+ t5 -= tq; \
+ ta += OD_DCT_RSHIFT(tl, 1); \
+ tl = ta - tl; \
+ ti = tdh - ti; \
+ td -= ti; \
+ t2 += tth; \
+ tt = t2 - tt; \
+ ts = t3h - ts; \
+ t3 -= ts; \
+ tc += OD_DCT_RSHIFT(tj, 1); \
+ tj = tc - tj; \
+ tk = tbh - tk; \
+ tb -= tk; \
+ t4 += OD_DCT_RSHIFT(tr, 1); \
+ tr = t4 - tr; \
+ to = t7h - to; \
+ t7 -= to; \
+ t8 += OD_DCT_RSHIFT(tn, 1); \
+ tn = t8 - tn; \
+ tg = tfh - tg; \
+ tf -= tg; \
+ t0 += tvh; \
+ tv = t0 - tv; \
+ } \
+ while (0)
+
+#if CONFIG_TX64X64
+#define OD_FDCT_32_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
+ t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh, t1, th, thh, \
+ t9, tp, tph, t5, tl, tlh, td, tt, tth, t3, tj, tjh, tb, tr, trh, \
+ t7, tn, tnh, tf, tv, tvh) \
+ /* Embedded 32-point asymmetric Type-II fDCT. */ \
+ do { \
+ t0 += tvh; \
+ tv = t0 - tv; \
+ t1 = tuh - t1; \
+ tu -= t1; \
+ t2 += tth; \
+ tt = t2 - tt; \
+ t3 = tsh - t3; \
+ ts -= t3; \
+ t4 += trh; \
+ tr = t4 - tr; \
+ t5 = tqh - t5; \
+ tq -= t5; \
+ t6 += tph; \
+ tp = t6 - tp; \
+ t7 = toh - t7; \
+ to -= t7; \
+ t8 += tnh; \
+ tn = t8 - tn; \
+ t9 = tmh - t9; \
+ tm -= t9; \
+ ta += tlh; \
+ tl = ta - tl; \
+ tb = tkh - tb; \
+ tk -= tb; \
+ tc += tjh; \
+ tj = tc - tj; \
+ td = tih - td; \
+ ti -= td; \
+ te += thh; \
+ th = te - th; \
+ tf = tgh - tf; \
+ tg -= tf; \
+ OD_FDCT_16(t0, tg, t8, to, t4, tk, tc, ts, \
+ t2, ti, ta, tq, t6, tm, te, tu); \
+ OD_FDST_16(tv, tf, tn, t7, tr, tb, tj, t3, \
+ tt, td, tl, t5, tp, t9, th, t1); \
+ } \
+ while (0)
+
+#define OD_IDCT_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, \
+ t6, tm, te, tu, t1, t1h, th, thh, t9, t9h, tp, tph, t5, t5h, tl, tlh, \
+ td, tdh, tt, tth, t3, t3h, tj, tjh, tb, tbh, tr, trh, t7, t7h, tn, tnh, \
+ tf, tfh, tv, tvh) \
+ /* Embedded 32-point asymmetric Type-II iDCT. */ \
+ do { \
+ OD_IDST_16(tv, tn, tr, tj, tt, tl, tp, th, \
+ tu, tm, tq, ti, ts, tk, to, tg); \
+ OD_IDCT_16(t0, t8, t4, tc, t2, ta, t6, te, \
+ t1, t9, t5, td, t3, tb, t7, tf); \
+ tv = t0 - tv; \
+ tvh = OD_DCT_RSHIFT(tv, 1); \
+ t0 -= tvh; \
+ t1 += tu; \
+ t1h = OD_DCT_RSHIFT(t1, 1); \
+ tu = t1h - tu; \
+ tt = t2 - tt; \
+ tth = OD_DCT_RSHIFT(tt, 1); \
+ t2 -= tth; \
+ t3 += ts; \
+ t3h = OD_DCT_RSHIFT(t3, 1); \
+ ts = t3h - ts; \
+ tr = t4 - tr; \
+ trh = OD_DCT_RSHIFT(tr, 1); \
+ t4 -= trh; \
+ t5 += tq; \
+ t5h = OD_DCT_RSHIFT(t5, 1); \
+ tq = t5h - tq; \
+ tp = t6 - tp; \
+ tph = OD_DCT_RSHIFT(tp, 1); \
+ t6 -= tph; \
+ t7 += to; \
+ t7h = OD_DCT_RSHIFT(t7, 1); \
+ to = t7h - to; \
+ tn = t8 - tn; \
+ tnh = OD_DCT_RSHIFT(tn, 1); \
+ t8 -= tnh; \
+ t9 += tm; \
+ t9h = OD_DCT_RSHIFT(t9, 1); \
+ tm = t9h - tm; \
+ tl = ta - tl; \
+ tlh = OD_DCT_RSHIFT(tl, 1); \
+ ta -= tlh; \
+ tb += tk; \
+ tbh = OD_DCT_RSHIFT(tb, 1); \
+ tk = tbh - tk; \
+ tj = tc - tj; \
+ tjh = OD_DCT_RSHIFT(tj, 1); \
+ tc -= tjh; \
+ td += ti; \
+ tdh = OD_DCT_RSHIFT(td, 1); \
+ ti = tdh - ti; \
+ th = te - th; \
+ thh = OD_DCT_RSHIFT(th, 1); \
+ te -= thh; \
+ tf += tg; \
+ tfh = OD_DCT_RSHIFT(tf, 1); \
+ tg = tfh - tg; \
+ } \
+ while (0)
+
+#define OD_FDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
+ tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
+ /* Embedded 32-point asymmetric Type-IV fDST. */ \
+ do { \
+ int t0h; \
+ int t1h; \
+ int t4h; \
+ int t5h; \
+ int tqh; \
+ int trh; \
+ int tuh; \
+ int tvh; \
+ \
+ tu = -tu; \
+ \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 271); \
+ t5 -= (tq*13573 + 8192) >> 14; \
+ /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
+ OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 272); \
+ tq += (t5*11585 + 16384) >> 15; \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 273); \
+ t5 -= (tq*13573 + 8192) >> 14; \
+ /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 29957, 16384, 274); \
+ tp += (t6*29957 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
+ OD_DCT_OVERFLOW_CHECK(tp, 11585, 8192, 275); \
+ t6 -= (tp*11585 + 8192) >> 14; \
+ /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 276); \
+ tp -= (t6*19195 + 16384) >> 15; \
+ /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(t1, 29957, 16384, 277); \
+ tu += (t1*29957 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
+ OD_DCT_OVERFLOW_CHECK(tu, 11585, 8192, 278); \
+ t1 -= (tu*11585 + 8192) >> 14; \
+ /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
+ OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 279); \
+ tu -= (t1*19195 + 16384) >> 15; \
+ /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ OD_DCT_OVERFLOW_CHECK(t2, 28681, 16384, 280); \
+ tt += (t2*28681 + 16384) >> 15; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ OD_DCT_OVERFLOW_CHECK(tt, 15137, 8192, 281); \
+ t2 -= (tt*15137 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ OD_DCT_OVERFLOW_CHECK(t2, 4161, 8192, 282); \
+ tt += (t2*4161 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ OD_DCT_OVERFLOW_CHECK(ts, 4161, 8192, 283); \
+ t3 += (ts*4161 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ OD_DCT_OVERFLOW_CHECK(t3, 15137, 8192, 284); \
+ ts -= (t3*15137 + 8192) >> 14; \
+ /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ OD_DCT_OVERFLOW_CHECK(ts, 14341, 8192, 285); \
+ t3 += (ts*14341 + 8192) >> 14; \
+ /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
+ OD_DCT_OVERFLOW_CHECK(tm, 19195, 16384, 286); \
+ t9 -= (tm*19195 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
+ OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 287); \
+ tm -= (t9*11585 + 8192) >> 14; \
+ /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(tm, 7489, 4096, 288); \
+ t9 += (tm*7489 + 4096) >> 13; \
+ /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
+ OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 289); \
+ ta += (tl*3259 + 4096) >> 13; \
+ /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
+ OD_DCT_OVERFLOW_CHECK(ta, 3135, 8192, 290); \
+ tl -= (ta*3135 + 8192) >> 14; \
+ /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
+ OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 291); \
+ ta += (tl*3259 + 4096) >> 13; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ OD_DCT_OVERFLOW_CHECK(tk, 4161, 8192, 292); \
+ tb += (tk*4161 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ OD_DCT_OVERFLOW_CHECK(tb, 15137, 8192, 293); \
+ tk -= (tb*15137 + 8192) >> 14; \
+ /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ OD_DCT_OVERFLOW_CHECK(tk, 14341, 8192, 294); \
+ tb += (tk*14341 + 8192) >> 14; \
+ /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
+ OD_DCT_OVERFLOW_CHECK(te, 29957, 16384, 295); \
+ th += (te*29957 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
+ OD_DCT_OVERFLOW_CHECK(th, 11585, 8192, 296); \
+ te -= (th*11585 + 8192) >> 14; \
+ /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
+ OD_DCT_OVERFLOW_CHECK(te, 19195, 16384, 297); \
+ th -= (te*19195 + 16384) >> 15; \
+ /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ OD_DCT_OVERFLOW_CHECK(tc, 28681, 16384, 298); \
+ tj += (tc*28681 + 16384) >> 15; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ OD_DCT_OVERFLOW_CHECK(tj, 15137, 8192, 299); \
+ tc -= (tj*15137 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ OD_DCT_OVERFLOW_CHECK(tc, 4161, 8192, 300); \
+ tj += (tc*4161 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ OD_DCT_OVERFLOW_CHECK(ti, 4161, 8192, 301); \
+ td += (ti*4161 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ OD_DCT_OVERFLOW_CHECK(td, 15137, 8192, 302); \
+ ti -= (td*15137 + 8192) >> 14; \
+ /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ OD_DCT_OVERFLOW_CHECK(ti, 14341, 8192, 303); \
+ td += (ti*14341 + 8192) >> 14; \
+ \
+ t1 = -t1; \
+ t2 = -t2; \
+ t3 = -t3; \
+ td = -td; \
+ tg = -tg; \
+ to = -to; \
+ ts = -ts; \
+ \
+ tr -= OD_DCT_RSHIFT(t5, 1); \
+ t5 += tr; \
+ tq -= OD_DCT_RSHIFT(t4, 1); /* pass */ \
+ t4 += tq; \
+ t6 -= OD_DCT_RSHIFT(t7, 1); \
+ t7 += t6; \
+ to -= OD_DCT_RSHIFT(tp, 1); /* pass */ \
+ tp += to; \
+ t1 += OD_DCT_RSHIFT(t0, 1); /* pass */ \
+ t0 -= t1; \
+ tv -= OD_DCT_RSHIFT(tu, 1); \
+ tu += tv; \
+ t3 -= OD_DCT_RSHIFT(tt, 1); \
+ tt += t3; \
+ t2 += OD_DCT_RSHIFT(ts, 1); \
+ ts -= t2; \
+ t9 -= OD_DCT_RSHIFT(t8, 1); /* pass */ \
+ t8 += t9; \
+ tn += OD_DCT_RSHIFT(tm, 1); \
+ tm -= tn; \
+ tb += OD_DCT_RSHIFT(ta, 1); \
+ ta -= tb; \
+ tl -= OD_DCT_RSHIFT(tk, 1); \
+ tk += tl; \
+ te -= OD_DCT_RSHIFT(tf, 1); /* pass */ \
+ tf += te; \
+ tg -= OD_DCT_RSHIFT(th, 1); \
+ th += tg; \
+ tc -= OD_DCT_RSHIFT(ti, 1); \
+ ti += tc; \
+ td += OD_DCT_RSHIFT(tj, 1); \
+ tj -= td; \
+ \
+ t4 = -t4; \
+ \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
+ OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 304); \
+ t4 += (tr*6723 + 4096) >> 13; \
+ /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
+ OD_DCT_OVERFLOW_CHECK(t4, 16069, 8192, 305); \
+ tr -= (t4*16069 + 8192) >> 14; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
+ OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 306); \
+ t4 += (tr*6723 + 4096) >> 13; \
+ /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
+ OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 307); \
+ t5 += (tq*17515 + 16384) >> 15; \
+ /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
+ OD_DCT_OVERFLOW_CHECK(t5, 13623, 8192, 308); \
+ tq -= (t5*13623 + 8192) >> 14; \
+ /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
+ OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 309); \
+ t5 += (tq*17515 + 16384) >> 15; \
+ /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
+ OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 310); \
+ t7 += (to*3227 + 16384) >> 15; \
+ /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
+ OD_DCT_OVERFLOW_CHECK(t7, 6393, 16384, 311); \
+ to -= (t7*6393 + 16384) >> 15; \
+ /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
+ OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 312); \
+ t7 += (to*3227 + 16384) >> 15; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 313); \
+ t6 += (tp*2485 + 4096) >> 13; \
+ /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 18205, 16384, 314); \
+ tp -= (t6*18205 + 16384) >> 15; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 315); \
+ t6 += (tp*2485 + 4096) >> 13; \
+ \
+ t5 = -t5; \
+ \
+ tr += to; \
+ trh = OD_DCT_RSHIFT(tr, 1); \
+ to -= trh; \
+ t4 += t7; \
+ t4h = OD_DCT_RSHIFT(t4, 1); \
+ t7 -= t4h; \
+ t5 += tp; \
+ t5h = OD_DCT_RSHIFT(t5, 1); \
+ tp -= t5h; \
+ tq += t6; \
+ tqh = OD_DCT_RSHIFT(tq, 1); \
+ t6 -= tqh; \
+ t0 -= t3; \
+ t0h = OD_DCT_RSHIFT(t0, 1); \
+ t3 += t0h; \
+ tv -= ts; \
+ tvh = OD_DCT_RSHIFT(tv, 1); \
+ ts += tvh; \
+ tu += tt; \
+ tuh = OD_DCT_RSHIFT(tu, 1); \
+ tt -= tuh; \
+ t1 -= t2; \
+ t1h = OD_DCT_RSHIFT(t1, 1); \
+ t2 += t1h; \
+ t8 += tb; \
+ tb -= OD_DCT_RSHIFT(t8, 1); \
+ tn += tk; \
+ tk -= OD_DCT_RSHIFT(tn, 1); \
+ t9 += tl; \
+ tl -= OD_DCT_RSHIFT(t9, 1); \
+ tm -= ta; \
+ ta += OD_DCT_RSHIFT(tm, 1); \
+ tc -= tf; \
+ tf += OD_DCT_RSHIFT(tc, 1); \
+ tj += tg; \
+ tg -= OD_DCT_RSHIFT(tj, 1); \
+ td -= te; \
+ te += OD_DCT_RSHIFT(td, 1); \
+ ti += th; \
+ th -= OD_DCT_RSHIFT(ti, 1); \
+ \
+ t9 = -t9; \
+ tl = -tl; \
+ \
+ /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
+ OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 316); \
+ t8 += (tn*805 + 8192) >> 14; \
+ /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
+ OD_DCT_OVERFLOW_CHECK(t8, 803, 4096, 317); \
+ tn -= (t8*803 + 4096) >> 13; \
+ /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
+ OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 318); \
+ t8 += (tn*805 + 8192) >> 14; \
+ /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
+ OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 319); \
+ tk += (tb*11725 + 16384) >> 15; \
+ /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
+ OD_DCT_OVERFLOW_CHECK(tk, 5197, 4096, 320); \
+ tb -= (tk*5197 + 4096) >> 13; \
+ /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
+ OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 321); \
+ tk += (tb*11725 + 16384) >> 15; \
+ /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
+ OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 322); \
+ ta += (tl*2455 + 2048) >> 12; \
+ /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
+ OD_DCT_OVERFLOW_CHECK(ta, 14449, 8192, 323); \
+ tl -= (ta*14449 + 8192) >> 14; \
+ /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
+ OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 324); \
+ ta += (tl*2455 + 2048) >> 12; \
+ /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
+ OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 325); \
+ t9 += (tm*4861 + 16384) >> 15; \
+ /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
+ OD_DCT_OVERFLOW_CHECK(t9, 1189, 2048, 326); \
+ tm -= (t9*1189 + 2048) >> 12; \
+ /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
+ OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 327); \
+ t9 += (tm*4861 + 16384) >> 15; \
+ /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
+ OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 328); \
+ tf += (tg*805 + 8192) >> 14; \
+ /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
+ OD_DCT_OVERFLOW_CHECK(tf, 803, 4096, 329); \
+ tg -= (tf*803 + 4096) >> 13; \
+ /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
+ OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 330); \
+ tf += (tg*805 + 8192) >> 14; \
+ /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
+ OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 331); \
+ tc += (tj*2931 + 4096) >> 13; \
+ /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
+ OD_DCT_OVERFLOW_CHECK(tc, 5197, 4096, 332); \
+ tj -= (tc*5197 + 4096) >> 13; \
+ /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
+ OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 333); \
+ tc += (tj*2931 + 4096) >> 13; \
+ /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
+ OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 334); \
+ td += (ti*513 + 1024) >> 11; \
+ /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
+ OD_DCT_OVERFLOW_CHECK(td, 7723, 8192, 335); \
+ ti -= (td*7723 + 8192) >> 14; \
+ /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
+ OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 336); \
+ td += (ti*513 + 1024) >> 11; \
+ /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
+ OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 337); \
+ te += (th*4861 + 16384) >> 15; \
+ /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
+ OD_DCT_OVERFLOW_CHECK(te, 1189, 2048, 338); \
+ th -= (te*1189 + 2048) >> 12; \
+ /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
+ OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 339); \
+ te += (th*4861 + 16384) >> 15; \
+ \
+ ta = -ta; \
+ tb = -tb; \
+ \
+ tt += t5h; \
+ t5 -= tt; \
+ t2 -= tqh; \
+ tq += t2; \
+ tp += t1h; \
+ t1 -= tp; \
+ t6 -= tuh; \
+ tu += t6; \
+ t7 += tvh; \
+ tv -= t7; \
+ to += t0h; \
+ t0 -= to; \
+ t3 -= t4h; \
+ t4 += t3; \
+ ts += trh; \
+ tr -= ts; \
+ tf -= OD_DCT_RSHIFT(tn, 1); \
+ tn += tf; \
+ tg -= OD_DCT_RSHIFT(t8, 1); \
+ t8 += tg; \
+ tk += OD_DCT_RSHIFT(tc, 1); \
+ tc -= tk; \
+ tb += OD_DCT_RSHIFT(tj, 1); \
+ tj -= tb; \
+ ta += OD_DCT_RSHIFT(ti, 1); \
+ ti -= ta; \
+ tl += OD_DCT_RSHIFT(td, 1); \
+ td -= tl; \
+ te -= OD_DCT_RSHIFT(tm, 1); \
+ tm += te; \
+ th -= OD_DCT_RSHIFT(t9, 1); \
+ t9 += th; \
+ ta -= t5; \
+ t5 += OD_DCT_RSHIFT(ta, 1); \
+ tq -= tl; \
+ tl += OD_DCT_RSHIFT(tq, 1); \
+ t2 -= ti; \
+ ti += OD_DCT_RSHIFT(t2, 1); \
+ td -= tt; \
+ tt += OD_DCT_RSHIFT(td, 1); \
+ tm += tp; \
+ tp -= OD_DCT_RSHIFT(tm, 1); \
+ t6 += t9; \
+ t9 -= OD_DCT_RSHIFT(t6, 1); \
+ te -= tu; \
+ tu += OD_DCT_RSHIFT(te, 1); \
+ t1 -= th; \
+ th += OD_DCT_RSHIFT(t1, 1); \
+ t0 -= tg; \
+ tg += OD_DCT_RSHIFT(t0, 1); \
+ tf += tv; \
+ tv -= OD_DCT_RSHIFT(tf, 1); \
+ t8 -= t7; \
+ t7 += OD_DCT_RSHIFT(t8, 1); \
+ to -= tn; \
+ tn += OD_DCT_RSHIFT(to, 1); \
+ t4 -= tk; \
+ tk += OD_DCT_RSHIFT(t4, 1); \
+ tb -= tr; \
+ tr += OD_DCT_RSHIFT(tb, 1); \
+ t3 -= tj; \
+ tj += OD_DCT_RSHIFT(t3, 1); \
+ tc -= ts; \
+ ts += OD_DCT_RSHIFT(tc, 1); \
+ \
+ tr = -tr; \
+ ts = -ts; \
+ tt = -tt; \
+ tu = -tu; \
+ \
+ /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t0, 2847, 2048, 340); \
+ tv += (t0*2847 + 2048) >> 12; \
+ /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tv, 5791, 2048, 341); \
+ t0 -= (tv*5791 + 2048) >> 12; \
+ /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t0, 5593, 4096, 342); \
+ tv += (t0*5593 + 4096) >> 13; \
+ /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tf, 4099, 4096, 343); \
+ tg -= (tf*4099 + 4096) >> 13; \
+ /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tg, 1997, 1024, 344); \
+ tf += (tg*1997 + 1024) >> 11; \
+ /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tf, 815, 16384, 345); \
+ tg += (tf*815 + 16384) >> 15; \
+ /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t8, 2527, 2048, 346); \
+ tn -= (t8*2527 + 2048) >> 12; \
+ /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tn, 4695, 4096, 347); \
+ t8 += (tn*4695 + 4096) >> 13; \
+ /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t8, 4187, 4096, 348); \
+ tn += (t8*4187 + 4096) >> 13; \
+ /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(to, 5477, 4096, 349); \
+ t7 += (to*5477 + 4096) >> 13; \
+ /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t7, 4169, 4096, 350); \
+ to -= (t7*4169 + 4096) >> 13; \
+ /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(to, 2571, 2048, 351); \
+ t7 -= (to*2571 + 2048) >> 12; \
+ /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t2, 5331, 4096, 352); \
+ tt += (t2*5331 + 4096) >> 13; \
+ /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tt, 5749, 2048, 353); \
+ t2 -= (tt*5749 + 2048) >> 12; \
+ /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t2, 2413, 2048, 354); \
+ tt += (t2*2413 + 2048) >> 12; \
+ /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(td, 4167, 4096, 355); \
+ ti -= (td*4167 + 4096) >> 13; \
+ /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(ti, 891, 512, 356); \
+ td += (ti*891 + 512) >> 10; \
+ /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(td, 4327, 16384, 357); \
+ ti += (td*4327 + 16384) >> 15; \
+ /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(ta, 2261, 2048, 358); \
+ tl -= (ta*2261 + 2048) >> 12; \
+ /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tl, 2855, 2048, 359); \
+ ta += (tl*2855 + 2048) >> 12; \
+ /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(ta, 5417, 8192, 360); \
+ tl += (ta*5417 + 8192) >> 14; \
+ /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tq, 3459, 2048, 361); \
+ t5 += (tq*3459 + 2048) >> 12; \
+ /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t5, 1545, 2048, 362); \
+ tq -= (t5*1545 + 2048) >> 12; \
+ /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tq, 1971, 1024, 363); \
+ t5 -= (tq*1971 + 1024) >> 11; \
+ /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t3, 323, 256, 364); \
+ ts += (t3*323 + 256) >> 9; \
+ /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(ts, 5707, 2048, 365); \
+ t3 -= (ts*5707 + 2048) >> 12; \
+ /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t3, 2229, 2048, 366); \
+ ts += (t3*2229 + 2048) >> 12; \
+ /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tc, 1061, 1024, 367); \
+ tj -= (tc*1061 + 1024) >> 11; \
+ /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tj, 6671, 4096, 368); \
+ tc += (tj*6671 + 4096) >> 13; \
+ /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tc, 6287, 16384, 369); \
+ tj += (tc*6287 + 16384) >> 15; \
+ /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tb, 4359, 4096, 370); \
+ tk -= (tb*4359 + 4096) >> 13; \
+ /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tk, 3099, 2048, 371); \
+ tb += (tk*3099 + 2048) >> 12; \
+ /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tb, 2109, 4096, 372); \
+ tk += (tb*2109 + 4096) >> 13; \
+ /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t4, 5017, 4096, 373); \
+ tr += (t4*5017 + 4096) >> 13; \
+ /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tr, 1413, 512, 374); \
+ t4 -= (tr*1413 + 512) >> 10; \
+ /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t4, 8195, 8192, 375); \
+ tr += (t4*8195 + 8192) >> 14; \
+ /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tm, 2373, 2048, 376); \
+ t9 += (tm*2373 + 2048) >> 12; \
+ /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t9, 5209, 4096, 377); \
+ tm -= (t9*5209 + 4096) >> 13; \
+ /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tm, 3391, 4096, 378); \
+ t9 -= (tm*3391 + 4096) >> 13; \
+ /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 1517, 1024, 379); \
+ tp -= (t6*1517 + 1024) >> 11; \
+ /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tp, 1817, 2048, 380); \
+ t6 += (tp*1817 + 2048) >> 12; \
+ /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t6, 6331, 4096, 381); \
+ tp += (t6*6331 + 4096) >> 13; \
+ /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(te, 515, 512, 382); \
+ th -= (te*515 + 512) >> 10; \
+ /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(th, 7567, 4096, 383); \
+ te += (th*7567 + 4096) >> 13; \
+ /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(te, 2513, 16384, 384); \
+ th += (te*2513 + 16384) >> 15; \
+ /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t1, 2753, 2048, 385); \
+ tu += (t1*2753 + 2048) >> 12; \
+ /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(tu, 5777, 2048, 386); \
+ t1 -= (tu*5777 + 2048) >> 12; \
+ /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
+ OD_DCT_OVERFLOW_CHECK(t1, 1301, 1024, 387); \
+ tu += (t1*1301 + 1024) >> 11; \
+ } \
+ while (0)
+
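In the forward macros, every multiply is preceded by OD_DCT_OVERFLOW_CHECK with the same multiplier and rounding offset plus a unique call-site index (156 through 387 across this hunk); the inverse macros omit the checks. The real macro is defined earlier in this file; the stand-in below is only a sketch of the idea, under the assumption that the check asserts in debug builds that the product stays within 32-bit range.

#include <assert.h>
#include <stdint.h>
/* Hypothetical stand-in with the same shape as
   OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx); idx tags the call
   site for diagnostics and is unused in this sketch. */
#define SKETCH_OVERFLOW_CHECK(val, scale, offset, idx) \
  assert((int64_t)(val)*(scale) + (offset) <= INT32_MAX && \
         (int64_t)(val)*(scale) + (offset) >= INT32_MIN)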
+#define OD_IDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
+ tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
+ /* Embedded 32-point asymmetric Type-IV iDST. */ \
+ do { \
+ int t0h; \
+ int t4h; \
+ int tbh; \
+ int tfh; \
+ int tgh; \
+ int tkh; \
+ int trh; \
+ int tvh; \
+ /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \
+ tf -= (tg*1301 + 1024) >> 11; \
+ /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \
+ tg += (tf*5777 + 2048) >> 12; \
+ /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \
+ tf -= (tg*2753 + 2048) >> 12; \
+ /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \
+ th -= (te*2513 + 16384) >> 15; \
+ /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \
+ te -= (th*7567 + 4096) >> 13; \
+ /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \
+ th += (te*515 + 512) >> 10; \
+ /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \
+ tj -= (tc*6331 + 4096) >> 13; \
+ /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \
+ tc -= (tj*1817 + 2048) >> 12; \
+ /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \
+ tj += (tc*1517 + 1024) >> 11; \
+ /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \
+ ti += (td*3391 + 4096) >> 13; \
+ /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \
+ td += (ti*5209 + 4096) >> 13; \
+ /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \
+ ti -= (td*2373 + 2048) >> 12; \
+ /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \
+ tr -= (t4*8195 + 8192) >> 14; \
+ /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \
+ t4 += (tr*1413 + 512) >> 10; \
+ /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \
+ tr -= (t4*5017 + 4096) >> 13; \
+ /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \
+ t5 -= (tq*2109 + 4096) >> 13; \
+ /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
+ tq -= (t5*3099 + 2048) >> 12; \
+ /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
+ t5 += (tq*4359 + 4096) >> 13; \
+ /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
+ tp -= (t6*6287 + 16384) >> 15; \
+ /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
+ t6 -= (tp*6671 + 4096) >> 13; \
+ /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
+ tp += (t6*1061 + 1024) >> 11; \
+ /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
+ t7 -= (to*2229 + 2048) >> 12; \
+ /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
+ to += (t7*5707 + 2048) >> 12; \
+ /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
+ t7 -= (to*323 + 256) >> 9; \
+ /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
+ tk += (tb*1971 + 1024) >> 11; \
+ /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
+ tb += (tk*1545 + 2048) >> 12; \
+ /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
+ tk -= (tb*3459 + 2048) >> 12; \
+ /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
+ tl -= (ta*5417 + 8192) >> 14; \
+ /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
+ ta -= (tl*2855 + 2048) >> 12; \
+ /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
+ tl += (ta*2261 + 2048) >> 12; \
+ /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
+ t9 -= (tm*4327 + 16384) >> 15; \
+ /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
+ tm -= (t9*891 + 512) >> 10; \
+ /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
+ t9 += (tm*4167 + 4096) >> 13; \
+ /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
+ tn -= (t8*2413 + 2048) >> 12; \
+ /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
+ t8 += (tn*5749 + 2048) >> 12; \
+ /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
+ tn -= (t8*5331 + 4096) >> 13; \
+ /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
+ ts += (t3*2571 + 2048) >> 12; \
+ /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
+ t3 += (ts*4169 + 4096) >> 13; \
+ /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
+ ts -= (t3*5477 + 4096) >> 13; \
+ /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
+ tt -= (t2*4187 + 4096) >> 13; \
+ /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
+ t2 -= (tt*4695 + 4096) >> 13; \
+ /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
+ tt += (t2*2527 + 2048) >> 12; \
+ /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
+ t1 -= (tu*815 + 16384) >> 15; \
+ /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
+ tu -= (t1*1997 + 1024) >> 11; \
+ /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
+ t1 += (tu*4099 + 4096) >> 13; \
+ /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
+ tv -= (t0*5593 + 4096) >> 13; \
+ /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
+ t0 += (tv*5791 + 2048) >> 12; \
+ /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
+ tv -= (t0*2847 + 2048) >> 12; \
+ \
+ t7 = -t7; \
+ tf = -tf; \
+ tn = -tn; \
+ tr = -tr; \
+ \
+ t7 -= OD_DCT_RSHIFT(t6, 1); \
+ t6 += t7; \
+ tp -= OD_DCT_RSHIFT(to, 1); \
+ to += tp; \
+ tr -= OD_DCT_RSHIFT(tq, 1); \
+ tq += tr; \
+ t5 -= OD_DCT_RSHIFT(t4, 1); \
+ t4 += t5; \
+ tt -= OD_DCT_RSHIFT(t3, 1); \
+ t3 += tt; \
+ ts -= OD_DCT_RSHIFT(t2, 1); \
+ t2 += ts; \
+ tv += OD_DCT_RSHIFT(tu, 1); \
+ tu -= tv; \
+ t1 -= OD_DCT_RSHIFT(t0, 1); \
+ t0 += t1; \
+ th -= OD_DCT_RSHIFT(tg, 1); \
+ tg += th; \
+ tf -= OD_DCT_RSHIFT(te, 1); \
+ te += tf; \
+ ti += OD_DCT_RSHIFT(tc, 1); \
+ tc -= ti; \
+ tj += OD_DCT_RSHIFT(td, 1); \
+ td -= tj; \
+ tn -= OD_DCT_RSHIFT(tm, 1); \
+ tm += tn; \
+ t9 -= OD_DCT_RSHIFT(t8, 1); \
+ t8 += t9; \
+ tl -= OD_DCT_RSHIFT(tb, 1); \
+ tb += tl; \
+ tk -= OD_DCT_RSHIFT(ta, 1); \
+ ta += tk; \
+ \
+ ti -= th; \
+ th += OD_DCT_RSHIFT(ti, 1); \
+ td -= te; \
+ te += OD_DCT_RSHIFT(td, 1); \
+ tm += tl; \
+ tl -= OD_DCT_RSHIFT(tm, 1); \
+ t9 += ta; \
+ ta -= OD_DCT_RSHIFT(t9, 1); \
+ tp += tq; \
+ tq -= OD_DCT_RSHIFT(tp, 1); \
+ t6 += t5; \
+ t5 -= OD_DCT_RSHIFT(t6, 1); \
+ t2 -= t1; \
+ t1 += OD_DCT_RSHIFT(t2, 1); \
+ tt -= tu; \
+ tu += OD_DCT_RSHIFT(tt, 1); \
+ tr += t7; \
+ trh = OD_DCT_RSHIFT(tr, 1); \
+ t7 -= trh; \
+ t4 -= to; \
+ t4h = OD_DCT_RSHIFT(t4, 1); \
+ to += t4h; \
+ t0 += t3; \
+ t0h = OD_DCT_RSHIFT(t0, 1); \
+ t3 -= t0h; \
+ tv += ts; \
+ tvh = OD_DCT_RSHIFT(tv, 1); \
+ ts -= tvh; \
+ tf -= tc; \
+ tfh = OD_DCT_RSHIFT(tf, 1); \
+ tc += tfh; \
+ tg += tj; \
+ tgh = OD_DCT_RSHIFT(tg, 1); \
+ tj -= tgh; \
+ tb -= t8; \
+ tbh = OD_DCT_RSHIFT(tb, 1); \
+ t8 += tbh; \
+ tk += tn; \
+ tkh = OD_DCT_RSHIFT(tk, 1); \
+ tn -= tkh; \
+ \
+ ta = -ta; \
+ tq = -tq; \
+ \
+ /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
+ te -= (th*4861 + 16384) >> 15; \
+ /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
+ th += (te*1189 + 2048) >> 12; \
+ /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
+ te -= (th*4861 + 16384) >> 15; \
+ /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
+ tm -= (t9*513 + 1024) >> 11; \
+ /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
+ t9 += (tm*7723 + 8192) >> 14; \
+ /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
+ tm -= (t9*513 + 1024) >> 11; \
+ /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
+ t6 -= (tp*2931 + 4096) >> 13; \
+ /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
+ tp += (t6*5197 + 4096) >> 13; \
+ /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
+ t6 -= (tp*2931 + 4096) >> 13; \
+ /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
+ tu -= (t1*805 + 8192) >> 14; \
+ /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
+ t1 += (tu*803 + 4096) >> 13; \
+ /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
+ tu -= (t1*805 + 8192) >> 14; \
+ /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
+ ti -= (td*4861 + 16384) >> 15; \
+ /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
+ td += (ti*1189 + 2048) >> 12; \
+ /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
+ ti -= (td*4861 + 16384) >> 15; \
+ /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
+ ta -= (tl*2455 + 2048) >> 12; \
+ /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
+ tl += (ta*14449 + 8192) >> 14; \
+ /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
+ ta -= (tl*2455 + 2048) >> 12; \
+ /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
+ t5 -= (tq*11725 + 16384) >> 15; \
+ /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
+ tq += (t5*5197 + 4096) >> 13; \
+ /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
+ t5 -= (tq*11725 + 16384) >> 15; \
+ /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
+ t2 -= (tt*805 + 8192) >> 14; \
+ /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
+ tt += (t2*803 + 4096) >> 13; \
+ /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
+ t2 -= (tt*805 + 8192) >> 14; \
+ \
+ tl = -tl; \
+ ti = -ti; \
+ \
+ th += OD_DCT_RSHIFT(t9, 1); \
+ t9 -= th; \
+ te -= OD_DCT_RSHIFT(tm, 1); \
+ tm += te; \
+ t1 += OD_DCT_RSHIFT(tp, 1); \
+ tp -= t1; \
+ tu -= OD_DCT_RSHIFT(t6, 1); \
+ t6 += tu; \
+ ta -= OD_DCT_RSHIFT(td, 1); \
+ td += ta; \
+ tl += OD_DCT_RSHIFT(ti, 1); \
+ ti -= tl; \
+ t5 += OD_DCT_RSHIFT(tt, 1); \
+ tt -= t5; \
+ tq += OD_DCT_RSHIFT(t2, 1); \
+ t2 -= tq; \
+ \
+ t8 -= tgh; \
+ tg += t8; \
+ tn += tfh; \
+ tf -= tn; \
+ t7 -= tvh; \
+ tv += t7; \
+ to -= t0h; \
+ t0 += to; \
+ tc += tbh; \
+ tb -= tc; \
+ tj += tkh; \
+ tk -= tj; \
+ ts += t4h; \
+ t4 -= ts; \
+ t3 += trh; \
+ tr -= t3; \
+ \
+ tk = -tk; \
+ \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ tc -= (tj*2485 + 4096) >> 13; \
+ /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
+ tj += (tc*18205 + 16384) >> 15; \
+ /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
+ tc -= (tj*2485 + 4096) >> 13; \
+ /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
+ ts -= (t3*3227 + 16384) >> 15; \
+ /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
+ t3 += (ts*6393 + 16384) >> 15; \
+ /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
+ ts -= (t3*3227 + 16384) >> 15; \
+ /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
+ tk -= (tb*17515 + 16384) >> 15; \
+ /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
+ tb += (tk*13623 + 8192) >> 14; \
+ /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
+ tk -= (tb*17515 + 16384) >> 15; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
+ t4 -= (tr*6723 + 4096) >> 13; \
+ /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
+ tr += (t4*16069 + 8192) >> 14; \
+ /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
+ t4 -= (tr*6723 + 4096) >> 13; \
+ \
+ t4 = -t4; \
+ \
+ tp += tm; \
+ tm -= OD_DCT_RSHIFT(tp, 1); \
+ t9 -= t6; \
+ t6 += OD_DCT_RSHIFT(t9, 1); \
+ th -= t1; \
+ t1 += OD_DCT_RSHIFT(th, 1); \
+ tu -= te; \
+ te += OD_DCT_RSHIFT(tu, 1); /* pass */ \
+ t5 -= tl; \
+ tl += OD_DCT_RSHIFT(t5, 1); \
+ ta += tq; \
+ tq -= OD_DCT_RSHIFT(ta, 1); \
+ td += tt; \
+ tt -= OD_DCT_RSHIFT(td, 1); \
+ t2 -= ti; \
+ ti += OD_DCT_RSHIFT(t2, 1); /* pass */ \
+ t7 += t8; \
+ t8 -= OD_DCT_RSHIFT(t7, 1); \
+ tn -= to; \
+ to += OD_DCT_RSHIFT(tn, 1); \
+ tf -= tv; \
+ tv += OD_DCT_RSHIFT(tf, 1); \
+ t0 += tg; \
+ tg -= OD_DCT_RSHIFT(t0, 1); /* pass */ \
+ tj -= t3; \
+ t3 += OD_DCT_RSHIFT(tj, 1); /* pass */ \
+ ts -= tc; \
+ tc += OD_DCT_RSHIFT(ts, 1); \
+ t4 -= tb; \
+ tb += OD_DCT_RSHIFT(t4, 1); /* pass */ \
+ tk -= tr; \
+ tr += OD_DCT_RSHIFT(tk, 1); \
+ \
+ t1 = -t1; \
+ t3 = -t3; \
+ t7 = -t7; \
+ t8 = -t8; \
+ tg = -tg; \
+ tm = -tm; \
+ to = -to; \
+ \
+ /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ tm -= (t9*14341 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ t9 += (tm*15137 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ tm -= (t9*4161 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ tp -= (t6*4161 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ t6 += (tp*15137 + 8192) >> 14; \
+ /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ tp -= (t6*28681 + 16384) >> 15; \
+ /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
+ th += (te*19195 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
+ te += (th*11585 + 8192) >> 14; \
+ /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
+ th -= (te*29957 + 16384) >> 15; \
+ /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ tq -= (t5*14341 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ t5 += (tq*15137 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ tq -= (t5*4161 + 8192) >> 14; \
+ /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
+ ta -= (tl*3259 + 4096) >> 13; \
+ /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
+ tl += (ta*3135 + 8192) >> 14; \
+ /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
+ ta -= (tl*3259 + 4096) >> 13; \
+ /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
+ ti -= (td*7489 + 4096) >> 13; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
+ td += (ti*11585 + 8192) >> 14; \
+ /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
+ ti += (td*19195 + 16384) >> 15; \
+ /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ to -= (t7*14341 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ t7 += (to*15137 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ to -= (t7*4161 + 8192) >> 14; \
+ /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
+ tn -= (t8*4161 + 8192) >> 14; \
+ /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
+ t8 += (tn*15137 + 8192) >> 14; \
+ /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
+ tn -= (t8*28681 + 16384) >> 15; \
+ /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
+ tf += (tg*19195 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
+ tg += (tf*11585 + 8192) >> 14; \
+ /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
+ tf -= (tg*29957 + 16384) >> 15; \
+ /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
+ tj += (tc*19195 + 16384) >> 15; \
+ /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
+ tc += (tj*11585 + 8192) >> 14; \
+ /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
+ tj -= (tc*29957 + 16384) >> 15; \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ tk += (tb*13573 + 8192) >> 14; \
+ /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
+ tb -= (tk*11585 + 16384) >> 15; \
+ /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
+ tk += (tb*13573 + 8192) >> 14; \
+ \
+ tf = -tf; \
+ \
+ } \
+ while (0)
+
+#define OD_FDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
+ us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
+ ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
+ ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
+ /* Embedded 64-point orthonormal Type-II fDCT. */ \
+ do { \
+ int uwh; \
+ int uxh; \
+ int uyh; \
+ int uzh; \
+ int uAh; \
+ int uBh; \
+ int uCh; \
+ int uDh; \
+ int uEh; \
+ int uFh; \
+ int uGh; \
+ int uHh; \
+ int uIh; \
+ int uJh; \
+ int uKh; \
+ int uLh; \
+ int uMh; \
+ int uNh; \
+ int uOh; \
+ int uPh; \
+ int uQh; \
+ int uRh; \
+ int uSh; \
+ int uTh; \
+ int uUh; \
+ int uVh; \
+ int uWh; \
+ int uXh; \
+ int uYh; \
+ int uZh; \
+ int u_h; \
+ int uh_; \
+ u = u0 - u; \
+ uh_ = OD_DCT_RSHIFT(u, 1); \
+ u0 -= uh_; \
+ u_ += u1; \
+ u_h = OD_DCT_RSHIFT(u_, 1); \
+ u1 = u_h - u1; \
+ uZ = u2 - uZ; \
+ uZh = OD_DCT_RSHIFT(uZ, 1); \
+ u2 -= uZh; \
+ uY += u3; \
+ uYh = OD_DCT_RSHIFT(uY, 1); \
+ u3 = uYh - u3; \
+ uX = u4 - uX; \
+ uXh = OD_DCT_RSHIFT(uX, 1); \
+ u4 -= uXh; \
+ uW += u5; \
+ uWh = OD_DCT_RSHIFT(uW, 1); \
+ u5 = uWh - u5; \
+ uV = u6 - uV; \
+ uVh = OD_DCT_RSHIFT(uV, 1); \
+ u6 -= uVh; \
+ uU += u7; \
+ uUh = OD_DCT_RSHIFT(uU, 1); \
+ u7 = uUh - u7; \
+ uT = u8 - uT; \
+ uTh = OD_DCT_RSHIFT(uT, 1); \
+ u8 -= uTh; \
+ uS += u9; \
+ uSh = OD_DCT_RSHIFT(uS, 1); \
+ u9 = uSh - u9; \
+ uR = ua - uR; \
+ uRh = OD_DCT_RSHIFT(uR, 1); \
+ ua -= uRh; \
+ uQ += ub; \
+ uQh = OD_DCT_RSHIFT(uQ, 1); \
+ ub = uQh - ub; \
+ uP = uc - uP; \
+ uPh = OD_DCT_RSHIFT(uP, 1); \
+ uc -= uPh; \
+ uO += ud; \
+ uOh = OD_DCT_RSHIFT(uO, 1); \
+ ud = uOh - ud; \
+ uN = ue - uN; \
+ uNh = OD_DCT_RSHIFT(uN, 1); \
+ ue -= uNh; \
+ uM += uf; \
+ uMh = OD_DCT_RSHIFT(uM, 1); \
+ uf = uMh - uf; \
+ uL = ug - uL; \
+ uLh = OD_DCT_RSHIFT(uL, 1); \
+ ug -= uLh; \
+ uK += uh; \
+ uKh = OD_DCT_RSHIFT(uK, 1); \
+ uh = uKh - uh; \
+ uJ = ui - uJ; \
+ uJh = OD_DCT_RSHIFT(uJ, 1); \
+ ui -= uJh; \
+ uI += uj; \
+ uIh = OD_DCT_RSHIFT(uI, 1); \
+ uj = uIh - uj; \
+ uH = uk - uH; \
+ uHh = OD_DCT_RSHIFT(uH, 1); \
+ uk -= uHh; \
+ uG += ul; \
+ uGh = OD_DCT_RSHIFT(uG, 1); \
+ ul = uGh - ul; \
+ uF = um - uF; \
+ uFh = OD_DCT_RSHIFT(uF, 1); \
+ um -= uFh; \
+ uE += un; \
+ uEh = OD_DCT_RSHIFT(uE, 1); \
+ un = uEh - un; \
+ uD = uo - uD; \
+ uDh = OD_DCT_RSHIFT(uD, 1); \
+ uo -= uDh; \
+ uC += up; \
+ uCh = OD_DCT_RSHIFT(uC, 1); \
+ up = uCh - up; \
+ uB = uq - uB; \
+ uBh = OD_DCT_RSHIFT(uB, 1); \
+ uq -= uBh; \
+ uA += ur; \
+ uAh = OD_DCT_RSHIFT(uA, 1); \
+ ur = uAh - ur; \
+ uz = us - uz; \
+ uzh = OD_DCT_RSHIFT(uz, 1); \
+ us -= uzh; \
+ uy += ut; \
+ uyh = OD_DCT_RSHIFT(uy, 1); \
+ ut = uyh - ut; \
+ ux = uu - ux; \
+ uxh = OD_DCT_RSHIFT(ux, 1); \
+ uu -= uxh; \
+ uw += uv; \
+ uwh = OD_DCT_RSHIFT(uw, 1); \
+ uv = uwh - uv; \
+ OD_FDCT_32_ASYM(u0, uw, uwh, ug, uM, uMh, u8, uE, uEh, uo, uU, uUh, \
+ u4, uA, uAh, uk, uQ, uQh, uc, uI, uIh, us, uY, uYh, u2, uy, uyh, \
+ ui, uO, uOh, ua, uG, uGh, uq, uW, uWh, u6, uC, uCh, um, uS, uSh, \
+ ue, uK, uKh, uu, u_, u_h); \
+ OD_FDST_32_ASYM(u, uv, uL, uf, uT, un, uD, u7, uX, ur, uH, ub, uP, uj, \
+ uz, u3, uZ, ut, uJ, ud, uR, ul, uB, u5, uV, up, uF, u9, uN, uh, ux, u1); \
+ } \
+ while (0)
+
+#define OD_IDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \
+ us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \
+ ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \
+ ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \
+ /* Embedded 64-point orthonormal Type-II iDCT. */ \
+ do { \
+ int u1h; \
+ int u3h; \
+ int u5h; \
+ int u7h; \
+ int u9h; \
+ int ubh; \
+ int udh; \
+ int ufh; \
+ int uhh; \
+ int ujh; \
+ int ulh; \
+ int unh; \
+ int uph; \
+ int urh; \
+ int uth; \
+ int uvh; \
+ int uxh; \
+ int uzh; \
+ int uBh; \
+ int uDh; \
+ int uFh; \
+ int uHh; \
+ int uJh; \
+ int uLh; \
+ int uNh; \
+ int uPh; \
+ int uRh; \
+ int uTh; \
+ int uVh; \
+ int uXh; \
+ int uZh; \
+ int uh_; \
+ OD_IDST_32_ASYM(u, uL, uT, uD, uX, uH, uP, uz, uZ, uJ, uR, uB, uV, uF, \
+ uN, ux, u_, uK, uS, uC, uW, uG, uO, uy, uY, uI, uQ, uA, uU, uE, uM, uw); \
+ OD_IDCT_32_ASYM(u0, ug, u8, uo, u4, uk, uc, us, u2, ui, ua, uq, u6, um, \
+ ue, uu, u1, u1h, uh, uhh, u9, u9h, up, uph, u5, u5h, ul, ulh, ud, udh, \
+ ut, uth, u3, u3h, uj, ujh, ub, ubh, ur, urh, u7, u7h, un, unh, uf, ufh, \
+ uv, uvh); \
+ uh_ = OD_DCT_RSHIFT(u, 1); \
+ u0 += uh_; \
+ u = u0 - u; \
+ u_ = u1h - u_; \
+ u1 -= u_; \
+ uZh = OD_DCT_RSHIFT(uZ, 1); \
+ u2 += uZh; \
+ uZ = u2 - uZ; \
+ uY = u3h - uY; \
+ u3 -= uY; \
+ uXh = OD_DCT_RSHIFT(uX, 1); \
+ u4 += uXh; \
+ uX = u4 - uX; \
+ uW = u5h - uW; \
+ u5 -= uW; \
+ uVh = OD_DCT_RSHIFT(uV, 1); \
+ u6 += uVh; \
+ uV = u6 - uV; \
+ uU = u7h - uU; \
+ u7 -= uU; \
+ uTh = OD_DCT_RSHIFT(uT, 1); \
+ u8 += uTh; \
+ uT = u8 - uT; \
+ uS = u9h - uS; \
+ u9 -= uS; \
+ uRh = OD_DCT_RSHIFT(uR, 1); \
+ ua += uRh; \
+ uR = ua - uR; \
+ uQ = ubh - uQ; \
+ ub -= uQ; \
+ uPh = OD_DCT_RSHIFT(uP, 1); \
+ uc += uPh; \
+ uP = uc - uP; \
+ uO = udh - uO; \
+ ud -= uO; \
+ uNh = OD_DCT_RSHIFT(uN, 1); \
+ ue += uNh; \
+ uN = ue - uN; \
+ uM = ufh - uM; \
+ uf -= uM; \
+ uLh = OD_DCT_RSHIFT(uL, 1); \
+ ug += uLh; \
+ uL = ug - uL; \
+ uK = uhh - uK; \
+ uh -= uK; \
+ uJh = OD_DCT_RSHIFT(uJ, 1); \
+ ui += uJh; \
+ uJ = ui - uJ; \
+ uI = ujh - uI; \
+ uj -= uI; \
+ uHh = OD_DCT_RSHIFT(uH, 1); \
+ uk += uHh; \
+ uH = uk - uH; \
+ uG = ulh - uG; \
+ ul -= uG; \
+ uFh = OD_DCT_RSHIFT(uF, 1); \
+ um += uFh; \
+ uF = um - uF; \
+ uE = unh - uE; \
+ un -= uE; \
+ uDh = OD_DCT_RSHIFT(uD, 1); \
+ uo += uDh; \
+ uD = uo - uD; \
+ uC = uph - uC; \
+ up -= uC; \
+ uBh = OD_DCT_RSHIFT(uB, 1); \
+ uq += uBh; \
+ uB = uq - uB; \
+ uA = urh - uA; \
+ ur -= uA; \
+ uzh = OD_DCT_RSHIFT(uz, 1); \
+ us += uzh; \
+ uz = us - uz; \
+ uy = uth - uy; \
+ ut -= uy; \
+ uxh = OD_DCT_RSHIFT(ux, 1); \
+ uu += uxh; \
+ ux = uu - ux; \
+ uw = uvh - uw; \
+ uv -= uw; \
+ } while (0)
+#endif
+
void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) {
int q0;
int q1;
@@ -478,6 +3150,38 @@ void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) {
x[3*xstride] = q3;
}
+void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride) {
+ int q0;
+ int q1;
+ int q2;
+ int q3;
+ q0 = x[3*xstride];
+ q2 = x[2*xstride];
+ q1 = x[1*xstride];
+ q3 = x[0*xstride];
+ OD_FDST_4(q0, q2, q1, q3);
+ y[0] = (od_coeff)q3;
+ y[1] = (od_coeff)q2;
+ y[2] = (od_coeff)q1;
+ y[3] = (od_coeff)q0;
+}
+
+void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]) {
+ int q0;
+ int q1;
+ int q2;
+ int q3;
+ q0 = y[3];
+ q2 = y[2];
+ q1 = y[1];
+ q3 = y[0];
+ OD_IDST_4(q0, q2, q1, q3);
+ x[0*xstride] = q3;
+ x[1*xstride] = q2;
+ x[2*xstride] = q1;
+ x[3*xstride] = q0;
+}
+
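A usage sketch for the 4-point pair above, not part of the patch; the function name is hypothetical. Because the inverse undoes every lifting step with identical rounding, the round trip should be bit-exact:

static void od_sketch_dst4_roundtrip(void) {
  od_coeff in[4] = { 10, -3, 7, 1 };
  od_coeff freq[4];
  od_coeff out[4];
  od_bin_fdst4(freq, in, 1);  /* xstride == 1: contiguous samples */
  od_bin_idst4(out, 1, freq);
  /* out[i] should equal in[i] for every i. */
}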
void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride) {
int r0;
int r1;
@@ -589,3 +3293,1039 @@ void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]) {
x[6*xstride] = (od_coeff)r6;
x[7*xstride] = (od_coeff)r7;
}
+
+void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride) {
+ int s0;
+ int s1;
+ int s2;
+ int s3;
+ int s4;
+ int s5;
+ int s6;
+ int s7;
+ int s8;
+ int s9;
+ int sa;
+ int sb;
+ int sc;
+ int sd;
+ int se;
+ int sf;
+ s0 = x[0*xstride];
+ s8 = x[1*xstride];
+ s4 = x[2*xstride];
+ sc = x[3*xstride];
+ s2 = x[4*xstride];
+ sa = x[5*xstride];
+ s6 = x[6*xstride];
+ se = x[7*xstride];
+ s1 = x[8*xstride];
+ s9 = x[9*xstride];
+ s5 = x[10*xstride];
+ sd = x[11*xstride];
+ s3 = x[12*xstride];
+ sb = x[13*xstride];
+ s7 = x[14*xstride];
+ sf = x[15*xstride];
+ OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
+ y[0] = (od_coeff)s0;
+ y[1] = (od_coeff)s1;
+ y[2] = (od_coeff)s2;
+ y[3] = (od_coeff)s3;
+ y[4] = (od_coeff)s4;
+ y[5] = (od_coeff)s5;
+ y[6] = (od_coeff)s6;
+ y[7] = (od_coeff)s7;
+ y[8] = (od_coeff)s8;
+ y[9] = (od_coeff)s9;
+ y[10] = (od_coeff)sa;
+ y[11] = (od_coeff)sb;
+ y[12] = (od_coeff)sc;
+ y[13] = (od_coeff)sd;
+ y[14] = (od_coeff)se;
+ y[15] = (od_coeff)sf;
+}
+
+void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]) {
+ int s0;
+ int s1;
+ int s2;
+ int s3;
+ int s4;
+ int s5;
+ int s6;
+ int s7;
+ int s8;
+ int s9;
+ int sa;
+ int sb;
+ int sc;
+ int sd;
+ int se;
+ int sf;
+ s0 = y[0];
+ s8 = y[1];
+ s4 = y[2];
+ sc = y[3];
+ s2 = y[4];
+ sa = y[5];
+ s6 = y[6];
+ se = y[7];
+ s1 = y[8];
+ s9 = y[9];
+ s5 = y[10];
+ sd = y[11];
+ s3 = y[12];
+ sb = y[13];
+ s7 = y[14];
+ sf = y[15];
+ OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
+ x[0*xstride] = (od_coeff)s0;
+ x[1*xstride] = (od_coeff)s1;
+ x[2*xstride] = (od_coeff)s2;
+ x[3*xstride] = (od_coeff)s3;
+ x[4*xstride] = (od_coeff)s4;
+ x[5*xstride] = (od_coeff)s5;
+ x[6*xstride] = (od_coeff)s6;
+ x[7*xstride] = (od_coeff)s7;
+ x[8*xstride] = (od_coeff)s8;
+ x[9*xstride] = (od_coeff)s9;
+ x[10*xstride] = (od_coeff)sa;
+ x[11*xstride] = (od_coeff)sb;
+ x[12*xstride] = (od_coeff)sc;
+ x[13*xstride] = (od_coeff)sd;
+ x[14*xstride] = (od_coeff)se;
+ x[15*xstride] = (od_coeff)sf;
+}
+
+void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride) {
+ int s0;
+ int s1;
+ int s2;
+ int s3;
+ int s4;
+ int s5;
+ int s6;
+ int s7;
+ int s8;
+ int s9;
+ int sa;
+ int sb;
+ int sc;
+ int sd;
+ int se;
+ int sf;
+ s0 = x[15*xstride];
+ s8 = x[14*xstride];
+ s4 = x[13*xstride];
+ sc = x[12*xstride];
+ s2 = x[11*xstride];
+ sa = x[10*xstride];
+ s6 = x[9*xstride];
+ se = x[8*xstride];
+ s1 = x[7*xstride];
+ s9 = x[6*xstride];
+ s5 = x[5*xstride];
+ sd = x[4*xstride];
+ s3 = x[3*xstride];
+ sb = x[2*xstride];
+ s7 = x[1*xstride];
+ sf = x[0*xstride];
+ OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
+ y[0] = (od_coeff)sf;
+ y[1] = (od_coeff)se;
+ y[2] = (od_coeff)sd;
+ y[3] = (od_coeff)sc;
+ y[4] = (od_coeff)sb;
+ y[5] = (od_coeff)sa;
+ y[6] = (od_coeff)s9;
+ y[7] = (od_coeff)s8;
+ y[8] = (od_coeff)s7;
+ y[9] = (od_coeff)s6;
+ y[10] = (od_coeff)s5;
+ y[11] = (od_coeff)s4;
+ y[12] = (od_coeff)s3;
+ y[13] = (od_coeff)s2;
+ y[14] = (od_coeff)s1;
+ y[15] = (od_coeff)s0;
+}
+
+void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]) {
+ int s0;
+ int s1;
+ int s2;
+ int s3;
+ int s4;
+ int s5;
+ int s6;
+ int s7;
+ int s8;
+ int s9;
+ int sa;
+ int sb;
+ int sc;
+ int sd;
+ int se;
+ int sf;
+ s0 = y[15];
+ s8 = y[14];
+ s4 = y[13];
+ sc = y[12];
+ s2 = y[11];
+ sa = y[10];
+ s6 = y[9];
+ se = y[8];
+ s1 = y[7];
+ s9 = y[6];
+ s5 = y[5];
+ sd = y[4];
+ s3 = y[3];
+ sb = y[2];
+ s7 = y[1];
+ sf = y[0];
+ OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf);
+ x[0*xstride] = (od_coeff)sf;
+ x[1*xstride] = (od_coeff)se;
+ x[2*xstride] = (od_coeff)sd;
+ x[3*xstride] = (od_coeff)sc;
+ x[4*xstride] = (od_coeff)sb;
+ x[5*xstride] = (od_coeff)sa;
+ x[6*xstride] = (od_coeff)s9;
+ x[7*xstride] = (od_coeff)s8;
+ x[8*xstride] = (od_coeff)s7;
+ x[9*xstride] = (od_coeff)s6;
+ x[10*xstride] = (od_coeff)s5;
+ x[11*xstride] = (od_coeff)s4;
+ x[12*xstride] = (od_coeff)s3;
+ x[13*xstride] = (od_coeff)s2;
+ x[14*xstride] = (od_coeff)s1;
+ x[15*xstride] = (od_coeff)s0;
+}
+
+void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride) {
+ /*215 adds, 38 shifts, 87 "muls".*/
+ int t0;
+ int t1;
+ int t2;
+ int t3;
+ int t4;
+ int t5;
+ int t6;
+ int t7;
+ int t8;
+ int t9;
+ int ta;
+ int tb;
+ int tc;
+ int td;
+ int te;
+ int tf;
+ int tg;
+ int th;
+ int ti;
+ int tj;
+ int tk;
+ int tl;
+ int tm;
+ int tn;
+ int to;
+ int tp;
+ int tq;
+ int tr;
+ int ts;
+ int tt;
+ int tu;
+ int tv;
+ t0 = x[0*xstride];
+ tg = x[1*xstride];
+ t8 = x[2*xstride];
+ to = x[3*xstride];
+ t4 = x[4*xstride];
+ tk = x[5*xstride];
+ tc = x[6*xstride];
+ ts = x[7*xstride];
+ t2 = x[8*xstride];
+ ti = x[9*xstride];
+ ta = x[10*xstride];
+ tq = x[11*xstride];
+ t6 = x[12*xstride];
+ tm = x[13*xstride];
+ te = x[14*xstride];
+ tu = x[15*xstride];
+ t1 = x[16*xstride];
+ th = x[17*xstride];
+ t9 = x[18*xstride];
+ tp = x[19*xstride];
+ t5 = x[20*xstride];
+ tl = x[21*xstride];
+ td = x[22*xstride];
+ tt = x[23*xstride];
+ t3 = x[24*xstride];
+ tj = x[25*xstride];
+ tb = x[26*xstride];
+ tr = x[27*xstride];
+ t7 = x[28*xstride];
+ tn = x[29*xstride];
+ tf = x[30*xstride];
+ tv = x[31*xstride];
+ OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
+ t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
+ y[0] = (od_coeff)t0;
+ y[1] = (od_coeff)t1;
+ y[2] = (od_coeff)t2;
+ y[3] = (od_coeff)t3;
+ y[4] = (od_coeff)t4;
+ y[5] = (od_coeff)t5;
+ y[6] = (od_coeff)t6;
+ y[7] = (od_coeff)t7;
+ y[8] = (od_coeff)t8;
+ y[9] = (od_coeff)t9;
+ y[10] = (od_coeff)ta;
+ y[11] = (od_coeff)tb;
+ y[12] = (od_coeff)tc;
+ y[13] = (od_coeff)td;
+ y[14] = (od_coeff)te;
+ y[15] = (od_coeff)tf;
+ y[16] = (od_coeff)tg;
+ y[17] = (od_coeff)th;
+ y[18] = (od_coeff)ti;
+ y[19] = (od_coeff)tj;
+ y[20] = (od_coeff)tk;
+ y[21] = (od_coeff)tl;
+ y[22] = (od_coeff)tm;
+ y[23] = (od_coeff)tn;
+ y[24] = (od_coeff)to;
+ y[25] = (od_coeff)tp;
+ y[26] = (od_coeff)tq;
+ y[27] = (od_coeff)tr;
+ y[28] = (od_coeff)ts;
+ y[29] = (od_coeff)tt;
+ y[30] = (od_coeff)tu;
+ y[31] = (od_coeff)tv;
+}
+
+void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]) {
+ int t0;
+ int t1;
+ int t2;
+ int t3;
+ int t4;
+ int t5;
+ int t6;
+ int t7;
+ int t8;
+ int t9;
+ int ta;
+ int tb;
+ int tc;
+ int td;
+ int te;
+ int tf;
+ int tg;
+ int th;
+ int ti;
+ int tj;
+ int tk;
+ int tl;
+ int tm;
+ int tn;
+ int to;
+ int tp;
+ int tq;
+ int tr;
+ int ts;
+ int tt;
+ int tu;
+ int tv;
+ t0 = y[0];
+ tg = y[1];
+ t8 = y[2];
+ to = y[3];
+ t4 = y[4];
+ tk = y[5];
+ tc = y[6];
+ ts = y[7];
+ t2 = y[8];
+ ti = y[9];
+ ta = y[10];
+ tq = y[11];
+ t6 = y[12];
+ tm = y[13];
+ te = y[14];
+ tu = y[15];
+ t1 = y[16];
+ th = y[17];
+ t9 = y[18];
+ tp = y[19];
+ t5 = y[20];
+ tl = y[21];
+ td = y[22];
+ tt = y[23];
+ t3 = y[24];
+ tj = y[25];
+ tb = y[26];
+ tr = y[27];
+ t7 = y[28];
+ tn = y[29];
+ tf = y[30];
+ tv = y[31];
+ OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu,
+ t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv);
+ x[0*xstride] = (od_coeff)t0;
+ x[1*xstride] = (od_coeff)t1;
+ x[2*xstride] = (od_coeff)t2;
+ x[3*xstride] = (od_coeff)t3;
+ x[4*xstride] = (od_coeff)t4;
+ x[5*xstride] = (od_coeff)t5;
+ x[6*xstride] = (od_coeff)t6;
+ x[7*xstride] = (od_coeff)t7;
+ x[8*xstride] = (od_coeff)t8;
+ x[9*xstride] = (od_coeff)t9;
+ x[10*xstride] = (od_coeff)ta;
+ x[11*xstride] = (od_coeff)tb;
+ x[12*xstride] = (od_coeff)tc;
+ x[13*xstride] = (od_coeff)td;
+ x[14*xstride] = (od_coeff)te;
+ x[15*xstride] = (od_coeff)tf;
+ x[16*xstride] = (od_coeff)tg;
+ x[17*xstride] = (od_coeff)th;
+ x[18*xstride] = (od_coeff)ti;
+ x[19*xstride] = (od_coeff)tj;
+ x[20*xstride] = (od_coeff)tk;
+ x[21*xstride] = (od_coeff)tl;
+ x[22*xstride] = (od_coeff)tm;
+ x[23*xstride] = (od_coeff)tn;
+ x[24*xstride] = (od_coeff)to;
+ x[25*xstride] = (od_coeff)tp;
+ x[26*xstride] = (od_coeff)tq;
+ x[27*xstride] = (od_coeff)tr;
+ x[28*xstride] = (od_coeff)ts;
+ x[29*xstride] = (od_coeff)tt;
+ x[30*xstride] = (od_coeff)tu;
+ x[31*xstride] = (od_coeff)tv;
+}
+
+#if CONFIG_TX64X64
+void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride) {
+ int t0;
+ int t1;
+ int t2;
+ int t3;
+ int t4;
+ int t5;
+ int t6;
+ int t7;
+ int t8;
+ int t9;
+ int ta;
+ int tb;
+ int tc;
+ int td;
+ int te;
+ int tf;
+ int tg;
+ int th;
+ int ti;
+ int tj;
+ int tk;
+ int tl;
+ int tm;
+ int tn;
+ int to;
+ int tp;
+ int tq;
+ int tr;
+ int ts;
+ int tt;
+ int tu;
+ int tv;
+ int tw;
+ int tx;
+ int ty;
+ int tz;
+ int tA;
+ int tB;
+ int tC;
+ int tD;
+ int tE;
+ int tF;
+ int tG;
+ int tH;
+ int tI;
+ int tJ;
+ int tK;
+ int tL;
+ int tM;
+ int tN;
+ int tO;
+ int tP;
+ int tQ;
+ int tR;
+ int tS;
+ int tT;
+ int tU;
+ int tV;
+ int tW;
+ int tX;
+ int tY;
+ int tZ;
+ int t_;
+ int t;
+ t0 = x[0*xstride];
+ tw = x[1*xstride];
+ tg = x[2*xstride];
+ tM = x[3*xstride];
+ t8 = x[4*xstride];
+ tE = x[5*xstride];
+ to = x[6*xstride];
+ tU = x[7*xstride];
+ t4 = x[8*xstride];
+ tA = x[9*xstride];
+ tk = x[10*xstride];
+ tQ = x[11*xstride];
+ tc = x[12*xstride];
+ tI = x[13*xstride];
+ ts = x[14*xstride];
+ tY = x[15*xstride];
+ t2 = x[16*xstride];
+ ty = x[17*xstride];
+ ti = x[18*xstride];
+ tO = x[19*xstride];
+ ta = x[20*xstride];
+ tG = x[21*xstride];
+ tq = x[22*xstride];
+ tW = x[23*xstride];
+ t6 = x[24*xstride];
+ tC = x[25*xstride];
+ tm = x[26*xstride];
+ tS = x[27*xstride];
+ te = x[28*xstride];
+ tK = x[29*xstride];
+ tu = x[30*xstride];
+ t_ = x[31*xstride];
+ t1 = x[32*xstride];
+ tx = x[33*xstride];
+ th = x[34*xstride];
+ tN = x[35*xstride];
+ t9 = x[36*xstride];
+ tF = x[37*xstride];
+ tp = x[38*xstride];
+ tV = x[39*xstride];
+ t5 = x[40*xstride];
+ tB = x[41*xstride];
+ tl = x[42*xstride];
+ tR = x[43*xstride];
+ td = x[44*xstride];
+ tJ = x[45*xstride];
+ tt = x[46*xstride];
+ tZ = x[47*xstride];
+ t3 = x[48*xstride];
+ tz = x[49*xstride];
+ tj = x[50*xstride];
+ tP = x[51*xstride];
+ tb = x[52*xstride];
+ tH = x[53*xstride];
+ tr = x[54*xstride];
+ tX = x[55*xstride];
+ t7 = x[56*xstride];
+ tD = x[57*xstride];
+ tn = x[58*xstride];
+ tT = x[59*xstride];
+ tf = x[60*xstride];
+ tL = x[61*xstride];
+ tv = x[62*xstride];
+ t = x[63*xstride];
+ OD_FDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
+ t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
+ th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
+ tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
+ y[0] = (od_coeff)t0;
+ y[1] = (od_coeff)t1;
+ y[2] = (od_coeff)t2;
+ y[3] = (od_coeff)t3;
+ y[4] = (od_coeff)t4;
+ y[5] = (od_coeff)t5;
+ y[6] = (od_coeff)t6;
+ y[7] = (od_coeff)t7;
+ y[8] = (od_coeff)t8;
+ y[9] = (od_coeff)t9;
+ y[10] = (od_coeff)ta;
+ y[11] = (od_coeff)tb;
+ y[12] = (od_coeff)tc;
+ y[13] = (od_coeff)td;
+ y[14] = (od_coeff)te;
+ y[15] = (od_coeff)tf;
+ y[16] = (od_coeff)tg;
+ y[17] = (od_coeff)th;
+ y[18] = (od_coeff)ti;
+ y[19] = (od_coeff)tj;
+ y[20] = (od_coeff)tk;
+ y[21] = (od_coeff)tl;
+ y[22] = (od_coeff)tm;
+ y[23] = (od_coeff)tn;
+ y[24] = (od_coeff)to;
+ y[25] = (od_coeff)tp;
+ y[26] = (od_coeff)tq;
+ y[27] = (od_coeff)tr;
+ y[28] = (od_coeff)ts;
+ y[29] = (od_coeff)tt;
+ y[30] = (od_coeff)tu;
+ y[31] = (od_coeff)tv;
+ y[32] = (od_coeff)tw;
+ y[33] = (od_coeff)tx;
+ y[34] = (od_coeff)ty;
+ y[35] = (od_coeff)tz;
+ y[36] = (od_coeff)tA;
+ y[37] = (od_coeff)tB;
+ y[38] = (od_coeff)tC;
+ y[39] = (od_coeff)tD;
+ y[40] = (od_coeff)tE;
+ y[41] = (od_coeff)tF;
+ y[42] = (od_coeff)tG;
+ y[43] = (od_coeff)tH;
+ y[44] = (od_coeff)tI;
+ y[45] = (od_coeff)tJ;
+ y[46] = (od_coeff)tK;
+ y[47] = (od_coeff)tL;
+ y[48] = (od_coeff)tM;
+ y[49] = (od_coeff)tN;
+ y[50] = (od_coeff)tO;
+ y[51] = (od_coeff)tP;
+ y[52] = (od_coeff)tQ;
+ y[53] = (od_coeff)tR;
+ y[54] = (od_coeff)tS;
+ y[55] = (od_coeff)tT;
+ y[56] = (od_coeff)tU;
+ y[57] = (od_coeff)tV;
+ y[58] = (od_coeff)tW;
+ y[59] = (od_coeff)tX;
+ y[60] = (od_coeff)tY;
+ y[61] = (od_coeff)tZ;
+ y[62] = (od_coeff)t_;
+ y[63] = (od_coeff)t;
+}
+
+void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]) {
+ int t0;
+ int t1;
+ int t2;
+ int t3;
+ int t4;
+ int t5;
+ int t6;
+ int t7;
+ int t8;
+ int t9;
+ int ta;
+ int tb;
+ int tc;
+ int td;
+ int te;
+ int tf;
+ int tg;
+ int th;
+ int ti;
+ int tj;
+ int tk;
+ int tl;
+ int tm;
+ int tn;
+ int to;
+ int tp;
+ int tq;
+ int tr;
+ int ts;
+ int tt;
+ int tu;
+ int tv;
+ int tw;
+ int tx;
+ int ty;
+ int tz;
+ int tA;
+ int tB;
+ int tC;
+ int tD;
+ int tE;
+ int tF;
+ int tG;
+ int tH;
+ int tI;
+ int tJ;
+ int tK;
+ int tL;
+ int tM;
+ int tN;
+ int tO;
+ int tP;
+ int tQ;
+ int tR;
+ int tS;
+ int tT;
+ int tU;
+ int tV;
+ int tW;
+ int tX;
+ int tY;
+ int tZ;
+ int t_;
+ int t;
+ t0 = y[0];
+ tw = y[1];
+ tg = y[2];
+ tM = y[3];
+ t8 = y[4];
+ tE = y[5];
+ to = y[6];
+ tU = y[7];
+ t4 = y[8];
+ tA = y[9];
+ tk = y[10];
+ tQ = y[11];
+ tc = y[12];
+ tI = y[13];
+ ts = y[14];
+ tY = y[15];
+ t2 = y[16];
+ ty = y[17];
+ ti = y[18];
+ tO = y[19];
+ ta = y[20];
+ tG = y[21];
+ tq = y[22];
+ tW = y[23];
+ t6 = y[24];
+ tC = y[25];
+ tm = y[26];
+ tS = y[27];
+ te = y[28];
+ tK = y[29];
+ tu = y[30];
+ t_ = y[31];
+ t1 = y[32];
+ tx = y[33];
+ th = y[34];
+ tN = y[35];
+ t9 = y[36];
+ tF = y[37];
+ tp = y[38];
+ tV = y[39];
+ t5 = y[40];
+ tB = y[41];
+ tl = y[42];
+ tR = y[43];
+ td = y[44];
+ tJ = y[45];
+ tt = y[46];
+ tZ = y[47];
+ t3 = y[48];
+ tz = y[49];
+ tj = y[50];
+ tP = y[51];
+ tb = y[52];
+ tH = y[53];
+ tr = y[54];
+ tX = y[55];
+ t7 = y[56];
+ tD = y[57];
+ tn = y[58];
+ tT = y[59];
+ tf = y[60];
+ tL = y[61];
+ tv = y[62];
+ t = y[63];
+ OD_IDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY,
+ t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx,
+ th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP,
+ tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t);
+ x[0*xstride] = (od_coeff)t0;
+ x[1*xstride] = (od_coeff)t1;
+ x[2*xstride] = (od_coeff)t2;
+ x[3*xstride] = (od_coeff)t3;
+ x[4*xstride] = (od_coeff)t4;
+ x[5*xstride] = (od_coeff)t5;
+ x[6*xstride] = (od_coeff)t6;
+ x[7*xstride] = (od_coeff)t7;
+ x[8*xstride] = (od_coeff)t8;
+ x[9*xstride] = (od_coeff)t9;
+ x[10*xstride] = (od_coeff)ta;
+ x[11*xstride] = (od_coeff)tb;
+ x[12*xstride] = (od_coeff)tc;
+ x[13*xstride] = (od_coeff)td;
+ x[14*xstride] = (od_coeff)te;
+ x[15*xstride] = (od_coeff)tf;
+ x[16*xstride] = (od_coeff)tg;
+ x[17*xstride] = (od_coeff)th;
+ x[18*xstride] = (od_coeff)ti;
+ x[19*xstride] = (od_coeff)tj;
+ x[20*xstride] = (od_coeff)tk;
+ x[21*xstride] = (od_coeff)tl;
+ x[22*xstride] = (od_coeff)tm;
+ x[23*xstride] = (od_coeff)tn;
+ x[24*xstride] = (od_coeff)to;
+ x[25*xstride] = (od_coeff)tp;
+ x[26*xstride] = (od_coeff)tq;
+ x[27*xstride] = (od_coeff)tr;
+ x[28*xstride] = (od_coeff)ts;
+ x[29*xstride] = (od_coeff)tt;
+ x[30*xstride] = (od_coeff)tu;
+ x[31*xstride] = (od_coeff)tv;
+ x[32*xstride] = (od_coeff)tw;
+ x[33*xstride] = (od_coeff)tx;
+ x[34*xstride] = (od_coeff)ty;
+ x[35*xstride] = (od_coeff)tz;
+ x[36*xstride] = (od_coeff)tA;
+ x[37*xstride] = (od_coeff)tB;
+ x[38*xstride] = (od_coeff)tC;
+ x[39*xstride] = (od_coeff)tD;
+ x[40*xstride] = (od_coeff)tE;
+ x[41*xstride] = (od_coeff)tF;
+ x[42*xstride] = (od_coeff)tG;
+ x[43*xstride] = (od_coeff)tH;
+ x[44*xstride] = (od_coeff)tI;
+ x[45*xstride] = (od_coeff)tJ;
+ x[46*xstride] = (od_coeff)tK;
+ x[47*xstride] = (od_coeff)tL;
+ x[48*xstride] = (od_coeff)tM;
+ x[49*xstride] = (od_coeff)tN;
+ x[50*xstride] = (od_coeff)tO;
+ x[51*xstride] = (od_coeff)tP;
+ x[52*xstride] = (od_coeff)tQ;
+ x[53*xstride] = (od_coeff)tR;
+ x[54*xstride] = (od_coeff)tS;
+ x[55*xstride] = (od_coeff)tT;
+ x[56*xstride] = (od_coeff)tU;
+ x[57*xstride] = (od_coeff)tV;
+ x[58*xstride] = (od_coeff)tW;
+ x[59*xstride] = (od_coeff)tX;
+ x[60*xstride] = (od_coeff)tY;
+ x[61*xstride] = (od_coeff)tZ;
+ x[62*xstride] = (od_coeff)t_;
+ x[63*xstride] = (od_coeff)t;
+}
+#endif
+
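+/* The daala_* wrappers below adapt the od_bin_* transforms to the av1 1-D
+   transform interface: each copies its tran_low_t input into an od_coeff
+   buffer, runs the matching od_bin_* transform with unit stride, and
+   copies the result back out. */
+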
+void daala_fdct4(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[4];
+ od_coeff y[4];
+ for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdct4(y, x, 1);
+ for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
+}
+
+void daala_idct4(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[4];
+ od_coeff y[4];
+  for (i = 0; i < 4; i++) y[i] = (od_coeff)input[i];
+ od_bin_idct4(x, 1, y);
+ for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i];
+}
+
+void daala_fdst4(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[4];
+ od_coeff y[4];
+ for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdst4(y, x, 1);
+ for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
+}
+
+void daala_idst4(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[4];
+ od_coeff y[4];
+  for (i = 0; i < 4; i++) y[i] = (od_coeff)input[i];
+ od_bin_idst4(x, 1, y);
+ for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i];
+}
+
+void daala_idtx4(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ for (i = 0; i < 4; i++) output[i] = input[i];
+}
+
+void daala_fdct8(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[8];
+ od_coeff y[8];
+ for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdct8(y, x, 1);
+ for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
+}
+
+void daala_idct8(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[8];
+ od_coeff y[8];
+ for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i];
+ od_bin_idct8(x, 1, y);
+ for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i];
+}
+
+void daala_fdst8(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[8];
+ od_coeff y[8];
+ for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdst8(y, x, 1);
+ for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
+}
+
+void daala_idst8(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[8];
+ od_coeff y[8];
+ for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i];
+ od_bin_idst8(x, 1, y);
+ for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i];
+}
+
+void daala_idtx8(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ for (i = 0; i < 8; i++) output[i] = input[i];
+}
+
+void daala_fdct16(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[16];
+ od_coeff y[16];
+ for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdct16(y, x, 1);
+ for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i];
+}
+
+void daala_idct16(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[16];
+ od_coeff y[16];
+ for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i];
+ od_bin_idct16(x, 1, y);
+ for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i];
+}
+
+void daala_fdst16(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[16];
+ od_coeff y[16];
+ for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdst16(y, x, 1);
+ for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i];
+}
+
+void daala_idst16(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[16];
+ od_coeff y[16];
+ for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i];
+ od_bin_idst16(x, 1, y);
+ for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i];
+}
+
+void daala_idtx16(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ for (i = 0; i < 16; i++) output[i] = input[i];
+}
+
+void daala_fdct32(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[32];
+ od_coeff y[32];
+ for (i = 0; i < 32; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdct32(y, x, 1);
+ for (i = 0; i < 32; i++) output[i] = (tran_low_t)y[i];
+}
+
+void daala_idct32(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[32];
+ od_coeff y[32];
+ for (i = 0; i < 32; i++) y[i] = (od_coeff)input[i];
+ od_bin_idct32(x, 1, y);
+ for (i = 0; i < 32; i++) output[i] = (tran_low_t)x[i];
+}
+
+/* Preserve the "half-right" transform behavior: the first half of the
+   input is passed through to the second half of the output, and a
+   16-point DCT of the second half of the input forms the first half. */
+void daala_fdst32(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ tran_low_t inputhalf[16];
+ for (i = 0; i < 16; ++i) {
+ output[16 + i] = input[i];
+ }
+ for (i = 0; i < 16; ++i) {
+ inputhalf[i] = input[i + 16];
+ }
+ daala_fdct16(inputhalf, output);
+}
+
+/* Preserve the "half-right" transform behavior: the second half of the
+   input is passed through to the first half of the output, and an inverse
+   16-point DCT of the first half of the input forms the second half. */
+void daala_idst32(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ tran_low_t inputhalf[16];
+ for (i = 0; i < 16; ++i) {
+ inputhalf[i] = input[i];
+ }
+ for (i = 0; i < 16; ++i) {
+ output[i] = input[16 + i];
+ }
+ daala_idct16(inputhalf, output + 16);
+}
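+
+/* daala_idst32() is intended as the exact inverse of daala_fdst32(): the
+   pass-through halves swap back, and daala_idct16() inverts
+   daala_fdct16() in integer arithmetic. */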
+
+void daala_idtx32(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ for (i = 0; i < 32; i++) output[i] = input[i];
+}
+
+#if CONFIG_TX64X64
+void daala_fdct64(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[64];
+ od_coeff y[64];
+ for (i = 0; i < 64; i++) x[i] = (od_coeff)input[i];
+ od_bin_fdct64(y, x, 1);
+ for (i = 0; i < 64; i++) output[i] = (tran_low_t)y[i];
+}
+
+void daala_idct64(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ od_coeff x[64];
+ od_coeff y[64];
+ for (i = 0; i < 64; i++) y[i] = (od_coeff)input[i];
+ od_bin_idct64(x, 1, y);
+ for (i = 0; i < 64; i++) output[i] = (tran_low_t)x[i];
+}
+
+/* Preserve the "half-right" transform behavior: the first half of the
+   input is passed through to the second half of the output, and a
+   32-point DCT of the second half of the input forms the first half. */
+void daala_fdst64(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ tran_low_t inputhalf[32];
+ for (i = 0; i < 32; ++i) {
+ output[32 + i] = input[i];
+ }
+ for (i = 0; i < 32; ++i) {
+ inputhalf[i] = input[i + 32];
+ }
+ daala_fdct32(inputhalf, output);
+}
+
+/* Preserve the "half-right" transform behavior: the second half of the
+   input is passed through to the first half of the output, and an inverse
+   32-point DCT of the first half of the input forms the second half. */
+void daala_idst64(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ tran_low_t inputhalf[32];
+ for (i = 0; i < 32; ++i) {
+ inputhalf[i] = input[i];
+ }
+ for (i = 0; i < 32; ++i) {
+ output[i] = input[32 + i];
+ }
+ daala_idct32(inputhalf, output + 32);
+}
+
+void daala_idtx64(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ for (i = 0; i < 64; i++) output[i] = input[i];
+}
+#endif
diff --git a/third_party/aom/av1/common/daala_tx.h b/third_party/aom/av1/common/daala_tx.h
index 39a844c44..7145b66a2 100644
--- a/third_party/aom/av1/common/daala_tx.h
+++ b/third_party/aom/av1/common/daala_tx.h
@@ -1,13 +1,53 @@
#ifndef AOM_DSP_DAALA_TX_H_
#define AOM_DSP_DAALA_TX_H_
+#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/odintrin.h"
+void daala_fdct4(const tran_low_t *input, tran_low_t *output);
+void daala_idct4(const tran_low_t *input, tran_low_t *output);
+void daala_fdst4(const tran_low_t *input, tran_low_t *output);
+void daala_idst4(const tran_low_t *input, tran_low_t *output);
+void daala_idtx4(const tran_low_t *input, tran_low_t *output);
+void daala_fdct8(const tran_low_t *input, tran_low_t *output);
+void daala_idct8(const tran_low_t *input, tran_low_t *output);
+void daala_fdst8(const tran_low_t *input, tran_low_t *output);
+void daala_idst8(const tran_low_t *input, tran_low_t *output);
+void daala_idtx8(const tran_low_t *input, tran_low_t *output);
+void daala_fdct16(const tran_low_t *input, tran_low_t *output);
+void daala_idct16(const tran_low_t *input, tran_low_t *output);
+void daala_fdst16(const tran_low_t *input, tran_low_t *output);
+void daala_idst16(const tran_low_t *input, tran_low_t *output);
+void daala_idtx16(const tran_low_t *input, tran_low_t *output);
+void daala_fdct32(const tran_low_t *input, tran_low_t *output);
+void daala_idct32(const tran_low_t *input, tran_low_t *output);
+void daala_fdst32(const tran_low_t *input, tran_low_t *output);
+void daala_idst32(const tran_low_t *input, tran_low_t *output);
+void daala_idtx32(const tran_low_t *input, tran_low_t *output);
+#if CONFIG_TX64X64
+void daala_fdct64(const tran_low_t *input, tran_low_t *output);
+void daala_idct64(const tran_low_t *input, tran_low_t *output);
+void daala_fdst64(const tran_low_t *input, tran_low_t *output);
+void daala_idst64(const tran_low_t *input, tran_low_t *output);
+void daala_idtx64(const tran_low_t *input, tran_low_t *output);
+#endif
+
void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride);
void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]);
+void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride);
+void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]);
void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]);
void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]);
-
+void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride);
+void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]);
+void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride);
+void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]);
+void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride);
+void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]);
+#if CONFIG_TX64X64
+void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride);
+void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]);
+#endif
#endif
diff --git a/third_party/aom/av1/common/entropy.c b/third_party/aom/av1/common/entropy.c
index b3d573867..17a8f1356 100644
--- a/third_party/aom/av1/common/entropy.c
+++ b/third_party/aom/av1/common/entropy.c
@@ -17,6 +17,9 @@
#include "av1/common/entropymode.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/scan.h"
+#if CONFIG_Q_ADAPT_PROBS
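+/* The quantizer-adaptive default coefficient tables now come from
+   token_cdfs.h; the probability tables formerly defined in this file are
+   removed below. */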
+#include "av1/common/token_cdfs.h"
+#endif // CONFIG_Q_ADAPT_PROBS
#if CONFIG_LV_MAP
#include "av1/common/txb_common.h"
#endif
@@ -146,6 +149,9 @@ const uint16_t band_count_table[TX_SIZES_ALL][8] = {
{ 1, 2, 3, 4, 8, 32 - 18, 0 }, { 1, 2, 3, 4, 8, 32 - 18, 0 },
{ 1, 2, 3, 4, 11, 128 - 21, 0 }, { 1, 2, 3, 4, 11, 128 - 21, 0 },
{ 1, 2, 3, 4, 11, 512 - 21, 0 }, { 1, 2, 3, 4, 11, 512 - 21, 0 },
+#if CONFIG_TX64X64
+ { 1, 2, 3, 4, 11, 2048 - 21, 0 }, { 1, 2, 3, 4, 11, 2048 - 21, 0 },
+#endif // CONFIG_TX64X64
{ 1, 2, 3, 4, 11, 64 - 21, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 },
{ 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 256 - 21, 0 },
};
@@ -162,6 +168,9 @@ const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
{ 0, 1, 3, 6, 10, 18, 32, 0 }, { 0, 1, 3, 6, 10, 18, 32, 0 },
{ 0, 1, 3, 6, 10, 21, 128, 0 }, { 0, 1, 3, 6, 10, 21, 128, 0 },
{ 0, 1, 3, 6, 10, 21, 512, 0 }, { 0, 1, 3, 6, 10, 21, 512, 0 },
+#if CONFIG_TX64X64
+ { 0, 1, 3, 6, 10, 21, 2048, 0 }, { 0, 1, 3, 6, 10, 21, 2048, 0 },
+#endif // CONFIG_TX64X64
{ 0, 1, 3, 6, 10, 21, 64, 0 }, { 0, 1, 3, 6, 10, 21, 64, 0 },
{ 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 256, 0 },
};
@@ -891,3256 +900,7 @@ const aom_cdf_prob av1_pareto8_tail_probs[COEFF_PROB_MODELS][TAIL_NODES] = {
{ 31486, 1150, 107, 20, 1, 1, 1, 1, 1 },
};
-/* clang-format off */
-#if CONFIG_Q_ADAPT_PROBS
-const av1_coeff_probs_model
-default_qctx_coef_probs[QCTX_BINS][TX_SIZES][PLANE_TYPES] = {
- { // Q_Index 0
-#if CONFIG_CHROMA_2X2
- { // TX_SIZE 0
- { // Y plane
- { // Intra
- { // band 0
- {182, 34, 137}, { 79, 39, 103}, { 10, 28, 51},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 45, 88, 147}, { 46, 80, 140}, { 25, 69, 119},
- { 12, 57, 96}, { 4, 41, 65}, { 1, 20, 31},
- },
- { // band 2
- { 58, 124, 190}, { 39, 106, 178}, { 16, 86, 147},
- { 7, 69, 114}, { 3, 50, 80}, { 1, 25, 42},
- },
- { // band 3
- { 90, 138, 215}, { 54, 116, 198}, { 18, 86, 155},
- { 5, 62, 112}, { 1, 38, 68}, { 1, 17, 30},
- },
- { // band 4
- {126, 149, 231}, { 82, 114, 211}, { 21, 80, 157},
- { 6, 56, 105}, { 1, 36, 64}, { 1, 17, 31},
- },
- { // band 5
- {171, 56, 236}, {140, 54, 219}, { 57, 45, 167},
- { 26, 36, 113}, { 11, 29, 72}, { 3, 18, 39},
- },
- },
- { // Intra
- { // band 0
- {153, 122, 186}, {106, 109, 171}, { 36, 84, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 27, 151, 201}, { 34, 131, 199}, { 23, 102, 161},
- { 10, 80, 120}, { 4, 52, 78}, { 1, 24, 37},
- },
- { // band 2
- { 43, 158, 213}, { 35, 133, 203}, { 8, 92, 151},
- { 2, 64, 106}, { 1, 36, 60}, { 1, 13, 24},
- },
- { // band 3
- { 68, 167, 223}, { 36, 135, 211}, { 9, 94, 157},
- { 2, 67, 112}, { 1, 40, 68}, { 1, 17, 31},
- },
- { // band 4
- {131, 146, 237}, { 72, 119, 223}, { 17, 82, 164},
- { 4, 55, 107}, { 1, 34, 63}, { 1, 16, 29},
- },
- { // band 5
- {184, 68, 244}, {153, 59, 232}, { 68, 51, 179},
- { 31, 40, 123}, { 13, 29, 77}, { 4, 17, 37},
- },
- },
- },
- { // UV plane
- { // Inter
- { // band 0
- {203, 41, 203}, {127, 56, 174}, { 49, 56, 127},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {110, 121, 217}, {119, 113, 213}, { 64, 95, 185},
- { 30, 72, 144}, { 8, 42, 76}, { 2, 17, 25},
- },
- { // band 2
- {127, 159, 229}, {115, 134, 223}, { 36, 100, 189},
- { 11, 75, 142}, { 3, 48, 83}, { 1, 19, 33},
- },
- { // band 3
- {150, 172, 241}, { 90, 133, 231}, { 28, 102, 192},
- { 7, 81, 147}, { 1, 53, 91}, { 1, 25, 42},
- },
- { // band 4
- {184, 144, 248}, {114, 117, 237}, { 37, 89, 192},
- { 10, 63, 130}, { 4, 42, 76}, { 1, 19, 38},
- },
- { // band 5
- {207, 79, 250}, {179, 74, 241}, { 83, 67, 199},
- { 38, 51, 142}, { 17, 37, 97}, { 10, 14, 55},
- },
- },
- { // Inter
- { // band 0
- {220, 82, 232}, {150, 93, 214}, { 66, 95, 177},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {116, 160, 227}, {136, 141, 227}, { 67, 114, 190},
- { 40, 94, 148}, { 21, 70, 107}, { 10, 43, 63},
- },
- { // band 2
- {124, 173, 235}, {105, 147, 226}, { 27, 107, 184},
- { 10, 80, 142}, { 3, 50, 86}, { 1, 16, 32},
- },
- { // band 3
- {149, 179, 243}, { 89, 147, 234}, { 29, 112, 193},
- { 9, 94, 157}, { 1, 64, 111}, { 1, 25, 43},
- },
- { // band 4
- {187, 153, 248}, {127, 130, 241}, { 52, 99, 202},
- { 20, 79, 152}, { 4, 50, 93}, { 1, 19, 32},
- },
- { // band 5
- {215, 82, 251}, {195, 80, 246}, { 93, 70, 204},
- { 39, 54, 147}, { 14, 33, 88}, { 6, 14, 39},
- },
- },
- },
- },
-#endif
- { // TX_SIZE 0
- { // Y plane
- { // Intra
- { // band 0
- {182, 34, 137}, { 79, 39, 103}, { 10, 28, 51},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 45, 88, 147}, { 46, 80, 140}, { 25, 69, 119},
- { 12, 57, 96}, { 4, 41, 65}, { 1, 20, 31},
- },
- { // band 2
- { 58, 124, 190}, { 39, 106, 178}, { 16, 86, 147},
- { 7, 69, 114}, { 3, 50, 80}, { 1, 25, 42},
- },
- { // band 3
- { 90, 138, 215}, { 54, 116, 198}, { 18, 86, 155},
- { 5, 62, 112}, { 1, 38, 68}, { 1, 17, 30},
- },
- { // band 4
- {126, 149, 231}, { 82, 114, 211}, { 21, 80, 157},
- { 6, 56, 105}, { 1, 36, 64}, { 1, 17, 31},
- },
- { // band 5
- {171, 56, 236}, {140, 54, 219}, { 57, 45, 167},
- { 26, 36, 113}, { 11, 29, 72}, { 3, 18, 39},
- },
- },
- { // Intra
- { // band 0
- {153, 122, 186}, {106, 109, 171}, { 36, 84, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 27, 151, 201}, { 34, 131, 199}, { 23, 102, 161},
- { 10, 80, 120}, { 4, 52, 78}, { 1, 24, 37},
- },
- { // band 2
- { 43, 158, 213}, { 35, 133, 203}, { 8, 92, 151},
- { 2, 64, 106}, { 1, 36, 60}, { 1, 13, 24},
- },
- { // band 3
- { 68, 167, 223}, { 36, 135, 211}, { 9, 94, 157},
- { 2, 67, 112}, { 1, 40, 68}, { 1, 17, 31},
- },
- { // band 4
- {131, 146, 237}, { 72, 119, 223}, { 17, 82, 164},
- { 4, 55, 107}, { 1, 34, 63}, { 1, 16, 29},
- },
- { // band 5
- {184, 68, 244}, {153, 59, 232}, { 68, 51, 179},
- { 31, 40, 123}, { 13, 29, 77}, { 4, 17, 37},
- },
- },
- },
- { // UV plane
- { // Inter
- { // band 0
- {203, 41, 203}, {127, 56, 174}, { 49, 56, 127},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {110, 121, 217}, {119, 113, 213}, { 64, 95, 185},
- { 30, 72, 144}, { 8, 42, 76}, { 2, 17, 25},
- },
- { // band 2
- {127, 159, 229}, {115, 134, 223}, { 36, 100, 189},
- { 11, 75, 142}, { 3, 48, 83}, { 1, 19, 33},
- },
- { // band 3
- {150, 172, 241}, { 90, 133, 231}, { 28, 102, 192},
- { 7, 81, 147}, { 1, 53, 91}, { 1, 25, 42},
- },
- { // band 4
- {184, 144, 248}, {114, 117, 237}, { 37, 89, 192},
- { 10, 63, 130}, { 4, 42, 76}, { 1, 19, 38},
- },
- { // band 5
- {207, 79, 250}, {179, 74, 241}, { 83, 67, 199},
- { 38, 51, 142}, { 17, 37, 97}, { 10, 14, 55},
- },
- },
- { // Inter
- { // band 0
- {220, 82, 232}, {150, 93, 214}, { 66, 95, 177},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {116, 160, 227}, {136, 141, 227}, { 67, 114, 190},
- { 40, 94, 148}, { 21, 70, 107}, { 10, 43, 63},
- },
- { // band 2
- {124, 173, 235}, {105, 147, 226}, { 27, 107, 184},
- { 10, 80, 142}, { 3, 50, 86}, { 1, 16, 32},
- },
- { // band 3
- {149, 179, 243}, { 89, 147, 234}, { 29, 112, 193},
- { 9, 94, 157}, { 1, 64, 111}, { 1, 25, 43},
- },
- { // band 4
- {187, 153, 248}, {127, 130, 241}, { 52, 99, 202},
- { 20, 79, 152}, { 4, 50, 93}, { 1, 19, 32},
- },
- { // band 5
- {215, 82, 251}, {195, 80, 246}, { 93, 70, 204},
- { 39, 54, 147}, { 14, 33, 88}, { 6, 14, 39},
- },
- },
- },
- },
- { // TX_SIZE 1
- { // Y plane
- { // Intra
- { // band 0
- {116, 43, 131}, { 39, 41, 94}, { 4, 28, 47},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 28, 101, 141}, { 27, 95, 140}, { 18, 80, 121},
- { 10, 61, 95}, { 4, 39, 60}, { 1, 19, 26},
- },
- { // band 2
- { 29, 150, 183}, { 19, 127, 175}, { 8, 98, 147},
- { 3, 76, 115}, { 1, 55, 84}, { 1, 29, 43},
- },
- { // band 3
- { 26, 168, 202}, { 12, 138, 188}, { 2, 98, 149},
- { 1, 69, 110}, { 1, 40, 65}, { 1, 17, 25},
- },
- { // band 4
- { 33, 188, 225}, { 12, 155, 207}, { 2, 101, 155},
- { 1, 65, 106}, { 1, 36, 60}, { 1, 18, 26},
- },
- { // band 5
- { 79, 205, 242}, { 30, 168, 224}, { 5, 106, 164},
- { 1, 68, 110}, { 1, 39, 65}, { 1, 18, 28},
- },
- },
- { // Intra
- { // band 0
- { 96, 80, 201}, { 51, 88, 168}, { 14, 78, 116},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 6, 167, 216}, { 32, 152, 211}, { 24, 121, 182},
- { 13, 98, 149}, { 12, 76, 108}, { 8, 48, 62},
- },
- { // band 2
- { 17, 176, 225}, { 13, 147, 209}, { 3, 96, 155},
- { 1, 65, 108}, { 2, 43, 63}, { 2, 23, 25},
- },
- { // band 3
- { 18, 183, 232}, { 10, 153, 214}, { 1, 96, 154},
- { 1, 63, 105}, { 1, 39, 59}, { 1, 21, 24},
- },
- { // band 4
- { 23, 191, 239}, { 8, 159, 221}, { 1, 97, 158},
- { 1, 61, 105}, { 1, 37, 60}, { 1, 20, 26},
- },
- { // band 5
- { 70, 201, 243}, { 29, 163, 228}, { 4, 102, 169},
- { 1, 67, 114}, { 1, 39, 66}, { 1, 17, 29},
- },
- },
- },
- { // UV plane
- { // Inter
- { // band 0
- {181, 38, 192}, { 95, 47, 151}, { 29, 49, 102},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 72, 131, 202}, { 93, 120, 205}, { 50, 103, 179},
- { 24, 79, 143}, { 11, 47, 78}, { 7, 19, 25},
- },
- { // band 2
- { 84, 176, 221}, { 56, 144, 214}, { 21, 108, 182},
- { 8, 83, 139}, { 3, 55, 90}, { 2, 27, 41},
- },
- { // band 3
- { 84, 195, 234}, { 42, 156, 222}, { 10, 109, 180},
- { 4, 77, 133}, { 1, 48, 80}, { 1, 23, 35},
- },
- { // band 4
- { 89, 210, 238}, { 35, 165, 221}, { 6, 106, 172},
- { 2, 70, 123}, { 1, 44, 74}, { 1, 21, 30},
- },
- { // band 5
- {114, 221, 247}, { 49, 170, 234}, { 7, 113, 184},
- { 2, 77, 132}, { 1, 48, 79}, { 1, 25, 33},
- },
- },
- { // Inter
- { // band 0
- {192, 66, 237}, {113, 84, 211}, { 35, 84, 154},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 81, 180, 234}, {127, 165, 229}, { 58, 137, 204},
- { 41, 114, 174}, { 44, 94, 136}, { 29, 66, 86},
- },
- { // band 2
- { 82, 193, 240}, { 39, 162, 223}, { 8, 113, 179},
- { 3, 83, 136}, { 6, 62, 84}, { 5, 45, 45},
- },
- { // band 3
- { 78, 203, 242}, { 31, 170, 227}, { 4, 115, 181},
- { 1, 82, 135}, { 2, 59, 82}, { 1, 45, 47},
- },
- { // band 4
- { 76, 210, 239}, { 25, 170, 213}, { 2, 99, 152},
- { 1, 69, 115}, { 1, 49, 80}, { 1, 47, 57},
- },
- { // band 5
- {103, 217, 250}, { 42, 180, 237}, { 3, 124, 191},
- { 1, 90, 150}, { 1, 69, 116}, { 1, 52, 46},
- },
- },
- },
- },
- { // TX_SIZE 2
- { // Y plane
- { // Intra
- { // band 0
- { 58, 38, 99}, { 9, 26, 51}, { 1, 14, 22},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 14, 78, 109}, { 16, 73, 105}, { 11, 62, 92},
- { 6, 47, 72}, { 2, 29, 45}, { 1, 12, 18},
- },
- { // band 2
- { 17, 131, 148}, { 11, 112, 140}, { 5, 87, 118},
- { 2, 63, 90}, { 1, 42, 63}, { 1, 19, 31},
- },
- { // band 3
- { 12, 151, 168}, { 6, 116, 152}, { 1, 76, 115},
- { 1, 50, 81}, { 1, 32, 52}, { 1, 14, 23},
- },
- { // band 4
- { 10, 174, 191}, { 3, 130, 172}, { 1, 80, 126},
- { 1, 53, 88}, { 1, 32, 55}, { 1, 14, 24},
- },
- { // band 5
- { 19, 219, 237}, { 3, 168, 211}, { 1, 90, 142},
- { 1, 53, 91}, { 1, 29, 51}, { 1, 12, 21},
- },
- },
- { // Intra
- { // band 0
- { 21, 46, 184}, { 10, 53, 130}, { 2, 49, 78},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 3, 169, 198}, { 37, 165, 196}, { 26, 134, 176},
- { 11, 108, 149}, { 5, 81, 112}, { 3, 47, 64},
- },
- { // band 2
- { 11, 183, 215}, { 8, 142, 192}, { 2, 91, 141},
- { 1, 62, 100}, { 1, 38, 62}, { 1, 17, 28},
- },
- { // band 3
- { 12, 190, 223}, { 6, 149, 199}, { 1, 88, 139},
- { 1, 56, 93}, { 1, 31, 54}, { 1, 13, 21},
- },
- { // band 4
- { 11, 197, 230}, { 3, 154, 204}, { 1, 83, 134},
- { 1, 50, 86}, { 1, 28, 49}, { 1, 12, 21},
- },
- { // band 5
- { 17, 211, 240}, { 2, 167, 217}, { 1, 88, 143},
- { 1, 53, 91}, { 1, 30, 53}, { 1, 14, 24},
- },
- },
- },
- { // UV plane
- { // Inter
- { // band 0
- {151, 30, 151}, { 50, 36, 105}, { 8, 34, 66},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 39, 111, 160}, { 62, 111, 165}, { 37, 99, 147},
- { 15, 77, 118}, { 3, 47, 73}, { 1, 17, 27},
- },
- { // band 2
- { 48, 170, 190}, { 32, 135, 180}, { 11, 100, 149},
- { 4, 76, 116}, { 1, 51, 80}, { 1, 22, 36},
- },
- { // band 3
- { 39, 191, 208}, { 18, 141, 191}, { 3, 96, 150},
- { 1, 66, 110}, { 1, 41, 69}, { 1, 17, 28},
- },
- { // band 4
- { 32, 209, 219}, { 8, 152, 201}, { 1, 96, 153},
- { 1, 63, 106}, { 1, 38, 66}, { 1, 17, 29},
- },
- { // band 5
- { 33, 230, 237}, { 5, 173, 214}, { 1, 100, 155},
- { 1, 62, 105}, { 1, 38, 66}, { 1, 18, 32},
- },
- },
- { // Inter
- { // band 0
- {149, 38, 231}, { 59, 51, 186}, { 12, 54, 117},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 53, 179, 226}, {126, 176, 223}, { 58, 147, 202},
- { 28, 118, 174}, { 15, 94, 138}, { 14, 63, 87},
- },
- { // band 2
- { 58, 196, 232}, { 26, 158, 213}, { 5, 106, 166},
- { 1, 75, 124}, { 1, 46, 79}, { 1, 23, 39},
- },
- { // band 3
- { 46, 203, 235}, { 17, 162, 213}, { 2, 104, 165},
- { 1, 72, 120}, { 1, 44, 74}, { 1, 22, 33},
- },
- { // band 4
- { 37, 213, 238}, { 8, 167, 216}, { 1, 104, 168},
- { 1, 68, 119}, { 1, 40, 67}, { 1, 17, 29},
- },
- { // band 5
- { 30, 228, 239}, { 4, 181, 213}, { 1, 103, 153},
- { 1, 65, 110}, { 1, 43, 79}, { 1, 27, 56},
- },
- },
- },
- },
- { // TX_SIZE 3
- { // Y plane
- { // Intra
- { // band 0
- { 76, 25, 53}, { 9, 18, 32}, { 1, 12, 18},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 29, 55, 91}, { 19, 58, 95}, { 15, 57, 89},
- { 12, 49, 77}, { 3, 29, 44}, { 1, 8, 12},
- },
- { // band 2
- { 32, 160, 148}, { 33, 143, 146}, { 19, 122, 132},
- { 6, 90, 102}, { 1, 58, 70}, { 1, 17, 24},
- },
- { // band 3
- { 16, 181, 181}, { 6, 142, 165}, { 1, 90, 120},
- { 1, 50, 71}, { 1, 25, 38}, { 1, 9, 14},
- },
- { // band 4
- { 13, 203, 203}, { 3, 154, 176}, { 1, 80, 108},
- { 1, 41, 61}, { 1, 24, 37}, { 1, 11, 17},
- },
- { // band 5
- { 6, 234, 240}, { 1, 178, 204}, { 1, 80, 119},
- { 1, 45, 71}, { 1, 26, 42}, { 1, 12, 19},
- },
- },
- { // Intra
- { // band 0
- { 78, 20, 135}, { 25, 18, 101}, { 5, 19, 57},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 7, 144, 183}, {117, 151, 195}, {109, 151, 187},
- { 39, 130, 168}, { 11, 100, 125}, { 4, 59, 64},
- },
- { // band 2
- { 20, 184, 212}, { 12, 148, 191}, { 2, 98, 141},
- { 1, 65, 100}, { 1, 39, 61}, { 1, 14, 22},
- },
- { // band 3
- { 15, 194, 222}, { 6, 153, 198}, { 1, 92, 138},
- { 1, 58, 91}, { 1, 32, 52}, { 1, 12, 18},
- },
- { // band 4
- { 14, 206, 232}, { 3, 162, 206}, { 1, 89, 134},
- { 1, 52, 83}, { 1, 28, 46}, { 1, 11, 17},
- },
- { // band 5
- { 6, 225, 241}, { 1, 175, 210}, { 1, 81, 125},
- { 1, 48, 78}, { 1, 28, 46}, { 1, 13, 21},
- },
- },
- },
- { // UV plane
- { // Inter
- { // band 0
- {124, 23, 93}, { 31, 24, 63}, { 6, 24, 46},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 23, 86, 126}, { 45, 90, 145}, { 31, 91, 133},
- { 19, 80, 114}, { 7, 53, 72}, { 1, 20, 27},
- },
- { // band 2
- { 51, 186, 189}, { 48, 159, 182}, { 33, 128, 156},
- { 15, 92, 124}, { 2, 62, 83}, { 1, 29, 43},
- },
- { // band 3
- { 36, 198, 211}, { 15, 156, 187}, { 3, 97, 137},
- { 1, 61, 93}, { 1, 35, 57}, { 1, 15, 23},
- },
- { // band 4
- { 34, 219, 223}, { 9, 162, 193}, { 1, 91, 136},
- { 1, 58, 92}, { 1, 35, 54}, { 1, 14, 23},
- },
- { // band 5
- { 19, 243, 243}, { 3, 191, 208}, { 1, 91, 137},
- { 1, 56, 90}, { 1, 34, 55}, { 1, 16, 24},
- },
- },
- { // Inter
- { // band 0
- {119, 20, 197}, { 19, 29, 156}, { 3, 30, 107},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 24, 192, 226}, {161, 193, 227}, { 97, 185, 222},
- { 31, 158, 204}, { 16, 122, 165}, { 17, 84, 112},
- },
- { // band 2
- { 26, 202, 229}, { 11, 165, 210}, { 2, 103, 152},
- { 1, 68, 104}, { 1, 42, 70}, { 1, 16, 36},
- },
- { // band 3
- { 24, 209, 237}, { 6, 169, 214}, { 1, 102, 154},
- { 1, 65, 107}, { 1, 45, 68}, { 1, 17, 24},
- },
- { // band 4
- { 19, 219, 243}, { 4, 183, 226}, { 1, 115, 172},
- { 1, 73, 119}, { 1, 43, 77}, { 1, 15, 37},
- },
- { // band 5
- { 11, 237, 241}, { 2, 190, 216}, { 1, 108, 146},
- { 1, 59, 94}, { 1, 40, 67}, { 1, 30, 53},
- },
- },
- },
- },
-#if CONFIG_TX64X64
- { // TX_SIZE 4
- { // Y plane
- { // Intra
- { // band 0
- { 76, 25, 53}, { 9, 18, 32}, { 1, 12, 18},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 29, 55, 91}, { 19, 58, 95}, { 15, 57, 89},
- { 12, 49, 77}, { 3, 29, 44}, { 1, 8, 12},
- },
- { // band 2
- { 32, 160, 148}, { 33, 143, 146}, { 19, 122, 132},
- { 6, 90, 102}, { 1, 58, 70}, { 1, 17, 24},
- },
- { // band 3
- { 16, 181, 181}, { 6, 142, 165}, { 1, 90, 120},
- { 1, 50, 71}, { 1, 25, 38}, { 1, 9, 14},
- },
- { // band 4
- { 13, 203, 203}, { 3, 154, 176}, { 1, 80, 108},
- { 1, 41, 61}, { 1, 24, 37}, { 1, 11, 17},
- },
- { // band 5
- { 6, 234, 240}, { 1, 178, 204}, { 1, 80, 119},
- { 1, 45, 71}, { 1, 26, 42}, { 1, 12, 19},
- },
- },
- { // Intra
- { // band 0
- { 78, 20, 135}, { 25, 18, 101}, { 5, 19, 57},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 7, 144, 183}, {117, 151, 195}, {109, 151, 187},
- { 39, 130, 168}, { 11, 100, 125}, { 4, 59, 64},
- },
- { // band 2
- { 20, 184, 212}, { 12, 148, 191}, { 2, 98, 141},
- { 1, 65, 100}, { 1, 39, 61}, { 1, 14, 22},
- },
- { // band 3
- { 15, 194, 222}, { 6, 153, 198}, { 1, 92, 138},
- { 1, 58, 91}, { 1, 32, 52}, { 1, 12, 18},
- },
- { // band 4
- { 14, 206, 232}, { 3, 162, 206}, { 1, 89, 134},
- { 1, 52, 83}, { 1, 28, 46}, { 1, 11, 17},
- },
- { // band 5
- { 6, 225, 241}, { 1, 175, 210}, { 1, 81, 125},
- { 1, 48, 78}, { 1, 28, 46}, { 1, 13, 21},
- },
- },
- },
- { // UV plane
- { // Inter
- { // band 0
- {124, 23, 93}, { 31, 24, 63}, { 6, 24, 46},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 23, 86, 126}, { 45, 90, 145}, { 31, 91, 133},
- { 19, 80, 114}, { 7, 53, 72}, { 1, 20, 27},
- },
- { // band 2
- { 51, 186, 189}, { 48, 159, 182}, { 33, 128, 156},
- { 15, 92, 124}, { 2, 62, 83}, { 1, 29, 43},
- },
- { // band 3
- { 36, 198, 211}, { 15, 156, 187}, { 3, 97, 137},
- { 1, 61, 93}, { 1, 35, 57}, { 1, 15, 23},
- },
- { // band 4
- { 34, 219, 223}, { 9, 162, 193}, { 1, 91, 136},
- { 1, 58, 92}, { 1, 35, 54}, { 1, 14, 23},
- },
- { // band 5
- { 19, 243, 243}, { 3, 191, 208}, { 1, 91, 137},
- { 1, 56, 90}, { 1, 34, 55}, { 1, 16, 24},
- },
- },
- { // Inter
- { // band 0
- {119, 20, 197}, { 19, 29, 156}, { 3, 30, 107},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 24, 192, 226}, {161, 193, 227}, { 97, 185, 222},
- { 31, 158, 204}, { 16, 122, 165}, { 17, 84, 112},
- },
- { // band 2
- { 26, 202, 229}, { 11, 165, 210}, { 2, 103, 152},
- { 1, 68, 104}, { 1, 42, 70}, { 1, 16, 36},
- },
- { // band 3
- { 24, 209, 237}, { 6, 169, 214}, { 1, 102, 154},
- { 1, 65, 107}, { 1, 45, 68}, { 1, 17, 24},
- },
- { // band 4
- { 19, 219, 243}, { 4, 183, 226}, { 1, 115, 172},
- { 1, 73, 119}, { 1, 43, 77}, { 1, 15, 37},
- },
- { // band 5
- { 11, 237, 241}, { 2, 190, 216}, { 1, 108, 146},
- { 1, 59, 94}, { 1, 40, 67}, { 1, 30, 53},
- },
- },
- },
- },
-#endif // CONFIG_TX64X64
- },
- { // Q_Index 1
-#if CONFIG_CHROMA_2X2
- { // TX_SIZE 0
- { // Y plane
- { // Intra
- { // band 0
- {174, 30, 159}, { 76, 38, 115}, { 15, 33, 65},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 60, 80, 153}, { 72, 75, 147}, { 36, 68, 129},
- { 15, 59, 104}, { 4, 45, 74}, { 1, 28, 45},
- },
- { // band 2
- { 70, 122, 186}, { 55, 104, 175}, { 21, 83, 144},
- { 8, 67, 112}, { 2, 51, 82}, { 1, 34, 57},
- },
- { // band 3
- { 97, 144, 207}, { 52, 109, 195}, { 16, 77, 153},
- { 4, 58, 113}, { 1, 43, 77}, { 1, 27, 48},
- },
- { // band 4
- {128, 148, 229}, { 76, 104, 210}, { 18, 77, 159},
- { 4, 65, 110}, { 1, 52, 82}, { 1, 31, 55},
- },
- { // band 5
- {165, 51, 238}, {128, 50, 230}, { 57, 49, 185},
- { 28, 47, 130}, { 12, 44, 96}, { 3, 36, 60},
- },
- },
- { // Intra
- { // band 0
- {169, 103, 203}, {117, 96, 176}, { 56, 81, 137},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 31, 150, 224}, { 49, 128, 212}, { 19, 92, 165},
- { 6, 67, 116}, { 2, 43, 71}, { 1, 21, 36},
- },
- { // band 2
- { 58, 156, 230}, { 47, 130, 215}, { 7, 87, 158},
- { 2, 63, 114}, { 1, 39, 71}, { 1, 18, 36},
- },
- { // band 3
- { 85, 176, 240}, { 43, 138, 226}, { 8, 93, 172},
- { 2, 70, 127}, { 1, 46, 81}, { 1, 26, 47},
- },
- { // band 4
- {155, 144, 248}, { 93, 116, 235}, { 21, 83, 180},
- { 4, 59, 119}, { 1, 43, 80}, { 1, 25, 50},
- },
- { // band 5
- {203, 61, 250}, {171, 57, 243}, { 71, 57, 199},
- { 31, 49, 144}, { 13, 42, 96}, { 7, 30, 52},
- },
- },
- },
- { // UV plane
- { // Inter
- { // band 0
- {204, 44, 204}, {137, 57, 184}, { 72, 62, 152},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {145, 117, 236}, {151, 112, 231}, { 87, 95, 208},
- { 31, 77, 165}, { 5, 49, 98}, { 1, 24, 39},
- },
- { // band 2
- {146, 152, 241}, {140, 132, 236}, { 41, 103, 209},
- { 10, 86, 165}, { 2, 55, 106}, { 1, 25, 58},
- },
- { // band 3
- {154, 181, 249}, { 84, 143, 240}, { 23, 114, 210},
- { 6, 102, 182}, { 2, 71, 137}, { 1, 35, 90},
- },
- { // band 4
- {184, 150, 251}, {115, 130, 244}, { 34, 105, 215},
- { 15, 89, 173}, { 1, 51, 141}, {128, 128, 128},
- },
- { // band 5
- {211, 71, 253}, {193, 78, 249}, {106, 91, 232},
- { 61, 87, 198}, { 85, 153, 254}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {232, 104, 242}, {165, 114, 227}, { 96, 120, 206},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {137, 178, 250}, {146, 153, 245}, { 74, 108, 205},
- { 41, 81, 149}, { 24, 55, 104}, { 13, 36, 68},
- },
- { // band 2
- {147, 185, 252}, {127, 161, 246}, { 30, 104, 208},
- { 11, 74, 154}, { 6, 54, 100}, { 2, 29, 63},
- },
- { // band 3
- {163, 191, 254}, {101, 161, 249}, { 22, 114, 215},
- { 6, 89, 173}, { 1, 65, 120}, { 1, 1, 170},
- },
- { // band 4
- {197, 160, 254}, {142, 141, 251}, { 39, 102, 218},
- { 10, 76, 158}, { 1, 56, 122}, {128, 128, 128},
- },
- { // band 5
- {224, 76, 254}, {215, 84, 253}, {107, 85, 232},
- { 43, 71, 177}, { 1, 1, 254}, {128, 128, 128},
- },
- },
- },
- },
-#endif
- { // TX_SIZE 0
- { // Y plane
- { // Intra
- { // band 0
- {174, 30, 159}, { 76, 38, 115}, { 15, 33, 65},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 60, 80, 153}, { 72, 75, 147}, { 36, 68, 129},
- { 15, 59, 104}, { 4, 45, 74}, { 1, 28, 45},
- },
- { // band 2
- { 70, 122, 186}, { 55, 104, 175}, { 21, 83, 144},
- { 8, 67, 112}, { 2, 51, 82}, { 1, 34, 57},
- },
- { // band 3
- { 97, 144, 207}, { 52, 109, 195}, { 16, 77, 153},
- { 4, 58, 113}, { 1, 43, 77}, { 1, 27, 48},
- },
- { // band 4
- {128, 148, 229}, { 76, 104, 210}, { 18, 77, 159},
- { 4, 65, 110}, { 1, 52, 82}, { 1, 31, 55},
- },
- { // band 5
- {165, 51, 238}, {128, 50, 230}, { 57, 49, 185},
- { 28, 47, 130}, { 12, 44, 96}, { 3, 36, 60},
- },
- },
- { // Intra
- { // band 0
- {169, 103, 203}, {117, 96, 176}, { 56, 81, 137},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 31, 150, 224}, { 49, 128, 212}, { 19, 92, 165},
- { 6, 67, 116}, { 2, 43, 71}, { 1, 21, 36},
- },
- { // band 2
- { 58, 156, 230}, { 47, 130, 215}, { 7, 87, 158},
- { 2, 63, 114}, { 1, 39, 71}, { 1, 18, 36},
- },
- { // band 3
- { 85, 176, 240}, { 43, 138, 226}, { 8, 93, 172},
- { 2, 70, 127}, { 1, 46, 81}, { 1, 26, 47},
- },
- { // band 4
- {155, 144, 248}, { 93, 116, 235}, { 21, 83, 180},
- { 4, 59, 119}, { 1, 43, 80}, { 1, 25, 50},
- },
- { // band 5
- {203, 61, 250}, {171, 57, 243}, { 71, 57, 199},
- { 31, 49, 144}, { 13, 42, 96}, { 7, 30, 52},
- },
- },
- },
- { // UV plane
- { // Inter
- { // band 0
- {204, 44, 204}, {137, 57, 184}, { 72, 62, 152},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {145, 117, 236}, {151, 112, 231}, { 87, 95, 208},
- { 31, 77, 165}, { 5, 49, 98}, { 1, 24, 39},
- },
- { // band 2
- {146, 152, 241}, {140, 132, 236}, { 41, 103, 209},
- { 10, 86, 165}, { 2, 55, 106}, { 1, 25, 58},
- },
- { // band 3
- {154, 181, 249}, { 84, 143, 240}, { 23, 114, 210},
- { 6, 102, 182}, { 2, 71, 137}, { 1, 35, 90},
- },
- { // band 4
- {184, 150, 251}, {115, 130, 244}, { 34, 105, 215},
- { 15, 89, 173}, { 1, 51, 141}, {128, 128, 128},
- },
- { // band 5
- {211, 71, 253}, {193, 78, 249}, {106, 91, 232},
- { 61, 87, 198}, { 85, 153, 254}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {232, 104, 242}, {165, 114, 227}, { 96, 120, 206},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {137, 178, 250}, {146, 153, 245}, { 74, 108, 205},
- { 41, 81, 149}, { 24, 55, 104}, { 13, 36, 68},
- },
- { // band 2
- {147, 185, 252}, {127, 161, 246}, { 30, 104, 208},
- { 11, 74, 154}, { 6, 54, 100}, { 2, 29, 63},
- },
- { // band 3
- {163, 191, 254}, {101, 161, 249}, { 22, 114, 215},
- { 6, 89, 173}, { 1, 65, 120}, { 1, 1, 170},
- },
- { // band 4
- {197, 160, 254}, {142, 141, 251}, { 39, 102, 218},
- { 10, 76, 158}, { 1, 56, 122}, {128, 128, 128},
- },
- { // band 5
- {224, 76, 254}, {215, 84, 253}, {107, 85, 232},
- { 43, 71, 177}, { 1, 1, 254}, {128, 128, 128},
- },
- },
- },
- },
- { // TX_SIZE 1
- { // Y plane
- { // Intra
- { // band 0
- { 68, 37, 120}, { 21, 34, 82}, { 5, 26, 49},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 41, 89, 138}, { 56, 83, 132}, { 31, 73, 115},
- { 16, 62, 92}, { 5, 45, 62}, { 1, 24, 32},
- },
- { // band 2
- { 48, 139, 165}, { 30, 114, 160}, { 13, 92, 132},
- { 6, 72, 103}, { 3, 49, 72}, { 1, 26, 41},
- },
- { // band 3
- { 44, 162, 191}, { 20, 127, 175}, { 5, 90, 137},
- { 1, 62, 100}, { 1, 38, 63}, { 1, 20, 32},
- },
- { // band 4
- { 51, 184, 213}, { 16, 137, 193}, { 2, 89, 143},
- { 1, 60, 102}, { 1, 39, 66}, { 1, 23, 37},
- },
- { // band 5
- { 76, 200, 235}, { 27, 150, 216}, { 3, 99, 164},
- { 1, 70, 119}, { 1, 45, 77}, { 1, 22, 38},
- },
- },
- { // Intra
- { // band 0
- { 81, 112, 199}, { 49, 101, 164}, { 19, 80, 119},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 12, 181, 217}, { 48, 151, 212}, { 38, 118, 180},
- { 22, 95, 140}, { 11, 67, 92}, { 13, 46, 44},
- },
- { // band 2
- { 29, 188, 226}, { 19, 147, 210}, { 5, 95, 154},
- { 4, 68, 106}, { 3, 44, 60}, { 1, 24, 27},
- },
- { // band 3
- { 30, 195, 234}, { 15, 153, 216}, { 3, 95, 156},
- { 2, 66, 108}, { 2, 44, 62}, { 1, 24, 29},
- },
- { // band 4
- { 36, 203, 243}, { 12, 162, 225}, { 2, 98, 163},
- { 2, 67, 113}, { 2, 45, 68}, { 1, 24, 34},
- },
- { // band 5
- { 86, 207, 248}, { 35, 165, 236}, { 3, 107, 180},
- { 1, 73, 128}, { 1, 45, 78}, { 1, 20, 34},
- },
- },
- },
- { // UV plane
- { // Inter
- { // band 0
- {188, 37, 205}, {118, 51, 172}, { 56, 57, 135},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {116, 135, 225}, {144, 123, 221}, { 72, 103, 197},
- { 35, 77, 153}, { 15, 47, 82}, { 6, 25, 34},
- },
- { // band 2
- {128, 171, 233}, { 82, 142, 226}, { 31, 106, 191},
- { 16, 82, 146}, { 9, 59, 98}, { 4, 33, 54},
- },
- { // band 3
- {126, 197, 241}, { 66, 155, 230}, { 18, 108, 190},
- { 7, 82, 148}, { 3, 58, 98}, { 1, 25, 50},
- },
- { // band 4
- {117, 207, 244}, { 44, 163, 233}, { 9, 112, 191},
- { 5, 84, 148}, { 3, 61, 87}, { 1, 28, 38},
- },
- { // band 5
- {112, 214, 249}, { 39, 174, 240}, { 6, 125, 205},
- { 4, 96, 163}, { 5, 66, 100}, { 1, 128, 254},
- },
- },
- { // Inter
- { // band 0
- {227, 70, 234}, {145, 91, 213}, { 61, 100, 173},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {108, 198, 243}, {171, 172, 240}, {118, 130, 210},
- {104, 107, 165}, { 64, 85, 114}, { 55, 64, 60},
- },
- { // band 2
- {110, 208, 247}, { 64, 175, 237}, { 24, 112, 187},
- { 24, 81, 133}, { 24, 63, 83}, { 21, 47, 53},
- },
- { // band 3
- { 91, 218, 249}, { 46, 188, 238}, { 8, 113, 184},
- { 5, 83, 137}, { 6, 62, 95}, { 17, 44, 94},
- },
- { // band 4
- { 84, 216, 248}, { 30, 187, 237}, { 2, 117, 188},
- { 1, 88, 141}, { 3, 63, 98}, { 1, 1, 1},
- },
- { // band 5
- {116, 218, 252}, { 47, 186, 242}, { 2, 132, 204},
- { 1, 106, 175}, { 1, 88, 104}, { 1, 254, 128},
- },
- },
- },
- },
- { // TX_SIZE 2
- { // Y plane
- { // Intra
- { // band 0
- { 35, 41, 129}, { 12, 30, 70}, { 2, 19, 32},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 30, 77, 116}, { 39, 70, 110}, { 20, 58, 96},
- { 8, 47, 77}, { 2, 33, 52}, { 1, 17, 26},
- },
- { // band 2
- { 31, 123, 146}, { 18, 103, 140}, { 7, 81, 119},
- { 2, 62, 95}, { 1, 44, 70}, { 1, 26, 42},
- },
- { // band 3
- { 21, 149, 170}, { 9, 114, 158}, { 2, 80, 126},
- { 1, 57, 94}, { 1, 36, 61}, { 1, 18, 31},
- },
- { // band 4
- { 20, 178, 199}, { 6, 134, 183}, { 1, 87, 139},
- { 1, 60, 100}, { 1, 37, 64}, { 1, 18, 31},
- },
- { // band 5
- { 36, 218, 233}, { 6, 160, 207}, { 1, 92, 147},
- { 1, 59, 101}, { 1, 35, 62}, { 1, 18, 31},
- },
- },
- { // Intra
- { // band 0
- { 17, 62, 211}, { 14, 62, 153}, { 5, 50, 84},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 11, 180, 205}, { 87, 160, 205}, { 53, 128, 184},
- { 27, 106, 156}, { 13, 79, 115}, { 6, 46, 67},
- },
- { // band 2
- { 32, 194, 220}, { 20, 145, 202}, { 4, 96, 152},
- { 1, 67, 111}, { 1, 42, 70}, { 1, 21, 37},
- },
- { // band 3
- { 30, 204, 228}, { 14, 152, 207}, { 1, 92, 149},
- { 1, 61, 103}, { 1, 34, 59}, { 1, 16, 28},
- },
- { // band 4
- { 27, 213, 235}, { 7, 159, 210}, { 1, 88, 143},
- { 1, 55, 94}, { 1, 31, 53}, { 1, 16, 27},
- },
- { // band 5
- { 28, 223, 243}, { 4, 173, 217}, { 1, 91, 146},
- { 1, 58, 98}, { 1, 35, 60}, { 1, 19, 33},
- },
- },
- },
- { // UV plane
- { // Inter
- { // band 0
- {172, 37, 202}, { 83, 51, 156}, { 24, 53, 110},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 76, 134, 206}, {110, 124, 200}, { 47, 106, 180},
- { 15, 82, 145}, { 3, 48, 83}, { 1, 19, 32},
- },
- { // band 2
- { 80, 176, 220}, { 49, 145, 212}, { 17, 112, 180},
- { 7, 84, 140}, { 1, 53, 89}, { 1, 27, 43},
- },
- { // band 3
- { 74, 201, 232}, { 38, 158, 221}, { 8, 112, 179},
- { 2, 79, 132}, { 1, 47, 82}, { 1, 26, 42},
- },
- { // band 4
- { 73, 215, 239}, { 28, 169, 227}, { 3, 112, 176},
- { 1, 74, 126}, { 1, 48, 79}, { 1, 27, 44},
- },
- { // band 5
- { 71, 233, 244}, { 18, 180, 230}, { 1, 114, 180},
- { 1, 80, 134}, { 1, 51, 85}, { 1, 26, 36},
- },
- },
- { // Inter
- { // band 0
- {213, 34, 244}, {126, 57, 212}, { 46, 67, 151},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {120, 202, 245}, {198, 173, 241}, {119, 146, 224},
- { 76, 126, 195}, { 44, 102, 159}, { 40, 76, 115},
- },
- { // band 2
- {120, 215, 248}, { 69, 171, 237}, { 23, 119, 194},
- { 10, 86, 147}, { 2, 56, 94}, { 1, 25, 44},
- },
- { // band 3
- {102, 226, 250}, { 53, 183, 239}, { 9, 118, 188},
- { 2, 78, 131}, { 1, 48, 89}, { 1, 17, 36},
- },
- { // band 4
- { 86, 235, 252}, { 34, 194, 240}, { 2, 109, 173},
- { 1, 68, 118}, { 1, 44, 79}, { 1, 1, 38},
- },
- { // band 5
- { 59, 236, 243}, { 11, 189, 228}, { 1, 112, 187},
- { 1, 88, 145}, { 1, 55, 92}, { 1, 1, 128},
- },
- },
- },
- },
- { // TX_SIZE 3
- { // Y plane
- { // Intra
- { // band 0
- { 41, 40, 104}, { 12, 31, 64}, { 2, 16, 28},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 65, 58, 132}, { 50, 61, 130}, { 40, 57, 116},
- { 22, 46, 87}, { 2, 28, 44}, { 1, 11, 17},
- },
- { // band 2
- { 55, 139, 135}, { 46, 122, 132}, { 21, 89, 110},
- { 6, 60, 78}, { 1, 38, 54}, { 1, 17, 27},
- },
- { // band 3
- { 29, 167, 161}, { 10, 120, 141}, { 1, 69, 98},
- { 1, 42, 66}, { 1, 28, 44}, { 1, 15, 24},
- },
- { // band 4
- { 19, 191, 180}, { 4, 125, 154}, { 1, 70, 107},
- { 1, 48, 77}, { 1, 33, 53}, { 1, 17, 28},
- },
- { // band 5
- { 16, 238, 231}, { 2, 163, 198}, { 1, 85, 134},
- { 1, 54, 90}, { 1, 34, 57}, { 1, 17, 29},
- },
- },
- { // Intra
- { // band 0
- { 70, 15, 216}, { 40, 18, 164}, { 14, 17, 83},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 25, 150, 200}, {185, 154, 211}, {123, 137, 199},
- { 67, 119, 177}, { 31, 96, 137}, { 18, 63, 86},
- },
- { // band 2
- { 57, 187, 223}, { 35, 148, 207}, { 7, 104, 159},
- { 2, 72, 113}, { 1, 44, 71}, { 1, 20, 34},
- },
- { // band 3
- { 44, 203, 233}, { 18, 157, 212}, { 1, 98, 150},
- { 1, 61, 102}, { 1, 38, 62}, { 1, 19, 31},
- },
- { // band 4
- { 41, 215, 238}, { 11, 166, 215}, { 1, 94, 146},
- { 1, 60, 101}, { 1, 37, 63}, { 1, 17, 28},
- },
- { // band 5
- { 19, 236, 246}, { 3, 188, 223}, { 1, 95, 146},
- { 1, 58, 95}, { 1, 34, 56}, { 1, 17, 27},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {146, 27, 156}, { 49, 32, 116}, { 10, 39, 77},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 47, 101, 172}, { 93, 100, 178}, { 58, 91, 165},
- { 26, 75, 134}, { 4, 49, 82}, { 2, 22, 33},
- },
- { // band 2
- { 60, 158, 196}, { 44, 135, 186}, { 25, 106, 157},
- { 8, 81, 124}, { 2, 56, 86}, { 1, 28, 45},
- },
- { // band 3
- { 44, 169, 212}, { 15, 138, 196}, { 2, 100, 157},
- { 1, 74, 119}, { 1, 49, 76}, { 1, 20, 34},
- },
- { // band 4
- { 38, 199, 231}, { 11, 158, 214}, { 1, 111, 167},
- { 1, 76, 122}, { 1, 44, 76}, { 1, 17, 39},
- },
- { // band 5
- { 40, 236, 246}, { 10, 187, 230}, { 1, 115, 175},
- { 1, 74, 122}, { 1, 42, 71}, { 1, 14, 59},
- },
- },
- { // Inter
- { // band 0
- {161, 26, 237}, { 65, 46, 209}, { 21, 46, 161},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 87, 229, 245}, {206, 214, 244}, {148, 186, 236},
- { 89, 165, 221}, { 41, 132, 186}, { 37, 93, 141},
- },
- { // band 2
- { 93, 231, 246}, { 47, 181, 231}, { 8, 117, 188},
- { 2, 84, 138}, { 1, 43, 87}, { 1, 27, 41},
- },
- { // band 3
- { 80, 239, 250}, { 28, 190, 236}, { 1, 119, 183},
- { 1, 84, 135}, { 1, 81, 69}, { 1, 102, 1},
- },
- { // band 4
- { 67, 245, 252}, { 22, 206, 242}, { 1, 130, 195},
- { 1, 77, 136}, { 1, 35, 88}, {128, 128, 128},
- },
- { // band 5
- { 43, 250, 228}, { 31, 185, 204}, { 6, 101, 183},
- { 1, 92, 151}, { 1, 84, 137}, {128, 128, 128},
- },
- },
- },
- },
-#if CONFIG_TX64X64
- { // TX_SIZE 4
- { // Y plane
- { // Intra
- { // band 0
- { 41, 40, 104}, { 12, 31, 64}, { 2, 16, 28},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 65, 58, 132}, { 50, 61, 130}, { 40, 57, 116},
- { 22, 46, 87}, { 2, 28, 44}, { 1, 11, 17},
- },
- { // band 2
- { 55, 139, 135}, { 46, 122, 132}, { 21, 89, 110},
- { 6, 60, 78}, { 1, 38, 54}, { 1, 17, 27},
- },
- { // band 3
- { 29, 167, 161}, { 10, 120, 141}, { 1, 69, 98},
- { 1, 42, 66}, { 1, 28, 44}, { 1, 15, 24},
- },
- { // band 4
- { 19, 191, 180}, { 4, 125, 154}, { 1, 70, 107},
- { 1, 48, 77}, { 1, 33, 53}, { 1, 17, 28},
- },
- { // band 5
- { 16, 238, 231}, { 2, 163, 198}, { 1, 85, 134},
- { 1, 54, 90}, { 1, 34, 57}, { 1, 17, 29},
- },
- },
- { // Inter
- { // band 0
- { 70, 15, 216}, { 40, 18, 164}, { 14, 17, 83},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 25, 150, 200}, {185, 154, 211}, {123, 137, 199},
- { 67, 119, 177}, { 31, 96, 137}, { 18, 63, 86},
- },
- { // band 2
- { 57, 187, 223}, { 35, 148, 207}, { 7, 104, 159},
- { 2, 72, 113}, { 1, 44, 71}, { 1, 20, 34},
- },
- { // band 3
- { 44, 203, 233}, { 18, 157, 212}, { 1, 98, 150},
- { 1, 61, 102}, { 1, 38, 62}, { 1, 19, 31},
- },
- { // band 4
- { 41, 215, 238}, { 11, 166, 215}, { 1, 94, 146},
- { 1, 60, 101}, { 1, 37, 63}, { 1, 17, 28},
- },
- { // band 5
- { 19, 236, 246}, { 3, 188, 223}, { 1, 95, 146},
- { 1, 58, 95}, { 1, 34, 56}, { 1, 17, 27},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {146, 27, 156}, { 49, 32, 116}, { 10, 39, 77},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 47, 101, 172}, { 93, 100, 178}, { 58, 91, 165},
- { 26, 75, 134}, { 4, 49, 82}, { 2, 22, 33},
- },
- { // band 2
- { 60, 158, 196}, { 44, 135, 186}, { 25, 106, 157},
- { 8, 81, 124}, { 2, 56, 86}, { 1, 28, 45},
- },
- { // band 3
- { 44, 169, 212}, { 15, 138, 196}, { 2, 100, 157},
- { 1, 74, 119}, { 1, 49, 76}, { 1, 20, 34},
- },
- { // band 4
- { 38, 199, 231}, { 11, 158, 214}, { 1, 111, 167},
- { 1, 76, 122}, { 1, 44, 76}, { 1, 17, 39},
- },
- { // band 5
- { 40, 236, 246}, { 10, 187, 230}, { 1, 115, 175},
- { 1, 74, 122}, { 1, 42, 71}, { 1, 14, 59},
- },
- },
- { // Inter
- { // band 0
- {161, 26, 237}, { 65, 46, 209}, { 21, 46, 161},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 87, 229, 245}, {206, 214, 244}, {148, 186, 236},
- { 89, 165, 221}, { 41, 132, 186}, { 37, 93, 141},
- },
- { // band 2
- { 93, 231, 246}, { 47, 181, 231}, { 8, 117, 188},
- { 2, 84, 138}, { 1, 43, 87}, { 1, 27, 41},
- },
- { // band 3
- { 80, 239, 250}, { 28, 190, 236}, { 1, 119, 183},
- { 1, 84, 135}, { 1, 81, 69}, { 1, 102, 1},
- },
- { // band 4
- { 67, 245, 252}, { 22, 206, 242}, { 1, 130, 195},
- { 1, 77, 136}, { 1, 35, 88}, {128, 128, 128},
- },
- { // band 5
- { 43, 250, 228}, { 31, 185, 204}, { 6, 101, 183},
- { 1, 92, 151}, { 1, 84, 137}, {128, 128, 128},
- },
- },
- },
- },
-#endif // CONFIG_TX64X64
- },
- { // Q_Index 2
-#if CONFIG_CHROMA_2X2
- { // TX_SIZE 0
- { // Y plane
- { // Intra
- { // band 0
- {181, 22, 175}, { 96, 37, 147}, { 35, 41, 105},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 80, 95, 197}, {111, 92, 193}, { 59, 87, 175},
- { 29, 79, 150}, { 10, 65, 118}, { 2, 47, 82},
- },
- { // band 2
- { 90, 141, 216}, { 77, 120, 210}, { 23, 95, 184},
- { 11, 81, 151}, { 6, 75, 130}, { 2, 58, 113},
- },
- { // band 3
- {122, 167, 231}, { 66, 119, 225}, { 26, 87, 189},
- { 7, 76, 151}, { 2, 63, 125}, { 1, 59, 77},
- },
- { // band 4
- {162, 147, 244}, {110, 97, 236}, { 32, 88, 204},
- { 11, 89, 174}, { 5, 78, 151}, {128, 128, 128},
- },
- { // band 5
- {205, 59, 251}, {176, 68, 248}, { 90, 71, 223},
- { 49, 72, 188}, { 17, 74, 203}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {188, 70, 207}, {140, 73, 189}, { 85, 73, 163},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 59, 144, 239}, { 79, 126, 237}, { 31, 102, 202},
- { 10, 81, 153}, { 3, 56, 102}, { 2, 33, 59},
- },
- { // band 2
- {100, 152, 243}, { 80, 129, 236}, { 14, 94, 194},
- { 4, 72, 150}, { 1, 50, 103}, { 1, 35, 60},
- },
- { // band 3
- {130, 183, 247}, { 70, 139, 242}, { 19, 100, 203},
- { 4, 83, 159}, { 1, 59, 119}, { 1, 44, 72},
- },
- { // band 4
- {197, 138, 252}, {135, 107, 247}, { 31, 86, 210},
- { 7, 74, 160}, { 1, 53, 107}, {128, 128, 128},
- },
- { // band 5
- {229, 54, 254}, {200, 51, 251}, { 83, 61, 226},
- { 33, 55, 177}, { 12, 74, 145}, {128, 128, 128},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {229, 20, 235}, {183, 37, 221}, {127, 47, 198},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {188, 115, 251}, {208, 110, 250}, {101, 99, 235},
- { 38, 81, 197}, { 9, 56, 132}, { 9, 52, 63},
- },
- { // band 2
- {189, 150, 252}, {186, 137, 251}, { 54, 107, 236},
- { 14, 90, 195}, { 1, 89, 104}, {128, 128, 128},
- },
- { // band 3
- {209, 180, 254}, {142, 145, 253}, { 51, 130, 236},
- { 6, 128, 214}, { 1, 128, 254}, {128, 128, 128},
- },
- { // band 4
- {231, 140, 254}, {194, 128, 254}, { 75, 119, 233},
- {128, 23, 230}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {244, 59, 254}, {239, 81, 254}, {128, 85, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {246, 55, 247}, {197, 64, 235}, {141, 74, 218},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {178, 163, 254}, {192, 138, 252}, { 85, 103, 231},
- { 49, 81, 179}, { 32, 54, 133}, { 12, 26, 98},
- },
- { // band 2
- {189, 173, 254}, {179, 150, 253}, { 60, 94, 237},
- { 34, 81, 198}, { 20, 53, 187}, {128, 128, 128},
- },
- { // band 3
- {202, 191, 254}, {157, 160, 254}, { 57, 117, 240},
- { 28, 105, 211}, { 1, 128, 1}, {128, 128, 128},
- },
- { // band 4
- {231, 146, 254}, {208, 133, 254}, { 66, 78, 233},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {246, 49, 254}, {246, 63, 254}, { 85, 142, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
-#endif
- { // TX_SIZE 0
- { // Y plane
- { // Intra
- { // band 0
- {181, 22, 175}, { 96, 37, 147}, { 35, 41, 105},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 80, 95, 197}, {111, 92, 193}, { 59, 87, 175},
- { 29, 79, 150}, { 10, 65, 118}, { 2, 47, 82},
- },
- { // band 2
- { 90, 141, 216}, { 77, 120, 210}, { 23, 95, 184},
- { 11, 81, 151}, { 6, 75, 130}, { 2, 58, 113},
- },
- { // band 3
- {122, 167, 231}, { 66, 119, 225}, { 26, 87, 189},
- { 7, 76, 151}, { 2, 63, 125}, { 1, 59, 77},
- },
- { // band 4
- {162, 147, 244}, {110, 97, 236}, { 32, 88, 204},
- { 11, 89, 174}, { 5, 78, 151}, {128, 128, 128},
- },
- { // band 5
- {205, 59, 251}, {176, 68, 248}, { 90, 71, 223},
- { 49, 72, 188}, { 17, 74, 203}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {188, 70, 207}, {140, 73, 189}, { 85, 73, 163},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 59, 144, 239}, { 79, 126, 237}, { 31, 102, 202},
- { 10, 81, 153}, { 3, 56, 102}, { 2, 33, 59},
- },
- { // band 2
- {100, 152, 243}, { 80, 129, 236}, { 14, 94, 194},
- { 4, 72, 150}, { 1, 50, 103}, { 1, 35, 60},
- },
- { // band 3
- {130, 183, 247}, { 70, 139, 242}, { 19, 100, 203},
- { 4, 83, 159}, { 1, 59, 119}, { 1, 44, 72},
- },
- { // band 4
- {197, 138, 252}, {135, 107, 247}, { 31, 86, 210},
- { 7, 74, 160}, { 1, 53, 107}, {128, 128, 128},
- },
- { // band 5
- {229, 54, 254}, {200, 51, 251}, { 83, 61, 226},
- { 33, 55, 177}, { 12, 74, 145}, {128, 128, 128},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {229, 20, 235}, {183, 37, 221}, {127, 47, 198},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {188, 115, 251}, {208, 110, 250}, {101, 99, 235},
- { 38, 81, 197}, { 9, 56, 132}, { 9, 52, 63},
- },
- { // band 2
- {189, 150, 252}, {186, 137, 251}, { 54, 107, 236},
- { 14, 90, 195}, { 1, 89, 104}, {128, 128, 128},
- },
- { // band 3
- {209, 180, 254}, {142, 145, 253}, { 51, 130, 236},
- { 6, 128, 214}, { 1, 128, 254}, {128, 128, 128},
- },
- { // band 4
- {231, 140, 254}, {194, 128, 254}, { 75, 119, 233},
- {128, 23, 230}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {244, 59, 254}, {239, 81, 254}, {128, 85, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {246, 55, 247}, {197, 64, 235}, {141, 74, 218},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {178, 163, 254}, {192, 138, 252}, { 85, 103, 231},
- { 49, 81, 179}, { 32, 54, 133}, { 12, 26, 98},
- },
- { // band 2
- {189, 173, 254}, {179, 150, 253}, { 60, 94, 237},
- { 34, 81, 198}, { 20, 53, 187}, {128, 128, 128},
- },
- { // band 3
- {202, 191, 254}, {157, 160, 254}, { 57, 117, 240},
- { 28, 105, 211}, { 1, 128, 1}, {128, 128, 128},
- },
- { // band 4
- {231, 146, 254}, {208, 133, 254}, { 66, 78, 233},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {246, 49, 254}, {246, 63, 254}, { 85, 142, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
- { // TX_SIZE 1
- { // Y plane
- { // Intra
- { // band 0
- { 45, 28, 124}, { 23, 35, 107}, { 10, 34, 78},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 53, 99, 177}, { 82, 96, 174}, { 46, 89, 158},
- { 21, 76, 133}, { 6, 56, 94}, { 1, 33, 54},
- },
- { // band 2
- { 68, 147, 201}, { 42, 124, 195}, { 17, 98, 166},
- { 7, 75, 131}, { 2, 53, 93}, { 1, 33, 59},
- },
- { // band 3
- { 65, 176, 217}, { 30, 137, 206}, { 6, 97, 167},
- { 2, 70, 128}, { 1, 47, 88}, { 1, 29, 46},
- },
- { // band 4
- { 69, 195, 232}, { 24, 146, 218}, { 4, 100, 175},
- { 2, 72, 134}, { 1, 51, 93}, { 1, 29, 52},
- },
- { // band 5
- { 96, 212, 246}, { 39, 158, 234}, { 6, 109, 192},
- { 2, 77, 144}, { 1, 50, 95}, { 1, 20, 45},
- },
- },
- { // Inter
- { // band 0
- { 71, 80, 213}, { 53, 73, 181}, { 25, 66, 141},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 35, 168, 231}, { 91, 150, 229}, { 49, 122, 202},
- { 22, 97, 162}, { 10, 68, 108}, { 9, 48, 57},
- },
- { // band 2
- { 56, 178, 236}, { 32, 148, 225}, { 9, 99, 176},
- { 4, 69, 127}, { 2, 44, 78}, { 1, 25, 41},
- },
- { // band 3
- { 57, 191, 242}, { 27, 155, 230}, { 5, 102, 180},
- { 2, 71, 133}, { 1, 44, 78}, { 1, 27, 41},
- },
- { // band 4
- { 67, 201, 247}, { 24, 162, 237}, { 3, 106, 188},
- { 3, 74, 137}, { 1, 46, 85}, { 1, 34, 48},
- },
- { // band 5
- {111, 210, 251}, { 47, 166, 244}, { 3, 113, 199},
- { 2, 77, 146}, { 1, 48, 93}, { 1, 38, 22},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {206, 21, 221}, {150, 36, 195}, { 94, 44, 164},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {147, 128, 239}, {194, 122, 238}, { 95, 104, 220},
- { 39, 81, 183}, { 13, 53, 111}, { 3, 24, 49},
- },
- { // band 2
- {164, 163, 244}, {106, 142, 239}, { 50, 112, 215},
- { 26, 90, 177}, { 12, 67, 130}, { 1, 1, 64},
- },
- { // band 3
- {155, 193, 249}, { 88, 158, 244}, { 26, 124, 220},
- { 10, 98, 173}, { 1, 77, 126}, {128, 128, 128},
- },
- { // band 4
- {141, 205, 252}, { 64, 174, 248}, { 17, 124, 221},
- { 12, 92, 176}, { 1, 29, 148}, {128, 128, 128},
- },
- { // band 5
- {150, 217, 254}, { 74, 191, 252}, { 30, 144, 215},
- { 1, 106, 137}, {128, 1, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {241, 37, 242}, {175, 48, 223}, { 99, 53, 189},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {153, 183, 248}, {212, 156, 247}, {134, 124, 221},
- { 88, 103, 184}, { 59, 86, 132}, { 29, 61, 67},
- },
- { // band 2
- {162, 199, 250}, {106, 167, 247}, { 56, 110, 207},
- { 32, 85, 165}, { 16, 71, 130}, { 1, 93, 254},
- },
- { // band 3
- {143, 213, 252}, { 86, 187, 250}, { 23, 124, 220},
- { 7, 95, 176}, { 1, 109, 102}, {128, 128, 128},
- },
- { // band 4
- {130, 219, 254}, { 70, 201, 253}, { 15, 128, 215},
- { 1, 101, 201}, { 1, 64, 170}, {128, 128, 128},
- },
- { // band 5
- {155, 219, 254}, {105, 207, 254}, { 28, 155, 229},
- { 1, 153, 191}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
- { // TX_SIZE 2
- { // Y plane
- { // Intra
- { // band 0
- { 18, 26, 117}, { 10, 29, 82}, { 3, 25, 52},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 35, 88, 152}, { 62, 85, 150}, { 36, 77, 137},
- { 16, 66, 116}, { 4, 47, 81}, { 1, 26, 44},
- },
- { // band 2
- { 55, 141, 182}, { 32, 119, 177}, { 12, 93, 154},
- { 4, 71, 123}, { 1, 51, 89}, { 1, 32, 56},
- },
- { // band 3
- { 46, 171, 202}, { 21, 130, 191}, { 5, 91, 154},
- { 1, 64, 115}, { 1, 42, 77}, { 1, 25, 41},
- },
- { // band 4
- { 43, 195, 219}, { 12, 142, 203}, { 1, 91, 156},
- { 1, 63, 115}, { 1, 41, 77}, { 1, 22, 43},
- },
- { // band 5
- { 42, 221, 238}, { 8, 162, 219}, { 1, 98, 167},
- { 1, 67, 123}, { 1, 43, 83}, { 1, 25, 38},
- },
- },
- { // Inter
- { // band 0
- { 16, 51, 216}, { 20, 48, 168}, { 9, 44, 109},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 34, 164, 226}, {124, 148, 226}, { 72, 127, 207},
- { 36, 107, 175}, { 15, 81, 129}, { 6, 51, 79},
- },
- { // band 2
- { 61, 182, 234}, { 35, 148, 220}, { 9, 101, 178},
- { 4, 71, 134}, { 1, 46, 90}, { 1, 24, 51},
- },
- { // band 3
- { 54, 198, 239}, { 25, 156, 224}, { 3, 98, 173},
- { 1, 66, 124}, { 1, 41, 78}, { 1, 15, 37},
- },
- { // band 4
- { 48, 209, 242}, { 12, 162, 226}, { 1, 96, 169},
- { 1, 63, 119}, { 1, 40, 78}, { 1, 18, 45},
- },
- { // band 5
- { 44, 223, 247}, { 6, 173, 232}, { 1, 105, 178},
- { 1, 71, 131}, { 1, 44, 84}, { 1, 13, 46},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {188, 26, 214}, {121, 42, 181}, { 66, 49, 149},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {136, 128, 233}, {172, 124, 230}, { 80, 106, 211},
- { 27, 81, 174}, { 6, 49, 98}, { 8, 28, 49},
- },
- { // band 2
- {145, 166, 239}, { 92, 141, 229}, { 28, 108, 196},
- { 8, 87, 154}, { 1, 58, 105}, { 1, 27, 59},
- },
- { // band 3
- {131, 193, 242}, { 66, 151, 231}, { 13, 112, 192},
- { 2, 81, 152}, { 1, 66, 121}, { 1, 23, 64},
- },
- { // band 4
- {112, 211, 246}, { 41, 164, 235}, { 5, 117, 202},
- { 1, 83, 162}, { 1, 64, 111}, {128, 128, 128},
- },
- { // band 5
- { 96, 230, 250}, { 28, 185, 243}, { 2, 132, 204},
- { 1, 91, 166}, { 1, 85, 46}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {238, 23, 242}, {157, 29, 215}, { 73, 27, 162},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {165, 173, 250}, {222, 151, 247}, {152, 134, 235},
- {114, 120, 210}, { 86, 109, 176}, { 53, 88, 145},
- },
- { // band 2
- {164, 194, 249}, {100, 158, 241}, { 35, 111, 212},
- { 17, 85, 167}, { 1, 52, 112}, { 1, 73, 1},
- },
- { // band 3
- {151, 215, 252}, { 83, 172, 245}, { 16, 122, 208},
- { 6, 101, 165}, { 1, 74, 113}, { 1, 1, 1},
- },
- { // band 4
- {138, 230, 253}, { 65, 184, 248}, { 8, 128, 212},
- { 1, 111, 182}, {128, 1, 1}, {128, 128, 128},
- },
- { // band 5
- {123, 240, 253}, { 36, 201, 250}, { 3, 127, 211},
- { 1, 68, 204}, {128, 1, 1}, {128, 128, 128},
- },
- },
- },
- },
- { // TX_SIZE 3
- { // Y plane
- { // Intra
- { // band 0
- { 51, 21, 156}, { 30, 23, 86}, { 4, 18, 37},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 38, 77, 129}, { 79, 76, 129}, { 40, 66, 117},
- { 12, 54, 95}, { 1, 36, 60}, { 1, 17, 29},
- },
- { // band 2
- { 44, 133, 149}, { 24, 107, 143}, { 8, 78, 121},
- { 3, 59, 97}, { 1, 42, 71}, { 1, 22, 37},
- },
- { // band 3
- { 29, 160, 171}, { 9, 114, 158}, { 1, 76, 125},
- { 1, 54, 93}, { 1, 36, 63}, { 1, 20, 35},
- },
- { // band 4
- { 22, 188, 205}, { 6, 132, 186}, { 1, 87, 144},
- { 1, 62, 107}, { 1, 41, 72}, { 1, 23, 41},
- },
- { // band 5
- { 25, 233, 236}, { 5, 165, 214}, { 1, 96, 158},
- { 1, 63, 112}, { 1, 40, 73}, { 1, 23, 40},
- },
- },
- { // Inter
- { // band 0
- { 48, 20, 231}, { 37, 21, 179}, { 15, 18, 109},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 41, 154, 216}, {196, 142, 221}, {131, 125, 207},
- { 84, 111, 181}, { 45, 91, 142}, { 27, 62, 89},
- },
- { // band 2
- { 72, 181, 230}, { 41, 147, 215}, { 10, 102, 173},
- { 3, 73, 132}, { 1, 47, 89}, { 1, 23, 50},
- },
- { // band 3
- { 60, 201, 236}, { 23, 157, 219}, { 2, 99, 167},
- { 1, 69, 124}, { 1, 43, 80}, { 1, 22, 39},
- },
- { // band 4
- { 53, 214, 242}, { 15, 165, 224}, { 1, 101, 173},
- { 1, 70, 131}, { 1, 44, 83}, { 1, 23, 49},
- },
- { // band 5
- { 39, 239, 248}, { 7, 186, 233}, { 1, 108, 174},
- { 1, 70, 123}, { 1, 43, 77}, { 1, 16, 42},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {161, 26, 204}, { 77, 40, 160}, { 26, 50, 117},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 80, 140, 218}, {136, 133, 215}, { 63, 117, 197},
- { 20, 93, 170}, { 7, 55, 102}, { 13, 32, 52},
- },
- { // band 2
- { 86, 173, 231}, { 46, 150, 220}, { 18, 118, 190},
- { 8, 90, 150}, { 2, 60, 95}, { 1, 39, 41},
- },
- { // band 3
- { 80, 183, 242}, { 37, 160, 231}, { 6, 120, 182},
- { 1, 86, 137}, { 1, 46, 78}, { 1, 15, 24},
- },
- { // band 4
- { 88, 215, 247}, { 42, 179, 235}, { 4, 116, 182},
- { 2, 80, 133}, { 1, 46, 85}, { 1, 64, 43},
- },
- { // band 5
- {100, 236, 250}, { 31, 186, 234}, { 1, 114, 181},
- { 1, 85, 135}, { 1, 78, 64}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {213, 13, 245}, {106, 16, 211}, { 32, 11, 156},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {140, 214, 247}, {241, 186, 243}, {177, 172, 235},
- {128, 156, 219}, {106, 130, 191}, { 99, 105, 152},
- },
- { // band 2
- {125, 218, 248}, { 75, 167, 239}, { 29, 111, 212},
- { 6, 66, 152}, { 1, 42, 96}, { 1, 85, 128},
- },
- { // band 3
- {120, 232, 252}, { 60, 189, 247}, { 8, 141, 200},
- { 1, 89, 134}, { 1, 32, 128}, {128, 128, 128},
- },
- { // band 4
- {111, 238, 253}, { 56, 198, 245}, { 1, 123, 208},
- { 1, 93, 176}, { 1, 1, 73}, {128, 128, 128},
- },
- { // band 5
- { 98, 251, 249}, { 56, 189, 244}, { 17, 113, 220},
- { 1, 109, 179}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
-#if CONFIG_TX64X64
- { // TX_SIZE 4
- { // Y plane
- { // Intra
- { // band 0
- { 51, 21, 156}, { 30, 23, 86}, { 4, 18, 37},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 38, 77, 129}, { 79, 76, 129}, { 40, 66, 117},
- { 12, 54, 95}, { 1, 36, 60}, { 1, 17, 29},
- },
- { // band 2
- { 44, 133, 149}, { 24, 107, 143}, { 8, 78, 121},
- { 3, 59, 97}, { 1, 42, 71}, { 1, 22, 37},
- },
- { // band 3
- { 29, 160, 171}, { 9, 114, 158}, { 1, 76, 125},
- { 1, 54, 93}, { 1, 36, 63}, { 1, 20, 35},
- },
- { // band 4
- { 22, 188, 205}, { 6, 132, 186}, { 1, 87, 144},
- { 1, 62, 107}, { 1, 41, 72}, { 1, 23, 41},
- },
- { // band 5
- { 25, 233, 236}, { 5, 165, 214}, { 1, 96, 158},
- { 1, 63, 112}, { 1, 40, 73}, { 1, 23, 40},
- },
- },
- { // Inter
- { // band 0
- { 48, 20, 231}, { 37, 21, 179}, { 15, 18, 109},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 41, 154, 216}, {196, 142, 221}, {131, 125, 207},
- { 84, 111, 181}, { 45, 91, 142}, { 27, 62, 89},
- },
- { // band 2
- { 72, 181, 230}, { 41, 147, 215}, { 10, 102, 173},
- { 3, 73, 132}, { 1, 47, 89}, { 1, 23, 50},
- },
- { // band 3
- { 60, 201, 236}, { 23, 157, 219}, { 2, 99, 167},
- { 1, 69, 124}, { 1, 43, 80}, { 1, 22, 39},
- },
- { // band 4
- { 53, 214, 242}, { 15, 165, 224}, { 1, 101, 173},
- { 1, 70, 131}, { 1, 44, 83}, { 1, 23, 49},
- },
- { // band 5
- { 39, 239, 248}, { 7, 186, 233}, { 1, 108, 174},
- { 1, 70, 123}, { 1, 43, 77}, { 1, 16, 42},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {161, 26, 204}, { 77, 40, 160}, { 26, 50, 117},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 80, 140, 218}, {136, 133, 215}, { 63, 117, 197},
- { 20, 93, 170}, { 7, 55, 102}, { 13, 32, 52},
- },
- { // band 2
- { 86, 173, 231}, { 46, 150, 220}, { 18, 118, 190},
- { 8, 90, 150}, { 2, 60, 95}, { 1, 39, 41},
- },
- { // band 3
- { 80, 183, 242}, { 37, 160, 231}, { 6, 120, 182},
- { 1, 86, 137}, { 1, 46, 78}, { 1, 15, 24},
- },
- { // band 4
- { 88, 215, 247}, { 42, 179, 235}, { 4, 116, 182},
- { 2, 80, 133}, { 1, 46, 85}, { 1, 64, 43},
- },
- { // band 5
- {100, 236, 250}, { 31, 186, 234}, { 1, 114, 181},
- { 1, 85, 135}, { 1, 78, 64}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {213, 13, 245}, {106, 16, 211}, { 32, 11, 156},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {140, 214, 247}, {241, 186, 243}, {177, 172, 235},
- {128, 156, 219}, {106, 130, 191}, { 99, 105, 152},
- },
- { // band 2
- {125, 218, 248}, { 75, 167, 239}, { 29, 111, 212},
- { 6, 66, 152}, { 1, 42, 96}, { 1, 85, 128},
- },
- { // band 3
- {120, 232, 252}, { 60, 189, 247}, { 8, 141, 200},
- { 1, 89, 134}, { 1, 32, 128}, {128, 128, 128},
- },
- { // band 4
- {111, 238, 253}, { 56, 198, 245}, { 1, 123, 208},
- { 1, 93, 176}, { 1, 1, 73}, {128, 128, 128},
- },
- { // band 5
- { 98, 251, 249}, { 56, 189, 244}, { 17, 113, 220},
- { 1, 109, 179}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
-#endif // CONFIG_TX64X64
- },
- { // Q_Index 3
-#if CONFIG_CHROMA_2X2
- { // TX_SIZE 0
- { // Y plane
- { // Intra
- { // band 0
- {186, 16, 200}, {122, 31, 187}, { 78, 40, 161},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {146, 119, 245}, {182, 115, 244}, {130, 113, 238},
- { 88, 110, 225}, { 47, 103, 208}, { 5, 102, 188},
- },
- { // band 2
- {164, 157, 248}, {155, 141, 250}, { 71, 116, 243},
- { 88, 129, 233}, { 50, 99, 228}, { 26, 148, 191},
- },
- { // band 3
- {200, 158, 253}, {177, 118, 252}, { 99, 113, 245},
- { 77, 120, 210}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {231, 104, 254}, {209, 82, 254}, {143, 112, 252},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {250, 36, 254}, {243, 55, 254}, {223, 170, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {207, 37, 226}, {164, 46, 218}, {122, 58, 201},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {149, 154, 253}, {170, 137, 253}, { 94, 123, 247},
- { 42, 113, 222}, { 16, 97, 174}, { 49, 98, 159},
- },
- { // band 2
- {177, 162, 253}, {165, 142, 252}, { 51, 108, 243},
- { 18, 108, 213}, { 1, 98, 254}, {128, 128, 128},
- },
- { // band 3
- {211, 152, 254}, {184, 116, 254}, { 70, 110, 244},
- { 8, 108, 237}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {236, 89, 254}, {210, 67, 254}, {112, 111, 248},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {246, 26, 254}, {233, 35, 254}, {128, 1, 254},
- {254, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {247, 2, 247}, {226, 8, 242}, {191, 14, 235},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {231, 94, 254}, {248, 91, 254}, {186, 89, 252},
- {128, 92, 244}, { 79, 112, 254}, {128, 128, 128},
- },
- { // band 2
- {228, 145, 253}, {240, 130, 254}, {223, 105, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 3
- {245, 153, 253}, {240, 120, 254}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {254, 128, 254}, {204, 128, 254}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {253, 7, 249}, {224, 9, 244}, {182, 13, 231},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {234, 109, 254}, {242, 104, 254}, {160, 98, 254},
- {123, 85, 243}, { 82, 43, 217}, {128, 128, 128},
- },
- { // band 2
- {243, 137, 254}, {240, 118, 254}, {136, 53, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 3
- {251, 173, 254}, {229, 129, 250}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {254, 119, 254}, {254, 128, 128}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
-#endif
- { // TX_SIZE 0
- { // Y plane
- { // Intra
- { // band 0
- {186, 16, 200}, {122, 31, 187}, { 78, 40, 161},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {146, 119, 245}, {182, 115, 244}, {130, 113, 238},
- { 88, 110, 225}, { 47, 103, 208}, { 5, 102, 188},
- },
- { // band 2
- {164, 157, 248}, {155, 141, 250}, { 71, 116, 243},
- { 88, 129, 233}, { 50, 99, 228}, { 26, 148, 191},
- },
- { // band 3
- {200, 158, 253}, {177, 118, 252}, { 99, 113, 245},
- { 77, 120, 210}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {231, 104, 254}, {209, 82, 254}, {143, 112, 252},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {250, 36, 254}, {243, 55, 254}, {223, 170, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {207, 37, 226}, {164, 46, 218}, {122, 58, 201},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {149, 154, 253}, {170, 137, 253}, { 94, 123, 247},
- { 42, 113, 222}, { 16, 97, 174}, { 49, 98, 159},
- },
- { // band 2
- {177, 162, 253}, {165, 142, 252}, { 51, 108, 243},
- { 18, 108, 213}, { 1, 98, 254}, {128, 128, 128},
- },
- { // band 3
- {211, 152, 254}, {184, 116, 254}, { 70, 110, 244},
- { 8, 108, 237}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {236, 89, 254}, {210, 67, 254}, {112, 111, 248},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {246, 26, 254}, {233, 35, 254}, {128, 1, 254},
- {254, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {247, 2, 247}, {226, 8, 242}, {191, 14, 235},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {231, 94, 254}, {248, 91, 254}, {186, 89, 252},
- {128, 92, 244}, { 79, 112, 254}, {128, 128, 128},
- },
- { // band 2
- {228, 145, 253}, {240, 130, 254}, {223, 105, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 3
- {245, 153, 253}, {240, 120, 254}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {254, 128, 254}, {204, 128, 254}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {253, 7, 249}, {224, 9, 244}, {182, 13, 231},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {234, 109, 254}, {242, 104, 254}, {160, 98, 254},
- {123, 85, 243}, { 82, 43, 217}, {128, 128, 128},
- },
- { // band 2
- {243, 137, 254}, {240, 118, 254}, {136, 53, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 3
- {251, 173, 254}, {229, 129, 250}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {254, 119, 254}, {254, 128, 128}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
- { // TX_SIZE 1
- { // Y plane
- { // Intra
- { // band 0
- { 49, 26, 159}, { 36, 34, 150}, { 26, 38, 124},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 99, 122, 226}, {143, 119, 225}, { 90, 113, 213},
- { 46, 102, 193}, { 14, 84, 157}, { 3, 59, 107},
- },
- { // band 2
- {109, 164, 237}, { 74, 142, 233}, { 29, 112, 216},
- { 14, 92, 184}, { 10, 80, 156}, { 1, 52, 137},
- },
- { // band 3
- {110, 191, 245}, { 59, 156, 240}, { 18, 121, 220},
- { 8, 97, 184}, { 3, 84, 150}, {128, 128, 128},
- },
- { // band 4
- {115, 203, 250}, { 59, 167, 246}, { 16, 130, 226},
- { 7, 97, 192}, { 1, 71, 99}, {128, 128, 128},
- },
- { // band 5
- {149, 218, 253}, { 93, 171, 251}, { 28, 125, 233},
- { 28, 99, 192}, {128, 85, 85}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- { 97, 45, 229}, { 79, 52, 205}, { 46, 58, 171},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 99, 180, 249}, {156, 165, 249}, { 73, 141, 237},
- { 31, 116, 208}, { 13, 81, 153}, { 5, 42, 86},
- },
- { // band 2
- {113, 188, 251}, { 68, 161, 244}, { 16, 108, 216},
- { 6, 81, 168}, { 2, 65, 118}, {128, 1, 1},
- },
- { // band 3
- {117, 201, 252}, { 62, 171, 248}, { 12, 119, 221},
- { 5, 90, 182}, { 4, 66, 116}, {128, 128, 128},
- },
- { // band 4
- {128, 207, 253}, { 70, 176, 251}, { 11, 126, 228},
- { 6, 89, 189}, { 1, 44, 148}, {128, 128, 128},
- },
- { // band 5
- {162, 218, 254}, {107, 170, 253}, { 22, 131, 238},
- { 1, 77, 182}, { 1, 254, 128}, {128, 128, 128},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {235, 5, 238}, {194, 14, 223}, {152, 22, 205},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {200, 121, 251}, {241, 115, 252}, {167, 108, 248},
- { 93, 93, 233}, { 36, 66, 189}, {128, 128, 128},
- },
- { // band 2
- {220, 151, 253}, {176, 135, 252}, { 95, 124, 254},
- { 64, 105, 217}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 3
- {225, 189, 254}, {175, 155, 254}, {102, 119, 254},
- { 1, 1, 1}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {218, 195, 254}, {125, 157, 253}, {128, 128, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {221, 197, 254}, { 85, 210, 254}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {250, 9, 246}, {204, 13, 234}, {144, 18, 211},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {213, 157, 253}, {243, 138, 253}, {170, 117, 250},
- {109, 91, 233}, { 66, 77, 163}, { 64, 85, 254},
- },
- { // band 2
- {221, 169, 254}, {182, 141, 253}, {112, 120, 239},
- { 85, 165, 254}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 3
- {226, 192, 254}, {189, 174, 251}, {153, 128, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {232, 192, 254}, {195, 187, 247}, { 1, 191, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {247, 185, 254}, {254, 93, 254}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
- { // TX_SIZE 2
- { // Y plane
- { // Intra
- { // band 0
- { 14, 30, 136}, { 15, 33, 120}, { 10, 33, 90},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 92, 109, 209}, {113, 108, 207}, { 77, 102, 193},
- { 39, 91, 171}, { 11, 70, 129}, { 2, 44, 77},
- },
- { // band 2
- { 99, 158, 223}, { 66, 135, 217}, { 23, 109, 194},
- { 9, 85, 160}, { 3, 66, 124}, { 1, 51, 100},
- },
- { // band 3
- { 89, 189, 234}, { 46, 149, 225}, { 10, 110, 194},
- { 2, 83, 156}, { 1, 57, 113}, { 1, 47, 73},
- },
- { // band 4
- { 78, 206, 242}, { 28, 161, 232}, { 3, 114, 200},
- { 1, 86, 161}, { 1, 62, 118}, { 1, 1, 1},
- },
- { // band 5
- { 72, 227, 250}, { 20, 182, 242}, { 3, 126, 210},
- { 2, 91, 166}, { 1, 64, 126}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- { 23, 42, 227}, { 41, 43, 195}, { 25, 45, 146},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {100, 172, 245}, {165, 158, 246}, { 88, 137, 234},
- { 44, 116, 203}, { 18, 85, 149}, { 7, 56, 92},
- },
- { // band 2
- {117, 188, 247}, { 70, 155, 239}, { 18, 105, 204},
- { 7, 78, 158}, { 2, 50, 111}, { 1, 38, 77},
- },
- { // band 3
- {104, 207, 250}, { 54, 166, 241}, { 6, 110, 199},
- { 1, 78, 155}, { 1, 45, 100}, { 1, 1, 1},
- },
- { // band 4
- { 87, 216, 251}, { 30, 177, 243}, { 1, 114, 203},
- { 1, 85, 157}, { 1, 53, 108}, {128, 128, 128},
- },
- { // band 5
- { 80, 230, 253}, { 23, 193, 248}, { 1, 127, 215},
- { 1, 94, 170}, { 1, 71, 59}, {128, 128, 128},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {222, 9, 234}, {161, 20, 210}, {113, 30, 185},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {195, 120, 248}, {231, 124, 247}, {148, 116, 238},
- { 64, 98, 207}, { 20, 70, 147}, { 87, 68, 100},
- },
- { // band 2
- {186, 161, 250}, {124, 148, 245}, { 44, 123, 230},
- { 23, 107, 205}, { 1, 80, 131}, {128, 128, 128},
- },
- { // band 3
- {172, 196, 252}, {110, 160, 248}, { 37, 134, 235},
- { 23, 125, 200}, {128, 254, 128}, {128, 128, 128},
- },
- { // band 4
- {173, 209, 253}, {103, 175, 250}, { 1, 120, 240},
- { 1, 146, 254}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {184, 235, 254}, { 81, 186, 251}, {128, 109, 254},
- {128, 254, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {248, 8, 243}, {185, 11, 225}, {108, 11, 189},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {208, 158, 254}, {244, 147, 252}, {195, 132, 248},
- {161, 122, 224}, {129, 114, 188}, { 59, 119, 159},
- },
- { // band 2
- {202, 182, 253}, {143, 161, 251}, { 73, 115, 247},
- {146, 175, 204}, {128, 1, 254}, {128, 128, 128},
- },
- { // band 3
- {202, 204, 254}, {131, 174, 251}, { 18, 153, 207},
- {128, 254, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {192, 221, 254}, {114, 190, 254}, {128, 170, 254},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {166, 236, 254}, {119, 200, 254}, {128, 128, 128},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
- { // TX_SIZE 3
- { // Y plane
- { // Intra
- { // band 0
- { 30, 32, 144}, { 21, 35, 96}, { 4, 27, 55},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 35, 107, 172}, { 61, 104, 170}, { 33, 94, 160},
- { 13, 80, 139}, { 2, 55, 97}, { 1, 28, 49},
- },
- { // band 2
- { 51, 153, 195}, { 29, 129, 189}, { 9, 99, 163},
- { 3, 75, 129}, { 1, 49, 88}, { 1, 29, 50},
- },
- { // band 3
- { 53, 164, 210}, { 21, 134, 201}, { 3, 97, 164},
- { 1, 69, 124}, { 1, 45, 82}, { 1, 31, 58},
- },
- { // band 4
- { 47, 205, 234}, { 18, 158, 220}, { 2, 109, 177},
- { 1, 78, 137}, { 1, 53, 101}, { 1, 34, 70},
- },
- { // band 5
- { 55, 233, 245}, { 16, 179, 233}, { 1, 116, 191},
- { 1, 79, 145}, { 1, 53, 101}, { 1, 37, 58},
- },
- },
- { // Inter
- { // band 0
- { 36, 33, 227}, { 39, 28, 190}, { 18, 27, 134},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 76, 156, 235}, {184, 147, 235}, {114, 130, 220},
- { 72, 112, 191}, { 42, 87, 144}, { 21, 65, 93},
- },
- { // band 2
- { 96, 179, 240}, { 51, 149, 228}, { 12, 105, 191},
- { 6, 74, 148}, { 1, 47, 100}, { 1, 29, 53},
- },
- { // band 3
- { 88, 191, 242}, { 35, 154, 231}, { 3, 106, 187},
- { 1, 74, 140}, { 1, 41, 84}, { 1, 25, 38},
- },
- { // band 4
- { 77, 212, 249}, { 28, 171, 239}, { 2, 117, 199},
- { 1, 79, 151}, { 1, 45, 99}, { 1, 1, 1},
- },
- { // band 5
- { 77, 236, 252}, { 27, 190, 246}, { 2, 120, 203},
- { 1, 78, 147}, { 1, 42, 72}, {128, 128, 128},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {185, 11, 227}, {113, 30, 182}, { 57, 44, 144},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {151, 139, 244}, {212, 139, 241}, {124, 126, 231},
- { 59, 104, 213}, { 26, 73, 158}, { 20, 45, 95},
- },
- { // band 2
- {155, 163, 247}, {108, 152, 239}, { 39, 124, 214},
- { 7, 109, 162}, { 29, 57, 128}, {128, 128, 128},
- },
- { // band 3
- {158, 176, 250}, { 89, 164, 243}, { 11, 114, 196},
- { 1, 96, 141}, { 1, 81, 118}, {128, 1, 1},
- },
- { // band 4
- {148, 212, 251}, { 59, 174, 240}, { 2, 130, 203},
- { 1, 70, 168}, { 1, 51, 106}, {128, 128, 128},
- },
- { // band 5
- {104, 237, 252}, { 39, 190, 246}, { 1, 154, 220},
- {128, 102, 1}, {128, 128, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {236, 6, 242}, {111, 6, 206}, { 36, 5, 161},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {193, 193, 252}, {248, 182, 251}, {218, 150, 246},
- {182, 134, 244}, {151, 137, 227}, { 45, 102, 195},
- },
- { // band 2
- {188, 202, 251}, {125, 165, 249}, { 64, 75, 218},
- { 1, 128, 254}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 3
- {178, 225, 254}, {107, 188, 231}, { 21, 135, 233},
- {128, 1, 254}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {164, 227, 253}, { 55, 193, 251}, { 1, 111, 225},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {151, 243, 254}, { 50, 203, 254}, {128, 179, 254},
- {128, 1, 254}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
-#if CONFIG_TX64X64
- { // TX_SIZE 4
- { // Y plane
- { // Intra
- { // band 0
- { 30, 32, 144}, { 21, 35, 96}, { 4, 27, 55},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 35, 107, 172}, { 61, 104, 170}, { 33, 94, 160},
- { 13, 80, 139}, { 2, 55, 97}, { 1, 28, 49},
- },
- { // band 2
- { 51, 153, 195}, { 29, 129, 189}, { 9, 99, 163},
- { 3, 75, 129}, { 1, 49, 88}, { 1, 29, 50},
- },
- { // band 3
- { 53, 164, 210}, { 21, 134, 201}, { 3, 97, 164},
- { 1, 69, 124}, { 1, 45, 82}, { 1, 31, 58},
- },
- { // band 4
- { 47, 205, 234}, { 18, 158, 220}, { 2, 109, 177},
- { 1, 78, 137}, { 1, 53, 101}, { 1, 34, 70},
- },
- { // band 5
- { 55, 233, 245}, { 16, 179, 233}, { 1, 116, 191},
- { 1, 79, 145}, { 1, 53, 101}, { 1, 37, 58},
- },
- },
- { // Inter
- { // band 0
- { 36, 33, 227}, { 39, 28, 190}, { 18, 27, 134},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- { 76, 156, 235}, {184, 147, 235}, {114, 130, 220},
- { 72, 112, 191}, { 42, 87, 144}, { 21, 65, 93},
- },
- { // band 2
- { 96, 179, 240}, { 51, 149, 228}, { 12, 105, 191},
- { 6, 74, 148}, { 1, 47, 100}, { 1, 29, 53},
- },
- { // band 3
- { 88, 191, 242}, { 35, 154, 231}, { 3, 106, 187},
- { 1, 74, 140}, { 1, 41, 84}, { 1, 25, 38},
- },
- { // band 4
- { 77, 212, 249}, { 28, 171, 239}, { 2, 117, 199},
- { 1, 79, 151}, { 1, 45, 99}, { 1, 1, 1},
- },
- { // band 5
- { 77, 236, 252}, { 27, 190, 246}, { 2, 120, 203},
- { 1, 78, 147}, { 1, 42, 72}, {128, 128, 128},
- },
- },
- },
- { // UV plane
- { // Intra
- { // band 0
- {185, 11, 227}, {113, 30, 182}, { 57, 44, 144},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {151, 139, 244}, {212, 139, 241}, {124, 126, 231},
- { 59, 104, 213}, { 26, 73, 158}, { 20, 45, 95},
- },
- { // band 2
- {155, 163, 247}, {108, 152, 239}, { 39, 124, 214},
- { 7, 109, 162}, { 29, 57, 128}, {128, 128, 128},
- },
- { // band 3
- {158, 176, 250}, { 89, 164, 243}, { 11, 114, 196},
- { 1, 96, 141}, { 1, 81, 118}, {128, 1, 1},
- },
- { // band 4
- {148, 212, 251}, { 59, 174, 240}, { 2, 130, 203},
- { 1, 70, 168}, { 1, 51, 106}, {128, 128, 128},
- },
- { // band 5
- {104, 237, 252}, { 39, 190, 246}, { 1, 154, 220},
- {128, 102, 1}, {128, 128, 128}, {128, 128, 128},
- },
- },
- { // Inter
- { // band 0
- {236, 6, 242}, {111, 6, 206}, { 36, 5, 161},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 1
- {193, 193, 252}, {248, 182, 251}, {218, 150, 246},
- {182, 134, 244}, {151, 137, 227}, { 45, 102, 195},
- },
- { // band 2
- {188, 202, 251}, {125, 165, 249}, { 64, 75, 218},
- { 1, 128, 254}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 3
- {178, 225, 254}, {107, 188, 231}, { 21, 135, 233},
- {128, 1, 254}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 4
- {164, 227, 253}, { 55, 193, 251}, { 1, 111, 225},
- {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
- },
- { // band 5
- {151, 243, 254}, { 50, 203, 254}, {128, 179, 254},
- {128, 1, 254}, {128, 128, 128}, {128, 128, 128},
- },
- },
- },
- },
-#endif // CONFIG_TX64X64
- },
-};
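/* The table above nests, per its inline comments, as Q_Index bin ->
 * TX_SIZE -> plane (Y/UV) -> reference type (intra/inter) ->
 * coefficient band -> context, with three 8-bit model probabilities per
 * context.  A minimal, self-contained sketch follows, assuming the usual
 * aom_prob convention that a byte p encodes a zero-branch probability of
 * p/256 (so {128, 128, 128} rows are neutral placeholders for contexts
 * the coder never reaches); prob_to_real is a local illustrative helper,
 * and the row used is the first band-1 context of the Q_Index 2 /
 * first TX_SIZE / Y-plane intra table above. */
#include <stdint.h>
#include <stdio.h>

typedef uint8_t aom_prob; /* same width as the typedef in aom_dsp/prob.h */

static double prob_to_real(aom_prob p) { return p / 256.0; }

int main(void) {
  const aom_prob row[3] = { 80, 95, 197 };
  for (int i = 0; i < 3; ++i) /* prints roughly 0.31, 0.37, 0.77 */
    printf("node %d: P(zero branch) ~= %.3f\n", i, prob_to_real(row[i]));
  return 0;
}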
-#else
-static const av1_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = {
- { // Y plane
- { // Intra
- { // Band 0
- {97, 27, 144}, {81, 38, 128}, {51, 42, 99}
- },
- { // Band 1
- {74, 113, 204}, {68, 101, 199}, {50, 87, 173},
- {31, 76, 133}, {13, 55, 86}, {3, 30, 39}
- },
- { // Band 2
- {83, 156, 222}, {74, 127, 215}, {46, 101, 179},
- {30, 80, 129}, {14, 57, 81}, {3, 27, 37}
- },
- { // Band 3
- {105, 164, 233}, {84, 128, 224}, {49, 92, 175},
- {28, 60, 114}, {12, 34, 53}, {20, 59, 98}
- },
- { // Band 4
- {131, 159, 243}, {98, 123, 228}, {40, 78, 151},
- {19, 46, 97}, {13, 47, 19}, {19, 16, 19}
- },
- { // Band 5
- {192, 71, 241}, {174, 70, 226}, {125, 46, 153},
- {108, 49, 116}, {82, 24, 46}, {60, 14, 30}
- }
- },
- { // Inter
- { // Band 0
- {111, 66, 218}, {107, 87, 211}, {93, 99, 207}
- },
- { // Band 1
- {107, 166, 250}, {107, 143, 247}, {73, 119, 221},
- {43, 91, 166}, {17, 74, 102}, {3, 70, 53}
- },
- { // Band 2
- {126, 177, 251}, {109, 148, 246}, {64, 99, 204},
- {42, 68, 140}, {28, 52, 84}, {20, 34, 1}
- },
- { // Band 3
- {143, 178, 252}, {114, 144, 245}, {46, 92, 188},
- {45, 65, 104}, {40, 44, 76}, {1, 1, 1}
- },
- { // Band 4
- {163, 159, 251}, {120, 131, 243}, {47, 81, 182},
- {32, 39, 128}, {33, 44, 56}, {1, 17, 34}
- },
- { // Band 5
- {209, 94, 251}, {190, 81, 241}, {139, 45, 147},
- {123, 35, 73}, {118, 1, 118}, {3, 16, 42}
- }
- }
- },
- { // UV plane
- { // Intra
- { // Band 0
- {189, 37, 229}, {145, 68, 205}, {99, 74, 171}
- },
- { // Band 1
- {153, 139, 242}, {135, 125, 235}, {84, 100, 200},
- {49, 75, 162}, {9, 21, 84}, {3, 31, 69}
- },
- { // Band 2
- {165, 165, 244}, {128, 144, 240}, {68, 94, 204},
- {39, 72, 132}, {22, 44, 93}, {26, 73, 26}
- },
- { // Band 3
- {181, 174, 246}, {142, 132, 241}, {81, 96, 212},
- {41, 70, 166}, {9, 48, 92}, {1, 19, 38}
- },
- { // Band 4
- {197, 159, 251}, {168, 121, 245}, {107, 75, 218},
- {70, 43, 158}, {1, 128, 1}, {1, 18, 37}
- },
- { // Band 5
- {231, 79, 255}, {211, 74, 249}, {157, 104, 210},
- {128, 102, 213}, {12, 34, 96}, {2, 20, 47}
- }
- },
- { // Inter
- { // Band 0
- {220, 53, 252}, {191, 80, 248}, {154, 100, 245}
- },
- { // Band 1
- {205, 153, 255}, {182, 147, 254}, {110, 131, 231},
- {68, 114, 161}, {50, 114, 140}, {1, 33, 57}
- },
- { // Band 2
- {213, 171, 255}, {184, 163, 254}, {116, 104, 235},
- {79, 71, 207}, {1, 41, 79}, {1, 20, 39}
- },
- { // Band 3
- {223, 158, 255}, {203, 137, 255}, {111, 142, 244},
- {2, 255, 133}, {1, 44, 85}, {1, 22, 47}
- },
- { // Band 4
- {232, 148, 255}, {222, 123, 255}, {255, 128, 255},
- {3, 61, 124}, {1, 41, 84}, {1, 21, 52}
- },
- { // Band 5
- {248, 92, 255}, {248, 96, 255}, {69, 58, 184},
- {31, 44, 137}, {14, 38, 105}, {8, 23, 61}
- }
- }
- }
-};
-static const av1_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = {
- { // Y plane
- { // Intra
- { // Band 0
- {112, 31, 159}, {72, 37, 119}, {22, 35, 68}
- },
- { // Band 1
- {42, 109, 174}, {45, 99, 172}, {32, 84, 149},
- {18, 69, 119}, {6, 46, 76}, {1, 19, 31}
- },
- { // Band 2
- {40, 154, 202}, {35, 126, 191}, {19, 98, 160},
- {10, 75, 122}, {5, 53, 82}, {1, 23, 39}
- },
- { // Band 3
- {39, 176, 215}, {28, 135, 200}, {11, 93, 156},
- {5, 63, 109}, {1, 36, 64}, {1, 14, 26}
- },
- { // Band 4
- {41, 191, 230}, {25, 147, 212}, {9, 97, 160},
- {3, 65, 109}, {1, 33, 58}, {1, 14, 20}
- },
- { // Band 5
- {68, 203, 242}, {40, 159, 220}, {12, 97, 153},
- {5, 58, 97}, {1, 29, 55}, {1, 11, 18}
- }
- },
- { // Inter
- { // Band 0
- {99, 67, 221}, {86, 80, 204}, {60, 87, 184}
- },
- { // Band 1
- {73, 169, 246}, {79, 158, 242}, {50, 135, 220},
- {30, 113, 181}, {18, 76, 126}, {5, 54, 85}
- },
- { // Band 2
- {90, 184, 250}, {78, 162, 243}, {47, 118, 214},
- {35, 85, 171}, {32, 53, 115}, {20, 28, 76}
- },
- { // Band 3
- {109, 197, 252}, {89, 172, 247}, {52, 119, 217},
- {37, 80, 161}, {23, 44, 100}, {1, 18, 34}
- },
- { // Band 4
- {132, 202, 254}, {110, 175, 251}, {63, 128, 228},
- {37, 86, 168}, {64, 91, 102}, {1, 17, 34}
- },
- { // Band 5
- {126, 204, 253}, {100, 174, 250}, {50, 148, 237},
- {25, 90, 133}, {1, 64, 85}, {3, 16, 42}
- }
- }
- },
- { // UV plane
- { // Intra
- { // Band 0
- {195, 35, 235}, {137, 63, 201}, {62, 70, 145}
- },
- { // Band 1
- {110, 158, 233}, {102, 143, 227}, {60, 120, 199},
- {30, 85, 156}, {9, 50, 90}, {1, 16, 33}
- },
- { // Band 2
- {102, 185, 233}, {71, 152, 224}, {29, 111, 187},
- {18, 74, 138}, {4, 56, 87}, {1, 18, 46}
- },
- { // Band 3
- {101, 205, 239}, {66, 161, 229}, {23, 109, 183},
- {9, 85, 135}, {5, 71, 142}, {1, 1, 102}
- },
- { // Band 4
- {109, 216, 243}, {69, 168, 233}, {23, 119, 191},
- {8, 137, 115}, {1, 54, 98}, {1, 1, 255}
- },
- { // Band 5
- {139, 224, 249}, {98, 176, 238}, {55, 129, 187},
- {25, 101, 131}, {26, 59, 154}, {2, 20, 47}
- }
- },
- { // Inter
- { // Band 0
- {220, 72, 254}, {176, 108, 251}, {114, 132, 247}
- },
- { // Band 1
- {161, 185, 255}, {141, 185, 254}, {131, 180, 249},
- {111, 164, 186}, {50, 98, 142}, {1, 128, 1}
- },
- { // Band 2
- {171, 195, 255}, {133, 184, 254}, {68, 140, 231},
- {102, 96, 205}, {1, 1, 128}, {1, 20, 39}
- },
- { // Band 3
- {180, 206, 255}, {148, 191, 254}, {83, 157, 241},
- {128, 171, 128}, {1, 44, 85}, {1, 22, 47}
- },
- { // Band 4
- {194, 214, 255}, {159, 188, 255}, {122, 148, 250},
- {3, 255, 124}, {1, 41, 84}, {1, 21, 52}
- },
- { // Band 5
- {231, 217, 255}, {209, 149, 255}, {205, 145, 205},
- {31, 44, 137}, {14, 38, 105}, {8, 23, 61}
- }
- }
- }
-};
-static const av1_coeff_probs_model default_coef_probs_16x16[PLANE_TYPES] = {
- { // Y plane
- { // Intra
- { // Band 0
- {91, 31, 117}, {49, 31, 89}, {14, 25, 48}
- },
- { // Band 1
- {31, 97, 151}, {33, 89, 148}, {28, 76, 133},
- {17, 60, 106}, {7, 42, 72}, {1, 19, 32}
- },
- { // Band 2
- {28, 152, 182}, {28, 120, 174}, {15, 93, 146},
- {9, 72, 116}, {5, 47, 82}, {1, 21, 37}
- },
- { // Band 3
- {29, 174, 203}, {23, 127, 187}, {9, 89, 145},
- {2, 56, 100}, {1, 31, 56}, {1, 12, 25}
- },
- { // Band 4
- {28, 193, 220}, {17, 141, 197}, {4, 87, 142},
- {1, 54, 95}, {1, 31, 56}, {1, 12, 26}
- },
- { // Band 5
- {29, 221, 240}, {11, 167, 215}, {2, 93, 149},
- {1, 58, 100}, {1, 35, 61}, {1, 16, 28}
- }
- },
- { // Inter
- { // Band 0
- {108, 52, 214}, {84, 60, 186}, {45, 69, 161}
- },
- { // Band 1
- {43, 164, 236}, {57, 161, 233}, {38, 146, 214},
- {24, 120, 182}, {15, 80, 126}, {5, 28, 66}
- },
- { // Band 2
- {58, 187, 242}, {47, 163, 234}, {28, 118, 204},
- {26, 82, 165}, {21, 54, 112}, {4, 28, 55}
- },
- { // Band 3
- {65, 201, 248}, {51, 170, 239}, {22, 117, 204},
- {11, 81, 159}, {10, 43, 102}, {1, 1, 1}
- },
- { // Band 4
- {80, 206, 252}, {57, 179, 245}, {25, 129, 214},
- {16, 97, 170}, {6, 60, 130}, {1, 128, 1}
- },
- { // Band 5
- {97, 217, 253}, {68, 186, 250}, {26, 138, 216},
- {20, 105, 166}, {11, 78, 111}, {3, 16, 42}
- }
- }
- },
- { // UV plane
- { // Intra
- { // Band 0
- {181, 37, 233}, {121, 55, 192}, {46, 52, 124}
- },
- { // Band 1
- {108, 157, 221}, {98, 140, 215}, {59, 124, 187},
- {34, 92, 158}, {9, 68, 112}, {1, 41, 70}
- },
- { // Band 2
- {80, 188, 223}, {46, 153, 204}, {25, 91, 173},
- {11, 73, 131}, {5, 43, 82}, {1, 17, 91}
- },
- { // Band 3
- {63, 209, 228}, {31, 157, 206}, {8, 104, 167},
- {3, 63, 122}, {1, 44, 87}, {1, 43, 51}
- },
- { // Band 4
- {52, 220, 234}, {22, 165, 216}, {4, 104, 163},
- {2, 62, 129}, {1, 33, 50}, {1, 26, 28}
- },
- { // Band 5
- {58, 238, 242}, {24, 183, 224}, {4, 109, 172},
- {2, 87, 141}, {1, 52, 79}, {1, 51, 64}
- }
- },
- { // Inter
- { // Band 0
- {224, 52, 250}, {188, 81, 239}, {138, 114, 228}
- },
- { // Band 1
- {131, 206, 255}, {128, 193, 254}, {119, 173, 247},
- {106, 127, 187}, {50, 100, 124}, {1, 96, 1}
- },
- { // Band 2
- {123, 214, 254}, {86, 194, 254}, {64, 119, 221},
- {43, 51, 128}, {1, 32, 110}, {1, 20, 39}
- },
- { // Band 3
- {115, 223, 255}, {78, 200, 254}, {75, 164, 203},
- {128, 85, 255}, {1, 44, 85}, {1, 22, 47}
- },
- { // Band 4
- {132, 226, 255}, {88, 207, 254}, {20, 140, 225},
- {3, 61, 124}, {1, 41, 84}, {1, 21, 52}
- },
- { // Band 5
- {180, 236, 255}, {138, 223, 254}, {73, 166, 238},
- {31, 255, 137}, {14, 38, 105}, {8, 23, 61}
- }
- }
- }
-};
-static const av1_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = {
- { // Y plane
- { // Intra
- { // Band 0
- {163, 26, 188}, {78, 29, 105}, {22, 22, 48}
- },
- { // Band 1
- {72, 93, 168}, {74, 91, 170}, {62, 72, 151},
- {37, 55, 112}, {10, 33, 63}, {1, 14, 23}
- },
- { // Band 2
- {41, 163, 182}, {36, 136, 177}, {20, 102, 153},
- {10, 76, 114}, {5, 45, 71}, {1, 17, 27}
- },
- { // Band 3
- {43, 202, 213}, {28, 142, 193}, {10, 90, 141},
- {2, 51, 93}, {1, 24, 48}, {1, 10, 19}
- },
- { // Band 4
- {46, 216, 220}, {26, 150, 199}, {7, 87, 136},
- {2, 49, 86}, {1, 28, 47}, {1, 12, 24}
- },
- { // Band 5
- {19, 241, 237}, {5, 172, 200}, {1, 82, 126},
- {1, 47, 79}, {1, 29, 47}, {1, 14, 25}
- }
- },
- { // Inter
- { // Band 0
- {185, 20, 226}, {151, 26, 187}, {109, 34, 144}
- },
- { // Band 1
- {56, 151, 227}, {76, 165, 232}, {62, 161, 222},
- {47, 139, 201}, {29, 106, 150}, {14, 61, 98}
- },
- { // Band 2
- {57, 200, 237}, {43, 164, 227}, {22, 106, 190},
- {14, 68, 140}, {10, 48, 90}, {1, 15, 40}
- },
- { // Band 3
- {46, 209, 238}, {28, 165, 225}, {7, 107, 180},
- {2, 69, 125}, {2, 36, 94}, {1, 1, 1}
- },
- { // Band 4
- {55, 225, 248}, {28, 181, 237}, {7, 117, 198},
- {6, 77, 144}, {3, 60, 90}, {1, 1, 1}
- },
- { // Band 5
- {63, 243, 251}, {27, 193, 242}, {4, 124, 200},
- {1, 58, 153}, {1, 59, 124}, {3, 16, 42}
- }
- }
- },
- { // UV plane
- { // Intra
- { // Band 0
- {208, 28, 218}, {183, 32, 188}, {169, 21, 189}
- },
- { // Band 1
- {205, 124, 247}, {190, 96, 240}, {233, 89, 233},
- {177, 44, 212}, {59, 58, 59}, {32, 33, 38}
- },
- { // Band 2
- {194, 195, 250}, {179, 190, 226}, {32, 174, 128},
- {32, 85, 128}, {12, 64, 122}, {1, 85, 90}
- },
- { // Band 3
- {149, 232, 249}, {95, 159, 227}, {28, 91, 171},
- {28, 102, 114}, {1, 1, 73}, {1, 19, 38}
- },
- { // Band 4
- {154, 239, 246}, {138, 151, 235}, {1, 123, 138},
- {128, 183, 255}, {1, 128, 1}, {1, 18, 37}
- },
- { // Band 5
- {157, 255, 253}, {75, 171, 241}, {43, 102, 171},
- {30, 44, 136}, {12, 34, 96}, {2, 20, 47}
- }
- },
- { // Inter
- { // Band 0
- {249, 13, 248}, {238, 14, 220}, {225, 16, 174}
- },
- { // Band 1
- {190, 189, 254}, {169, 134, 253}, {124, 179, 248},
- {138, 131, 223}, {64, 133, 192}, {1, 85, 128}
- },
- { // Band 2
- {139, 212, 254}, {126, 177, 255}, {93, 39, 186},
- {1, 1, 171}, {1, 41, 79}, {1, 20, 39}
- },
- { // Band 3
- {153, 216, 255}, {165, 204, 255}, {1, 1, 255},
- {2, 73, 133}, {1, 1, 1}, {1, 22, 47}
- },
- { // Band 4
- {147, 226, 254}, {119, 196, 255}, {1, 128, 255},
- {1, 1, 171}, {1, 1, 1}, {1, 21, 52}
- },
- { // Band 5
- {168, 240, 255}, {95, 179, 255}, {1, 171, 1},
- {31, 44, 137}, {14, 38, 105}, {8, 23, 61}
- }
- }
- }
-};
-
-#if CONFIG_TX64X64
-// FIXME. Optimize for EC_MULTISYMBOL
-static const av1_coeff_probs_model default_coef_probs_64x64[PLANE_TYPES] = {
- { // Y plane
- { // Intra
- { // Band 0
- { 17, 38, 140 }, { 7, 34, 80 }, { 1, 17, 29 }
- }, { // Band 1
- { 37, 75, 128 }, { 41, 76, 128 }, { 26, 66, 116 },
- { 12, 52, 94 }, { 2, 32, 55 }, { 1, 10, 16 }
- }, { // Band 2
- { 50, 127, 154 }, { 37, 109, 152 }, { 16, 82, 121 },
- { 5, 59, 85 }, { 1, 35, 54 }, { 1, 13, 20 }
- }, { // Band 3
- { 40, 142, 167 }, { 17, 110, 157 }, { 2, 71, 112 },
- { 1, 44, 72 }, { 1, 27, 45 }, { 1, 11, 17 }
- }, { // Band 4
- { 30, 175, 188 }, { 9, 124, 169 }, { 1, 74, 116 },
- { 1, 48, 78 }, { 1, 30, 49 }, { 1, 11, 18 }
- }, { // Band 5
- { 10, 222, 223 }, { 2, 150, 194 }, { 1, 83, 128 },
- { 1, 48, 79 }, { 1, 27, 45 }, { 1, 11, 17 }
- }
- }, { // Inter
- { // Band 0
- { 36, 41, 235 }, { 29, 36, 193 }, { 10, 27, 111 }
- }, { // Band 1
- { 85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 },
- { 57, 113, 168 }, { 23, 83, 120 }, { 10, 49, 61 }
- }, { // Band 2
- { 85, 190, 223 }, { 36, 139, 200 }, { 5, 90, 146 },
- { 1, 60, 103 }, { 1, 38, 65 }, { 1, 18, 30 }
- }, { // Band 3
- { 72, 202, 223 }, { 23, 141, 199 }, { 2, 86, 140 },
- { 1, 56, 97 }, { 1, 36, 61 }, { 1, 16, 27 }
- }, { // Band 4
- { 55, 218, 225 }, { 13, 145, 200 }, { 1, 86, 141 },
- { 1, 57, 99 }, { 1, 35, 61 }, { 1, 13, 22 }
- }, { // Band 5
- { 15, 235, 212 }, { 1, 132, 184 }, { 1, 84, 139 },
- { 1, 57, 97 }, { 1, 34, 56 }, { 1, 14, 23 }
- }
- }
- }, { // UV plane
- { // Intra
- { // Band 0
- { 181, 21, 201 }, { 61, 37, 123 }, { 10, 38, 71 }
- }, { // Band 1
- { 47, 106, 172 }, { 95, 104, 173 }, { 42, 93, 159 },
- { 18, 77, 131 }, { 4, 50, 81 }, { 1, 17, 23 }
- }, { // Band 2
- { 62, 147, 199 }, { 44, 130, 189 }, { 28, 102, 154 },
- { 18, 75, 115 }, { 2, 44, 65 }, { 1, 12, 19 }
- }, { // Band 3
- { 55, 153, 210 }, { 24, 130, 194 }, { 3, 93, 146 },
- { 1, 61, 97 }, { 1, 31, 50 }, { 1, 10, 16 }
- }, { // Band 4
- { 49, 186, 223 }, { 17, 148, 204 }, { 1, 96, 142 },
- { 1, 53, 83 }, { 1, 26, 44 }, { 1, 11, 17 }
- }, { // Band 5
- { 13, 217, 212 }, { 2, 136, 180 }, { 1, 78, 124 },
- { 1, 50, 83 }, { 1, 29, 49 }, { 1, 14, 23 }
- }
- }, { // Inter
- { // Band 0
- { 197, 13, 247 }, { 82, 17, 222 }, { 25, 17, 162 }
- }, { // Band 1
- { 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 },
- { 104, 158, 220 }, { 66, 128, 186 }, { 55, 90, 137 }
- }, { // Band 2
- { 111, 197, 242 }, { 46, 158, 219 }, { 9, 104, 171 },
- { 2, 65, 125 }, { 1, 44, 80 }, { 1, 17, 91 }
- }, { // Band 3
- { 104, 208, 245 }, { 39, 168, 224 }, { 3, 109, 162 },
- { 1, 79, 124 }, { 1, 50, 102 }, { 1, 43, 102 }
- }, { // Band 4
- { 84, 220, 246 }, { 31, 177, 231 }, { 2, 115, 180 },
- { 1, 79, 134 }, { 1, 55, 77 }, { 1, 60, 79 }
- }, { // Band 5
- { 43, 243, 240 }, { 8, 180, 217 }, { 1, 115, 166 },
- { 1, 84, 121 }, { 1, 51, 67 }, { 1, 16, 6 }
- }
- }
- }
-};
-#endif // CONFIG_TX64X64
-#endif // CONFIG_Q_ADAPT_PROBS
-static const aom_prob av1_default_blockzero_probs[TX_SIZES][PLANE_TYPES]
- [REF_TYPES][BLOCKZ_CONTEXTS] = {
-#if CONFIG_CHROMA_2X2
- { // TX_2x2
- { // Y plane
- { 195, 84, 8, }, // Intra
- { 191, 124, 25, }, // Inter
- },
- { // UV plane
- { 214, 132, 42, }, // Intra
- { 229, 143, 46, }, // Inter
- },
- },
-#endif
- { // TX_4x4
- { // Y plane
- { 195, 84, 8, }, // Intra
- { 191, 124, 25, }, // Inter
- },
- { // UV plane
- { 214, 132, 42, }, // Intra
- { 229, 143, 46, }, // Inter
- },
- },
- { // TX_8x8
- { // Y plane
- { 125, 52, 6, }, // Intra
- { 202, 108, 18, }, // Inter
- },
- { // UV plane
- { 212, 113, 29, }, // Intra
- { 225, 144, 42, }, // Inter
- },
- },
- { // TX_16x16
- { // Y plane
- { 7, 5, 1, }, // Intra
- { 19, 19, 3, }, // Inter
- },
- { // UV plane
- { 211, 96, 22, }, // Intra
- { 233, 146, 43, }, // Inter
- },
- },
- { // TX_32x32
- { // Y plane
- { 17, 7, 1, }, // Intra
- { 36, 29, 10, }, // Inter
- },
- { // UV plane
- { 181, 61, 10, }, // Intra
- { 197, 82, 25, }, // Inter
- },
- },
-#if CONFIG_TX64X64
- { // TX_64x64 FIXME: currently the same as 32x32
- { // Y plane
- { 17, 7, 1, }, // Intra
- { 36, 29, 10, }, // Inter
- },
- { // UV plane
- { 181, 61, 10, }, // Intra
- { 197, 82, 25, }, // Inter
- },
- },
-#endif
-};
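/* av1_default_blockzero_probs above is indexed
 * [tx_size][plane][ref][ctx], exactly as its declaration spells out,
 * with BLOCKZ_CONTEXTS (three) 8-bit probabilities per row.  A minimal
 * sketch of that indexing using a local stand-in table copied from the
 * values above; the real enum constants live in the av1/common headers,
 * and the optional TX_2x2 and TX_64x64 slots are omitted here since,
 * per the comments above, they duplicate TX_4x4 and TX_32x32. */
#include <stdint.h>
#include <stdio.h>

enum { kTxSizes = 4, kPlanes = 2, kRefs = 2, kBlockzCtx = 3 };

static const uint8_t kBlockzero[kTxSizes][kPlanes][kRefs][kBlockzCtx] = {
  { { { 195, 84, 8 }, { 191, 124, 25 } },     /* TX_4x4   Y  intra/inter */
    { { 214, 132, 42 }, { 229, 143, 46 } } }, /* TX_4x4   UV intra/inter */
  { { { 125, 52, 6 }, { 202, 108, 18 } },     /* TX_8x8 */
    { { 212, 113, 29 }, { 225, 144, 42 } } },
  { { { 7, 5, 1 }, { 19, 19, 3 } },           /* TX_16x16 */
    { { 211, 96, 22 }, { 233, 146, 43 } } },
  { { { 17, 7, 1 }, { 36, 29, 10 } },         /* TX_32x32 */
    { { 181, 61, 10 }, { 197, 82, 25 } } },
};

int main(void) {
  /* Y plane, intra, context 0, across transform sizes. */
  for (int tx = 0; tx < kTxSizes; ++tx)
    printf("tx %d: p = %d (%.3f)\n", tx, kBlockzero[tx][0][0][0],
           kBlockzero[tx][0][0][0] / 256.0);
  return 0;
}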
-
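/* The coeff_cdf_model tables that follow store cumulative frequencies
 * scaled so that every row ends at AOM_ICDF(32768), i.e. a 15-bit
 * total; the probability of symbol k is the difference of adjacent
 * entries divided by 32768.  A minimal sketch using the arguments of
 * the first band-1 row below; note that AOM_ICDF itself may store the
 * argument inverted depending on build configuration, which does not
 * change the per-symbol probabilities computed here. */
#include <stdio.h>

int main(void) {
  const int kTotal = 32768;
  const int cdf[5] = { 14529, 18769, 29100, 29634, 32768 };
  int prev = 0;
  for (int k = 0; k < 5; ++k) { /* 0.443 0.129 0.315 0.016 0.096 */
    printf("P(symbol %d) = %.3f\n", k, (cdf[k] - prev) / (double)kTotal);
    prev = cdf[k];
  }
  return 0;
}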
+#if !CONFIG_Q_ADAPT_PROBS
static const coeff_cdf_model default_coef_head_cdf_4x4[PLANE_TYPES] = {
{ // Y plane
{ // Intra
@@ -4152,70 +912,70 @@ static const coeff_cdf_model default_coef_head_cdf_4x4[PLANE_TYPES] = {
{ AOM_ICDF(1088), AOM_ICDF(6358), AOM_ICDF(8428), AOM_ICDF(16648),
AOM_ICDF(18276), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(14529), AOM_ICDF(18769), AOM_ICDF(29100), AOM_ICDF(29634),
- AOM_ICDF(32768) },
- {AOM_ICDF(12993), AOM_ICDF(17117), AOM_ICDF(28404), AOM_ICDF(28988),
- AOM_ICDF(32768) },
- {AOM_ICDF(11201), AOM_ICDF(14084), AOM_ICDF(25818), AOM_ICDF(26504),
- AOM_ICDF(32768) },
- {AOM_ICDF(9793), AOM_ICDF(11267), AOM_ICDF(21775), AOM_ICDF(22451),
- AOM_ICDF(32768) },
- {AOM_ICDF(7105), AOM_ICDF(7562), AOM_ICDF(15777), AOM_ICDF(16225),
- AOM_ICDF(32768) },
- {AOM_ICDF(3905), AOM_ICDF(3966), AOM_ICDF(8359), AOM_ICDF(8526),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(14529), AOM_ICDF(18769), AOM_ICDF(29100), AOM_ICDF(29634),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12993), AOM_ICDF(17117), AOM_ICDF(28404), AOM_ICDF(28988),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11201), AOM_ICDF(14084), AOM_ICDF(25818), AOM_ICDF(26504),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9793), AOM_ICDF(11267), AOM_ICDF(21775), AOM_ICDF(22451),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7105), AOM_ICDF(7562), AOM_ICDF(15777), AOM_ICDF(16225),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3905), AOM_ICDF(3966), AOM_ICDF(8359), AOM_ICDF(8526),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(20033), AOM_ICDF(23643), AOM_ICDF(31102), AOM_ICDF(31374),
- AOM_ICDF(32768) },
- {AOM_ICDF(16321), AOM_ICDF(20350), AOM_ICDF(30167), AOM_ICDF(30546),
- AOM_ICDF(32768) },
- {AOM_ICDF(12993), AOM_ICDF(15512), AOM_ICDF(26859), AOM_ICDF(27396),
- AOM_ICDF(32768) },
- {AOM_ICDF(10305), AOM_ICDF(11659), AOM_ICDF(21669), AOM_ICDF(22330),
- AOM_ICDF(32768) },
- {AOM_ICDF(7361), AOM_ICDF(7819), AOM_ICDF(15450), AOM_ICDF(15940),
- AOM_ICDF(32768) },
- {AOM_ICDF(3521), AOM_ICDF(3580), AOM_ICDF(7805), AOM_ICDF(7976),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(20033), AOM_ICDF(23643), AOM_ICDF(31102), AOM_ICDF(31374),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16321), AOM_ICDF(20350), AOM_ICDF(30167), AOM_ICDF(30546),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12993), AOM_ICDF(15512), AOM_ICDF(26859), AOM_ICDF(27396),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10305), AOM_ICDF(11659), AOM_ICDF(21669), AOM_ICDF(22330),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7361), AOM_ICDF(7819), AOM_ICDF(15450), AOM_ICDF(15940),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3521), AOM_ICDF(3580), AOM_ICDF(7805), AOM_ICDF(7976),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(21057), AOM_ICDF(25460), AOM_ICDF(31740), AOM_ICDF(31952),
- AOM_ICDF(32768) },
- {AOM_ICDF(16449), AOM_ICDF(21173), AOM_ICDF(30761), AOM_ICDF(31092),
- AOM_ICDF(32768) },
- {AOM_ICDF(11841), AOM_ICDF(14615), AOM_ICDF(26188), AOM_ICDF(26824),
- AOM_ICDF(32768) },
- {AOM_ICDF(7745), AOM_ICDF(8991), AOM_ICDF(18937), AOM_ICDF(19707),
- AOM_ICDF(32768) },
- {AOM_ICDF(4417), AOM_ICDF(4706), AOM_ICDF(10342), AOM_ICDF(10890),
- AOM_ICDF(32768) },
- {AOM_ICDF(7617), AOM_ICDF(8392), AOM_ICDF(17295), AOM_ICDF(17915),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(21057), AOM_ICDF(25460), AOM_ICDF(31740), AOM_ICDF(31952),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16449), AOM_ICDF(21173), AOM_ICDF(30761), AOM_ICDF(31092),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11841), AOM_ICDF(14615), AOM_ICDF(26188), AOM_ICDF(26824),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7745), AOM_ICDF(8991), AOM_ICDF(18937), AOM_ICDF(19707),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4417), AOM_ICDF(4706), AOM_ICDF(10342), AOM_ICDF(10890),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7617), AOM_ICDF(8392), AOM_ICDF(17295), AOM_ICDF(17915),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(20417), AOM_ICDF(26452), AOM_ICDF(32166), AOM_ICDF(32321),
- AOM_ICDF(32768) },
- {AOM_ICDF(15809), AOM_ICDF(21634), AOM_ICDF(30947), AOM_ICDF(31298),
- AOM_ICDF(32768) },
- {AOM_ICDF(10049), AOM_ICDF(12176), AOM_ICDF(23495), AOM_ICDF(24229),
- AOM_ICDF(32768) },
- {AOM_ICDF(5953), AOM_ICDF(6731), AOM_ICDF(16166), AOM_ICDF(16798),
- AOM_ICDF(32768) },
- {AOM_ICDF(6081), AOM_ICDF(6188), AOM_ICDF(8114), AOM_ICDF(8764),
- AOM_ICDF(32768) },
- {AOM_ICDF(2113), AOM_ICDF(2291), AOM_ICDF(4448), AOM_ICDF(5527),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(20417), AOM_ICDF(26452), AOM_ICDF(32166), AOM_ICDF(32321),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15809), AOM_ICDF(21634), AOM_ICDF(30947), AOM_ICDF(31298),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10049), AOM_ICDF(12176), AOM_ICDF(23495), AOM_ICDF(24229),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5953), AOM_ICDF(6731), AOM_ICDF(16166), AOM_ICDF(16798),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6081), AOM_ICDF(6188), AOM_ICDF(8114), AOM_ICDF(8764),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2113), AOM_ICDF(2291), AOM_ICDF(4448), AOM_ICDF(5527),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(9153), AOM_ICDF(25905), AOM_ICDF(31431), AOM_ICDF(31934),
- AOM_ICDF(32768) },
- {AOM_ICDF(9025), AOM_ICDF(23345), AOM_ICDF(30033), AOM_ICDF(30965),
- AOM_ICDF(32768) },
- {AOM_ICDF(5953), AOM_ICDF(13835), AOM_ICDF(22032), AOM_ICDF(24664),
- AOM_ICDF(32768) },
- {AOM_ICDF(6337), AOM_ICDF(11435), AOM_ICDF(18366), AOM_ICDF(21418),
- AOM_ICDF(32768) },
- {AOM_ICDF(3137), AOM_ICDF(4871), AOM_ICDF(8519), AOM_ICDF(12426),
- AOM_ICDF(32768) },
- {AOM_ICDF(1857), AOM_ICDF(2727), AOM_ICDF(5540), AOM_ICDF(8757),
- AOM_ICDF(32768) } } },
+ { AOM_ICDF(9153), AOM_ICDF(25905), AOM_ICDF(31431), AOM_ICDF(31934),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9025), AOM_ICDF(23345), AOM_ICDF(30033), AOM_ICDF(30965),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5953), AOM_ICDF(13835), AOM_ICDF(22032), AOM_ICDF(24664),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6337), AOM_ICDF(11435), AOM_ICDF(18366), AOM_ICDF(21418),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3137), AOM_ICDF(4871), AOM_ICDF(8519), AOM_ICDF(12426),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1857), AOM_ICDF(2727), AOM_ICDF(5540), AOM_ICDF(8757),
+ AOM_ICDF(32768) } } },
    { // Inter
{ // Band 0
{ AOM_ICDF(24512), AOM_ICDF(26673), AOM_ICDF(28962), AOM_ICDF(31929),
@@ -4225,70 +985,70 @@ static const coeff_cdf_model default_coef_head_cdf_4x4[PLANE_TYPES] = {
{ AOM_ICDF(3264), AOM_ICDF(14756), AOM_ICDF(20107), AOM_ICDF(29407),
AOM_ICDF(30032), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(21313), AOM_ICDF(26020), AOM_ICDF(32523), AOM_ICDF(32575),
- AOM_ICDF(32768) },
- {AOM_ICDF(18369), AOM_ICDF(24215), AOM_ICDF(32291), AOM_ICDF(32391),
- AOM_ICDF(32768) },
- {AOM_ICDF(15297), AOM_ICDF(19637), AOM_ICDF(30414), AOM_ICDF(30752),
- AOM_ICDF(32768) },
- {AOM_ICDF(11713), AOM_ICDF(14040), AOM_ICDF(25408), AOM_ICDF(26033),
- AOM_ICDF(32768) },
- {AOM_ICDF(9537), AOM_ICDF(10173), AOM_ICDF(18839), AOM_ICDF(19315),
- AOM_ICDF(32768) },
- {AOM_ICDF(9025), AOM_ICDF(9093), AOM_ICDF(13987), AOM_ICDF(14115),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(21313), AOM_ICDF(26020), AOM_ICDF(32523), AOM_ICDF(32575),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18369), AOM_ICDF(24215), AOM_ICDF(32291), AOM_ICDF(32391),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15297), AOM_ICDF(19637), AOM_ICDF(30414), AOM_ICDF(30752),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11713), AOM_ICDF(14040), AOM_ICDF(25408), AOM_ICDF(26033),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9537), AOM_ICDF(10173), AOM_ICDF(18839), AOM_ICDF(19315),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9025), AOM_ICDF(9093), AOM_ICDF(13987), AOM_ICDF(14115),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(22721), AOM_ICDF(27599), AOM_ICDF(32592), AOM_ICDF(32636),
- AOM_ICDF(32768) },
- {AOM_ICDF(19009), AOM_ICDF(24676), AOM_ICDF(32258), AOM_ICDF(32367),
- AOM_ICDF(32768) },
- {AOM_ICDF(12737), AOM_ICDF(16769), AOM_ICDF(28739), AOM_ICDF(29247),
- AOM_ICDF(32768) },
- {AOM_ICDF(8769), AOM_ICDF(10956), AOM_ICDF(21941), AOM_ICDF(22840),
- AOM_ICDF(32768) },
- {AOM_ICDF(6721), AOM_ICDF(7678), AOM_ICDF(15319), AOM_ICDF(16290),
- AOM_ICDF(32768) },
- {AOM_ICDF(4417), AOM_ICDF(4430), AOM_ICDF(4583), AOM_ICDF(5712),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(22721), AOM_ICDF(27599), AOM_ICDF(32592), AOM_ICDF(32636),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(19009), AOM_ICDF(24676), AOM_ICDF(32258), AOM_ICDF(32367),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12737), AOM_ICDF(16769), AOM_ICDF(28739), AOM_ICDF(29247),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8769), AOM_ICDF(10956), AOM_ICDF(21941), AOM_ICDF(22840),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6721), AOM_ICDF(7678), AOM_ICDF(15319), AOM_ICDF(16290),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4417), AOM_ICDF(4430), AOM_ICDF(4583), AOM_ICDF(5712),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(22849), AOM_ICDF(28333), AOM_ICDF(32633), AOM_ICDF(32671),
- AOM_ICDF(32768) },
- {AOM_ICDF(18497), AOM_ICDF(24619), AOM_ICDF(32184), AOM_ICDF(32315),
- AOM_ICDF(32768) },
- {AOM_ICDF(11841), AOM_ICDF(14640), AOM_ICDF(27251), AOM_ICDF(27752),
- AOM_ICDF(32768) },
- {AOM_ICDF(8385), AOM_ICDF(10154), AOM_ICDF(18339), AOM_ICDF(19621),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(6977), AOM_ICDF(13787), AOM_ICDF(15289),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(22849), AOM_ICDF(28333), AOM_ICDF(32633), AOM_ICDF(32671),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18497), AOM_ICDF(24619), AOM_ICDF(32184), AOM_ICDF(32315),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11841), AOM_ICDF(14640), AOM_ICDF(27251), AOM_ICDF(27752),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8385), AOM_ICDF(10154), AOM_ICDF(18339), AOM_ICDF(19621),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(6977), AOM_ICDF(13787), AOM_ICDF(15289),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(20417), AOM_ICDF(28167), AOM_ICDF(32552), AOM_ICDF(32621),
- AOM_ICDF(32768) },
- {AOM_ICDF(16833), AOM_ICDF(23968), AOM_ICDF(31991), AOM_ICDF(32174),
- AOM_ICDF(32768) },
- {AOM_ICDF(10433), AOM_ICDF(13387), AOM_ICDF(26356), AOM_ICDF(26951),
- AOM_ICDF(32768) },
- {AOM_ICDF(5057), AOM_ICDF(6823), AOM_ICDF(18967), AOM_ICDF(19843),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(6479), AOM_ICDF(11672), AOM_ICDF(13052),
- AOM_ICDF(32768) },
- {AOM_ICDF(2241), AOM_ICDF(2265), AOM_ICDF(6355), AOM_ICDF(6432),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(20417), AOM_ICDF(28167), AOM_ICDF(32552), AOM_ICDF(32621),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16833), AOM_ICDF(23968), AOM_ICDF(31991), AOM_ICDF(32174),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10433), AOM_ICDF(13387), AOM_ICDF(26356), AOM_ICDF(26951),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5057), AOM_ICDF(6823), AOM_ICDF(18967), AOM_ICDF(19843),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(6479), AOM_ICDF(11672), AOM_ICDF(13052),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2241), AOM_ICDF(2265), AOM_ICDF(6355), AOM_ICDF(6432),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(12097), AOM_ICDF(28717), AOM_ICDF(32406), AOM_ICDF(32555),
- AOM_ICDF(32768) },
- {AOM_ICDF(10433), AOM_ICDF(26113), AOM_ICDF(31504), AOM_ICDF(31975),
- AOM_ICDF(32768) },
- {AOM_ICDF(5825), AOM_ICDF(14284), AOM_ICDF(21349), AOM_ICDF(24461),
- AOM_ICDF(32768) },
- {AOM_ICDF(4545), AOM_ICDF(8454), AOM_ICDF(12648), AOM_ICDF(17501),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(7173), AOM_ICDF(15272), AOM_ICDF(19322),
- AOM_ICDF(32768) },
- {AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
- AOM_ICDF(32768) } } } },
+ { AOM_ICDF(12097), AOM_ICDF(28717), AOM_ICDF(32406), AOM_ICDF(32555),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10433), AOM_ICDF(26113), AOM_ICDF(31504), AOM_ICDF(31975),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5825), AOM_ICDF(14284), AOM_ICDF(21349), AOM_ICDF(24461),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4545), AOM_ICDF(8454), AOM_ICDF(12648), AOM_ICDF(17501),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(7173), AOM_ICDF(15272), AOM_ICDF(19322),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
+ AOM_ICDF(32768) } } } },
{ // UV plane
    { // Intra
{ // Band 0
@@ -4299,70 +1059,70 @@ static const coeff_cdf_model default_coef_head_cdf_4x4[PLANE_TYPES] = {
{ AOM_ICDF(5440), AOM_ICDF(13412), AOM_ICDF(18469), AOM_ICDF(26423),
AOM_ICDF(27669), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(17857), AOM_ICDF(26327), AOM_ICDF(31983), AOM_ICDF(32219),
- AOM_ICDF(32768) },
- {AOM_ICDF(16065), AOM_ICDF(24198), AOM_ICDF(31431), AOM_ICDF(31785),
- AOM_ICDF(32768) },
- {AOM_ICDF(12865), AOM_ICDF(18011), AOM_ICDF(28454), AOM_ICDF(29166),
- AOM_ICDF(32768) },
- {AOM_ICDF(9665), AOM_ICDF(12501), AOM_ICDF(24331), AOM_ICDF(25147),
- AOM_ICDF(32768) },
- {AOM_ICDF(2753), AOM_ICDF(3121), AOM_ICDF(12661), AOM_ICDF(13034),
- AOM_ICDF(32768) },
- {AOM_ICDF(4033), AOM_ICDF(4140), AOM_ICDF(11834), AOM_ICDF(11977),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(17857), AOM_ICDF(26327), AOM_ICDF(31983), AOM_ICDF(32219),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16065), AOM_ICDF(24198), AOM_ICDF(31431), AOM_ICDF(31785),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12865), AOM_ICDF(18011), AOM_ICDF(28454), AOM_ICDF(29166),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9665), AOM_ICDF(12501), AOM_ICDF(24331), AOM_ICDF(25147),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2753), AOM_ICDF(3121), AOM_ICDF(12661), AOM_ICDF(13034),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4033), AOM_ICDF(4140), AOM_ICDF(11834), AOM_ICDF(11977),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(21185), AOM_ICDF(28338), AOM_ICDF(32249), AOM_ICDF(32417),
- AOM_ICDF(32768) },
- {AOM_ICDF(18497), AOM_ICDF(25227), AOM_ICDF(31905), AOM_ICDF(32122),
- AOM_ICDF(32768) },
- {AOM_ICDF(12097), AOM_ICDF(16516), AOM_ICDF(28610), AOM_ICDF(29166),
- AOM_ICDF(32768) },
- {AOM_ICDF(9281), AOM_ICDF(11157), AOM_ICDF(21438), AOM_ICDF(22312),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(6566), AOM_ICDF(15585), AOM_ICDF(16340),
- AOM_ICDF(32768) },
- {AOM_ICDF(9409), AOM_ICDF(9659), AOM_ICDF(11827), AOM_ICDF(12911),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(21185), AOM_ICDF(28338), AOM_ICDF(32249), AOM_ICDF(32417),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18497), AOM_ICDF(25227), AOM_ICDF(31905), AOM_ICDF(32122),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12097), AOM_ICDF(16516), AOM_ICDF(28610), AOM_ICDF(29166),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9281), AOM_ICDF(11157), AOM_ICDF(21438), AOM_ICDF(22312),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(6566), AOM_ICDF(15585), AOM_ICDF(16340),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9409), AOM_ICDF(9659), AOM_ICDF(11827), AOM_ICDF(12911),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(22337), AOM_ICDF(29459), AOM_ICDF(32382), AOM_ICDF(32519),
- AOM_ICDF(32768) },
- {AOM_ICDF(16961), AOM_ICDF(25262), AOM_ICDF(31874), AOM_ICDF(32123),
- AOM_ICDF(32768) },
- {AOM_ICDF(12353), AOM_ICDF(17748), AOM_ICDF(29300), AOM_ICDF(29852),
- AOM_ICDF(32768) },
- {AOM_ICDF(9025), AOM_ICDF(11528), AOM_ICDF(24468), AOM_ICDF(25141),
- AOM_ICDF(32768) },
- {AOM_ICDF(6209), AOM_ICDF(6565), AOM_ICDF(15806), AOM_ICDF(16121),
- AOM_ICDF(32768) },
- {AOM_ICDF(2497), AOM_ICDF(2524), AOM_ICDF(7050), AOM_ICDF(7125),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(22337), AOM_ICDF(29459), AOM_ICDF(32382), AOM_ICDF(32519),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16961), AOM_ICDF(25262), AOM_ICDF(31874), AOM_ICDF(32123),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12353), AOM_ICDF(17748), AOM_ICDF(29300), AOM_ICDF(29852),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9025), AOM_ICDF(11528), AOM_ICDF(24468), AOM_ICDF(25141),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6209), AOM_ICDF(6565), AOM_ICDF(15806), AOM_ICDF(16121),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2497), AOM_ICDF(2524), AOM_ICDF(7050), AOM_ICDF(7125),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(20417), AOM_ICDF(29779), AOM_ICDF(32552), AOM_ICDF(32636),
- AOM_ICDF(32768) },
- {AOM_ICDF(15553), AOM_ICDF(26420), AOM_ICDF(32063), AOM_ICDF(32295),
- AOM_ICDF(32768) },
- {AOM_ICDF(9665), AOM_ICDF(17946), AOM_ICDF(29385), AOM_ICDF(30096),
- AOM_ICDF(32768) },
- {AOM_ICDF(5569), AOM_ICDF(10207), AOM_ICDF(22410), AOM_ICDF(23836),
- AOM_ICDF(32768) },
- {AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
- AOM_ICDF(32768) },
- {AOM_ICDF(2369), AOM_ICDF(2395), AOM_ICDF(6822), AOM_ICDF(6898),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(20417), AOM_ICDF(29779), AOM_ICDF(32552), AOM_ICDF(32636),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15553), AOM_ICDF(26420), AOM_ICDF(32063), AOM_ICDF(32295),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9665), AOM_ICDF(17946), AOM_ICDF(29385), AOM_ICDF(30096),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5569), AOM_ICDF(10207), AOM_ICDF(22410), AOM_ICDF(23836),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2369), AOM_ICDF(2395), AOM_ICDF(6822), AOM_ICDF(6898),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(10177), AOM_ICDF(30567), AOM_ICDF(32725), AOM_ICDF(32745),
- AOM_ICDF(32768) },
- {AOM_ICDF(9537), AOM_ICDF(28243), AOM_ICDF(32179), AOM_ICDF(32423),
- AOM_ICDF(32768) },
- {AOM_ICDF(13377), AOM_ICDF(23187), AOM_ICDF(29322), AOM_ICDF(30382),
- AOM_ICDF(32768) },
- {AOM_ICDF(13121), AOM_ICDF(21346), AOM_ICDF(29507), AOM_ICDF(30326),
- AOM_ICDF(32768) },
- {AOM_ICDF(4417), AOM_ICDF(4939), AOM_ICDF(15104), AOM_ICDF(15535),
- AOM_ICDF(32768) },
- {AOM_ICDF(2625), AOM_ICDF(2680), AOM_ICDF(8218), AOM_ICDF(8338),
- AOM_ICDF(32768) } } },
+ { AOM_ICDF(10177), AOM_ICDF(30567), AOM_ICDF(32725), AOM_ICDF(32745),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9537), AOM_ICDF(28243), AOM_ICDF(32179), AOM_ICDF(32423),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13377), AOM_ICDF(23187), AOM_ICDF(29322), AOM_ICDF(30382),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13121), AOM_ICDF(21346), AOM_ICDF(29507), AOM_ICDF(30326),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4417), AOM_ICDF(4939), AOM_ICDF(15104), AOM_ICDF(15535),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2625), AOM_ICDF(2680), AOM_ICDF(8218), AOM_ICDF(8338),
+ AOM_ICDF(32768) } } },
{ // Inter
{ // Band 0
{ AOM_ICDF(29376), AOM_ICDF(30098), AOM_ICDF(32421), AOM_ICDF(32766),
@@ -4372,70 +1132,70 @@ static const coeff_cdf_model default_coef_head_cdf_4x4[PLANE_TYPES] = {
{ AOM_ICDF(5952), AOM_ICDF(16505), AOM_ICDF(25955), AOM_ICDF(32163),
AOM_ICDF(32365), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(19649), AOM_ICDF(30160), AOM_ICDF(32743), AOM_ICDF(32753),
- AOM_ICDF(32768) },
- {AOM_ICDF(18881), AOM_ICDF(28724), AOM_ICDF(32688), AOM_ICDF(32717),
- AOM_ICDF(32768) },
- {AOM_ICDF(16833), AOM_ICDF(23053), AOM_ICDF(31244), AOM_ICDF(31573),
- AOM_ICDF(32768) },
- {AOM_ICDF(14657), AOM_ICDF(17714), AOM_ICDF(26083), AOM_ICDF(26978),
- AOM_ICDF(32768) },
- {AOM_ICDF(14657), AOM_ICDF(16618), AOM_ICDF(24597), AOM_ICDF(25403),
- AOM_ICDF(32768) },
- {AOM_ICDF(4289), AOM_ICDF(4326), AOM_ICDF(10686), AOM_ICDF(10751),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(19649), AOM_ICDF(30160), AOM_ICDF(32743), AOM_ICDF(32753),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18881), AOM_ICDF(28724), AOM_ICDF(32688), AOM_ICDF(32717),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16833), AOM_ICDF(23053), AOM_ICDF(31244), AOM_ICDF(31573),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(14657), AOM_ICDF(17714), AOM_ICDF(26083), AOM_ICDF(26978),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(14657), AOM_ICDF(16618), AOM_ICDF(24597), AOM_ICDF(25403),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4289), AOM_ICDF(4326), AOM_ICDF(10686), AOM_ICDF(10751),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(21953), AOM_ICDF(30956), AOM_ICDF(32748), AOM_ICDF(32757),
- AOM_ICDF(32768) },
- {AOM_ICDF(20929), AOM_ICDF(29412), AOM_ICDF(32700), AOM_ICDF(32725),
- AOM_ICDF(32768) },
- {AOM_ICDF(13377), AOM_ICDF(21495), AOM_ICDF(31216), AOM_ICDF(31569),
- AOM_ICDF(32768) },
- {AOM_ICDF(9153), AOM_ICDF(15097), AOM_ICDF(28295), AOM_ICDF(28990),
- AOM_ICDF(32768) },
- {AOM_ICDF(5313), AOM_ICDF(5363), AOM_ICDF(13839), AOM_ICDF(13894),
- AOM_ICDF(32768) },
- {AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(21953), AOM_ICDF(30956), AOM_ICDF(32748), AOM_ICDF(32757),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20929), AOM_ICDF(29412), AOM_ICDF(32700), AOM_ICDF(32725),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13377), AOM_ICDF(21495), AOM_ICDF(31216), AOM_ICDF(31569),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9153), AOM_ICDF(15097), AOM_ICDF(28295), AOM_ICDF(28990),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5313), AOM_ICDF(5363), AOM_ICDF(13839), AOM_ICDF(13894),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(20289), AOM_ICDF(31164), AOM_ICDF(32745), AOM_ICDF(32755),
- AOM_ICDF(32768) },
- {AOM_ICDF(17601), AOM_ICDF(29635), AOM_ICDF(32739), AOM_ICDF(32751),
- AOM_ICDF(32768) },
- {AOM_ICDF(18241), AOM_ICDF(24284), AOM_ICDF(32116), AOM_ICDF(32258),
- AOM_ICDF(32768) },
- {AOM_ICDF(32705), AOM_ICDF(32706), AOM_ICDF(32739), AOM_ICDF(32740),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(5750), AOM_ICDF(14739), AOM_ICDF(14792),
- AOM_ICDF(32768) },
- {AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(20289), AOM_ICDF(31164), AOM_ICDF(32745), AOM_ICDF(32755),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17601), AOM_ICDF(29635), AOM_ICDF(32739), AOM_ICDF(32751),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18241), AOM_ICDF(24284), AOM_ICDF(32116), AOM_ICDF(32258),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(32705), AOM_ICDF(32706), AOM_ICDF(32739), AOM_ICDF(32740),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(5750), AOM_ICDF(14739), AOM_ICDF(14792),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(19009), AOM_ICDF(31481), AOM_ICDF(32742), AOM_ICDF(32754),
- AOM_ICDF(32768) },
- {AOM_ICDF(15809), AOM_ICDF(30521), AOM_ICDF(32736), AOM_ICDF(32750),
- AOM_ICDF(32768) },
- {AOM_ICDF(16449), AOM_ICDF(32705), AOM_ICDF(32737), AOM_ICDF(32753),
- AOM_ICDF(32768) },
- {AOM_ICDF(7873), AOM_ICDF(8039), AOM_ICDF(19981), AOM_ICDF(20068),
- AOM_ICDF(32768) },
- {AOM_ICDF(5313), AOM_ICDF(5366), AOM_ICDF(14376), AOM_ICDF(14430),
- AOM_ICDF(32768) },
- {AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(19009), AOM_ICDF(31481), AOM_ICDF(32742), AOM_ICDF(32754),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15809), AOM_ICDF(30521), AOM_ICDF(32736), AOM_ICDF(32750),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16449), AOM_ICDF(32705), AOM_ICDF(32737), AOM_ICDF(32753),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7873), AOM_ICDF(8039), AOM_ICDF(19981), AOM_ICDF(20068),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5313), AOM_ICDF(5366), AOM_ICDF(14376), AOM_ICDF(14430),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(11841), AOM_ICDF(32116), AOM_ICDF(32728), AOM_ICDF(32748),
- AOM_ICDF(32768) },
- {AOM_ICDF(12353), AOM_ICDF(32132), AOM_ICDF(32729), AOM_ICDF(32748),
- AOM_ICDF(32768) },
- {AOM_ICDF(7489), AOM_ICDF(12435), AOM_ICDF(25708), AOM_ICDF(26666),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(7486), AOM_ICDF(20238), AOM_ICDF(21009),
- AOM_ICDF(32768) },
- {AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
- AOM_ICDF(32768) },
- {AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
- AOM_ICDF(32768) } } } }
+ { AOM_ICDF(11841), AOM_ICDF(32116), AOM_ICDF(32728), AOM_ICDF(32748),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12353), AOM_ICDF(32132), AOM_ICDF(32729), AOM_ICDF(32748),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7489), AOM_ICDF(12435), AOM_ICDF(25708), AOM_ICDF(26666),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(7486), AOM_ICDF(20238), AOM_ICDF(21009),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
+ AOM_ICDF(32768) } } } }
};
static const coeff_cdf_model default_coef_head_cdf_8x8[PLANE_TYPES] = {
{ // Y plane
@@ -4448,70 +1208,70 @@ static const coeff_cdf_model default_coef_head_cdf_8x8[PLANE_TYPES] = {
{ AOM_ICDF(832), AOM_ICDF(5270), AOM_ICDF(5918), AOM_ICDF(12645),
AOM_ICDF(13532), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(14017), AOM_ICDF(16139), AOM_ICDF(26799), AOM_ICDF(27295),
- AOM_ICDF(32768) },
- {AOM_ICDF(12737), AOM_ICDF(15136), AOM_ICDF(26235), AOM_ICDF(26816),
- AOM_ICDF(32768) },
- {AOM_ICDF(10817), AOM_ICDF(12445), AOM_ICDF(23637), AOM_ICDF(24217),
- AOM_ICDF(32768) },
- {AOM_ICDF(8897), AOM_ICDF(9702), AOM_ICDF(20040), AOM_ICDF(20500),
- AOM_ICDF(32768) },
- {AOM_ICDF(5953), AOM_ICDF(6156), AOM_ICDF(13966), AOM_ICDF(14205),
- AOM_ICDF(32768) },
- {AOM_ICDF(2497), AOM_ICDF(2519), AOM_ICDF(6222), AOM_ICDF(6300),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(14017), AOM_ICDF(16139), AOM_ICDF(26799), AOM_ICDF(27295),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12737), AOM_ICDF(15136), AOM_ICDF(26235), AOM_ICDF(26816),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10817), AOM_ICDF(12445), AOM_ICDF(23637), AOM_ICDF(24217),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8897), AOM_ICDF(9702), AOM_ICDF(20040), AOM_ICDF(20500),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5953), AOM_ICDF(6156), AOM_ICDF(13966), AOM_ICDF(14205),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2497), AOM_ICDF(2519), AOM_ICDF(6222), AOM_ICDF(6300),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(19777), AOM_ICDF(21403), AOM_ICDF(30054), AOM_ICDF(30269),
- AOM_ICDF(32768) },
- {AOM_ICDF(16193), AOM_ICDF(17913), AOM_ICDF(28593), AOM_ICDF(28883),
- AOM_ICDF(32768) },
- {AOM_ICDF(12609), AOM_ICDF(13572), AOM_ICDF(25248), AOM_ICDF(25534),
- AOM_ICDF(32768) },
- {AOM_ICDF(9665), AOM_ICDF(10118), AOM_ICDF(20721), AOM_ICDF(20968),
- AOM_ICDF(32768) },
- {AOM_ICDF(6849), AOM_ICDF(7028), AOM_ICDF(15202), AOM_ICDF(15391),
- AOM_ICDF(32768) },
- {AOM_ICDF(3009), AOM_ICDF(3036), AOM_ICDF(7601), AOM_ICDF(7675),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(19777), AOM_ICDF(21403), AOM_ICDF(30054), AOM_ICDF(30269),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16193), AOM_ICDF(17913), AOM_ICDF(28593), AOM_ICDF(28883),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12609), AOM_ICDF(13572), AOM_ICDF(25248), AOM_ICDF(25534),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9665), AOM_ICDF(10118), AOM_ICDF(20721), AOM_ICDF(20968),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6849), AOM_ICDF(7028), AOM_ICDF(15202), AOM_ICDF(15391),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3009), AOM_ICDF(3036), AOM_ICDF(7601), AOM_ICDF(7675),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(22593), AOM_ICDF(23915), AOM_ICDF(31159), AOM_ICDF(31283),
- AOM_ICDF(32768) },
- {AOM_ICDF(17345), AOM_ICDF(18690), AOM_ICDF(29425), AOM_ICDF(29611),
- AOM_ICDF(32768) },
- {AOM_ICDF(11969), AOM_ICDF(12540), AOM_ICDF(24685), AOM_ICDF(24867),
- AOM_ICDF(32768) },
- {AOM_ICDF(8129), AOM_ICDF(8355), AOM_ICDF(18668), AOM_ICDF(18819),
- AOM_ICDF(32768) },
- {AOM_ICDF(4673), AOM_ICDF(4714), AOM_ICDF(11752), AOM_ICDF(11814),
- AOM_ICDF(32768) },
- {AOM_ICDF(1857), AOM_ICDF(1876), AOM_ICDF(5057), AOM_ICDF(5138),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(22593), AOM_ICDF(23915), AOM_ICDF(31159), AOM_ICDF(31283),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17345), AOM_ICDF(18690), AOM_ICDF(29425), AOM_ICDF(29611),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11969), AOM_ICDF(12540), AOM_ICDF(24685), AOM_ICDF(24867),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8129), AOM_ICDF(8355), AOM_ICDF(18668), AOM_ICDF(18819),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4673), AOM_ICDF(4714), AOM_ICDF(11752), AOM_ICDF(11814),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1857), AOM_ICDF(1876), AOM_ICDF(5057), AOM_ICDF(5138),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(24513), AOM_ICDF(25718), AOM_ICDF(31947), AOM_ICDF(32014),
- AOM_ICDF(32768) },
- {AOM_ICDF(18881), AOM_ICDF(20029), AOM_ICDF(30409), AOM_ICDF(30527),
- AOM_ICDF(32768) },
- {AOM_ICDF(12481), AOM_ICDF(12953), AOM_ICDF(25201), AOM_ICDF(25341),
- AOM_ICDF(32768) },
- {AOM_ICDF(8385), AOM_ICDF(8528), AOM_ICDF(18815), AOM_ICDF(18910),
- AOM_ICDF(32768) },
- {AOM_ICDF(4289), AOM_ICDF(4327), AOM_ICDF(10797), AOM_ICDF(10861),
- AOM_ICDF(32768) },
- {AOM_ICDF(1857), AOM_ICDF(1872), AOM_ICDF(4332), AOM_ICDF(4415),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(24513), AOM_ICDF(25718), AOM_ICDF(31947), AOM_ICDF(32014),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18881), AOM_ICDF(20029), AOM_ICDF(30409), AOM_ICDF(30527),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12481), AOM_ICDF(12953), AOM_ICDF(25201), AOM_ICDF(25341),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8385), AOM_ICDF(8528), AOM_ICDF(18815), AOM_ICDF(18910),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4289), AOM_ICDF(4327), AOM_ICDF(10797), AOM_ICDF(10861),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1857), AOM_ICDF(1872), AOM_ICDF(4332), AOM_ICDF(4415),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(26049), AOM_ICDF(27752), AOM_ICDF(32415), AOM_ICDF(32462),
- AOM_ICDF(32768) },
- {AOM_ICDF(20417), AOM_ICDF(22100), AOM_ICDF(31056), AOM_ICDF(31192),
- AOM_ICDF(32768) },
- {AOM_ICDF(12481), AOM_ICDF(13075), AOM_ICDF(24646), AOM_ICDF(24844),
- AOM_ICDF(32768) },
- {AOM_ICDF(7489), AOM_ICDF(7696), AOM_ICDF(17117), AOM_ICDF(17285),
- AOM_ICDF(32768) },
- {AOM_ICDF(3777), AOM_ICDF(3814), AOM_ICDF(10062), AOM_ICDF(10129),
- AOM_ICDF(32768) },
- {AOM_ICDF(1473), AOM_ICDF(1486), AOM_ICDF(3735), AOM_ICDF(3820),
- AOM_ICDF(32768) } } },
+ { AOM_ICDF(26049), AOM_ICDF(27752), AOM_ICDF(32415), AOM_ICDF(32462),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20417), AOM_ICDF(22100), AOM_ICDF(31056), AOM_ICDF(31192),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12481), AOM_ICDF(13075), AOM_ICDF(24646), AOM_ICDF(24844),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7489), AOM_ICDF(7696), AOM_ICDF(17117), AOM_ICDF(17285),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3777), AOM_ICDF(3814), AOM_ICDF(10062), AOM_ICDF(10129),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1473), AOM_ICDF(1486), AOM_ICDF(3735), AOM_ICDF(3820),
+ AOM_ICDF(32768) } } },
    { // Inter
{ // Band 0
{ AOM_ICDF(25920), AOM_ICDF(27743), AOM_ICDF(29455), AOM_ICDF(32147),
@@ -4521,70 +1281,70 @@ static const coeff_cdf_model default_coef_head_cdf_8x8[PLANE_TYPES] = {
{ AOM_ICDF(2368), AOM_ICDF(12781), AOM_ICDF(16196), AOM_ICDF(27232),
AOM_ICDF(27894), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(21697), AOM_ICDF(24758), AOM_ICDF(32358), AOM_ICDF(32417),
- AOM_ICDF(32768) },
- {AOM_ICDF(20289), AOM_ICDF(23960), AOM_ICDF(32111), AOM_ICDF(32213),
- AOM_ICDF(32768) },
- {AOM_ICDF(17345), AOM_ICDF(19966), AOM_ICDF(30630), AOM_ICDF(30841),
- AOM_ICDF(32768) },
- {AOM_ICDF(14529), AOM_ICDF(16070), AOM_ICDF(27461), AOM_ICDF(27777),
- AOM_ICDF(32768) },
- {AOM_ICDF(9793), AOM_ICDF(10613), AOM_ICDF(21146), AOM_ICDF(21566),
- AOM_ICDF(32768) },
- {AOM_ICDF(6977), AOM_ICDF(7162), AOM_ICDF(15591), AOM_ICDF(15776),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(21697), AOM_ICDF(24758), AOM_ICDF(32358), AOM_ICDF(32417),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20289), AOM_ICDF(23960), AOM_ICDF(32111), AOM_ICDF(32213),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17345), AOM_ICDF(19966), AOM_ICDF(30630), AOM_ICDF(30841),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(14529), AOM_ICDF(16070), AOM_ICDF(27461), AOM_ICDF(27777),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9793), AOM_ICDF(10613), AOM_ICDF(21146), AOM_ICDF(21566),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6977), AOM_ICDF(7162), AOM_ICDF(15591), AOM_ICDF(15776),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(23617), AOM_ICDF(26783), AOM_ICDF(32572), AOM_ICDF(32607),
- AOM_ICDF(32768) },
- {AOM_ICDF(20801), AOM_ICDF(24292), AOM_ICDF(32185), AOM_ICDF(32275),
- AOM_ICDF(32768) },
- {AOM_ICDF(15169), AOM_ICDF(17905), AOM_ICDF(29916), AOM_ICDF(30181),
- AOM_ICDF(32768) },
- {AOM_ICDF(10945), AOM_ICDF(12972), AOM_ICDF(25565), AOM_ICDF(26064),
- AOM_ICDF(32768) },
- {AOM_ICDF(6849), AOM_ICDF(8334), AOM_ICDF(18543), AOM_ICDF(19446),
- AOM_ICDF(32768) },
- {AOM_ICDF(3649), AOM_ICDF(4346), AOM_ICDF(12351), AOM_ICDF(13169),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(23617), AOM_ICDF(26783), AOM_ICDF(32572), AOM_ICDF(32607),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20801), AOM_ICDF(24292), AOM_ICDF(32185), AOM_ICDF(32275),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15169), AOM_ICDF(17905), AOM_ICDF(29916), AOM_ICDF(30181),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10945), AOM_ICDF(12972), AOM_ICDF(25565), AOM_ICDF(26064),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6849), AOM_ICDF(8334), AOM_ICDF(18543), AOM_ICDF(19446),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3649), AOM_ICDF(4346), AOM_ICDF(12351), AOM_ICDF(13169),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(25281), AOM_ICDF(28440), AOM_ICDF(32667), AOM_ICDF(32689),
- AOM_ICDF(32768) },
- {AOM_ICDF(22081), AOM_ICDF(25694), AOM_ICDF(32414), AOM_ICDF(32476),
- AOM_ICDF(32768) },
- {AOM_ICDF(15297), AOM_ICDF(18341), AOM_ICDF(30141), AOM_ICDF(30410),
- AOM_ICDF(32768) },
- {AOM_ICDF(10305), AOM_ICDF(12381), AOM_ICDF(24477), AOM_ICDF(25084),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(6673), AOM_ICDF(16325), AOM_ICDF(17080),
- AOM_ICDF(32768) },
- {AOM_ICDF(2369), AOM_ICDF(2393), AOM_ICDF(6466), AOM_ICDF(6543),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(25281), AOM_ICDF(28440), AOM_ICDF(32667), AOM_ICDF(32689),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22081), AOM_ICDF(25694), AOM_ICDF(32414), AOM_ICDF(32476),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15297), AOM_ICDF(18341), AOM_ICDF(30141), AOM_ICDF(30410),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10305), AOM_ICDF(12381), AOM_ICDF(24477), AOM_ICDF(25084),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(6673), AOM_ICDF(16325), AOM_ICDF(17080),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2369), AOM_ICDF(2393), AOM_ICDF(6466), AOM_ICDF(6543),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(25921), AOM_ICDF(29445), AOM_ICDF(32729), AOM_ICDF(32739),
- AOM_ICDF(32768) },
- {AOM_ICDF(22465), AOM_ICDF(26834), AOM_ICDF(32588), AOM_ICDF(32627),
- AOM_ICDF(32768) },
- {AOM_ICDF(16449), AOM_ICDF(20062), AOM_ICDF(31016), AOM_ICDF(31233),
- AOM_ICDF(32768) },
- {AOM_ICDF(11073), AOM_ICDF(13165), AOM_ICDF(25353), AOM_ICDF(25896),
- AOM_ICDF(32768) },
- {AOM_ICDF(11713), AOM_ICDF(13837), AOM_ICDF(20144), AOM_ICDF(21734),
- AOM_ICDF(32768) },
- {AOM_ICDF(2241), AOM_ICDF(2265), AOM_ICDF(6355), AOM_ICDF(6432),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(25921), AOM_ICDF(29445), AOM_ICDF(32729), AOM_ICDF(32739),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22465), AOM_ICDF(26834), AOM_ICDF(32588), AOM_ICDF(32627),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16449), AOM_ICDF(20062), AOM_ICDF(31016), AOM_ICDF(31233),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11073), AOM_ICDF(13165), AOM_ICDF(25353), AOM_ICDF(25896),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11713), AOM_ICDF(13837), AOM_ICDF(20144), AOM_ICDF(21734),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2241), AOM_ICDF(2265), AOM_ICDF(6355), AOM_ICDF(6432),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(26177), AOM_ICDF(29403), AOM_ICDF(32705), AOM_ICDF(32721),
- AOM_ICDF(32768) },
- {AOM_ICDF(22337), AOM_ICDF(26344), AOM_ICDF(32545), AOM_ICDF(32589),
- AOM_ICDF(32768) },
- {AOM_ICDF(19009), AOM_ICDF(21527), AOM_ICDF(31775), AOM_ICDF(31873),
- AOM_ICDF(32768) },
- {AOM_ICDF(11585), AOM_ICDF(12685), AOM_ICDF(22632), AOM_ICDF(23137),
- AOM_ICDF(32768) },
- {AOM_ICDF(8257), AOM_ICDF(8305), AOM_ICDF(16444), AOM_ICDF(16492),
- AOM_ICDF(32768) },
- {AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
- AOM_ICDF(32768) } } } },
+ { AOM_ICDF(26177), AOM_ICDF(29403), AOM_ICDF(32705), AOM_ICDF(32721),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22337), AOM_ICDF(26344), AOM_ICDF(32545), AOM_ICDF(32589),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(19009), AOM_ICDF(21527), AOM_ICDF(31775), AOM_ICDF(31873),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11585), AOM_ICDF(12685), AOM_ICDF(22632), AOM_ICDF(23137),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8257), AOM_ICDF(8305), AOM_ICDF(16444), AOM_ICDF(16492),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
+ AOM_ICDF(32768) } } } },
{ // UV plane
    { // Intra
{ // Band 0
@@ -4595,70 +1355,70 @@ static const coeff_cdf_model default_coef_head_cdf_8x8[PLANE_TYPES] = {
{ AOM_ICDF(3776), AOM_ICDF(11778), AOM_ICDF(14700), AOM_ICDF(23745),
AOM_ICDF(24854), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(20289), AOM_ICDF(25202), AOM_ICDF(31672), AOM_ICDF(31909),
- AOM_ICDF(32768) },
- {AOM_ICDF(18369), AOM_ICDF(23493), AOM_ICDF(31166), AOM_ICDF(31487),
- AOM_ICDF(32768) },
- {AOM_ICDF(15425), AOM_ICDF(18619), AOM_ICDF(28941), AOM_ICDF(29393),
- AOM_ICDF(32768) },
- {AOM_ICDF(10945), AOM_ICDF(12535), AOM_ICDF(24287), AOM_ICDF(24792),
- AOM_ICDF(32768) },
- {AOM_ICDF(6465), AOM_ICDF(6810), AOM_ICDF(15764), AOM_ICDF(16080),
- AOM_ICDF(32768) },
- {AOM_ICDF(2113), AOM_ICDF(2137), AOM_ICDF(6125), AOM_ICDF(6203),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(20289), AOM_ICDF(25202), AOM_ICDF(31672), AOM_ICDF(31909),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18369), AOM_ICDF(23493), AOM_ICDF(31166), AOM_ICDF(31487),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15425), AOM_ICDF(18619), AOM_ICDF(28941), AOM_ICDF(29393),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10945), AOM_ICDF(12535), AOM_ICDF(24287), AOM_ICDF(24792),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6465), AOM_ICDF(6810), AOM_ICDF(15764), AOM_ICDF(16080),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2113), AOM_ICDF(2137), AOM_ICDF(6125), AOM_ICDF(6203),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(23745), AOM_ICDF(27041), AOM_ICDF(31976), AOM_ICDF(32135),
- AOM_ICDF(32768) },
- {AOM_ICDF(19521), AOM_ICDF(22766), AOM_ICDF(31139), AOM_ICDF(31367),
- AOM_ICDF(32768) },
- {AOM_ICDF(14273), AOM_ICDF(15834), AOM_ICDF(27820), AOM_ICDF(28105),
- AOM_ICDF(32768) },
- {AOM_ICDF(9537), AOM_ICDF(10445), AOM_ICDF(22106), AOM_ICDF(22491),
- AOM_ICDF(32768) },
- {AOM_ICDF(7233), AOM_ICDF(7386), AOM_ICDF(15961), AOM_ICDF(16109),
- AOM_ICDF(32768) },
- {AOM_ICDF(2369), AOM_ICDF(2401), AOM_ICDF(7891), AOM_ICDF(7964),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(23745), AOM_ICDF(27041), AOM_ICDF(31976), AOM_ICDF(32135),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(19521), AOM_ICDF(22766), AOM_ICDF(31139), AOM_ICDF(31367),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(14273), AOM_ICDF(15834), AOM_ICDF(27820), AOM_ICDF(28105),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9537), AOM_ICDF(10445), AOM_ICDF(22106), AOM_ICDF(22491),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7233), AOM_ICDF(7386), AOM_ICDF(15961), AOM_ICDF(16109),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2369), AOM_ICDF(2401), AOM_ICDF(7891), AOM_ICDF(7964),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(26305), AOM_ICDF(28703), AOM_ICDF(32352), AOM_ICDF(32435),
- AOM_ICDF(32768) },
- {AOM_ICDF(20673), AOM_ICDF(23490), AOM_ICDF(31517), AOM_ICDF(31680),
- AOM_ICDF(32768) },
- {AOM_ICDF(14017), AOM_ICDF(15251), AOM_ICDF(27458), AOM_ICDF(27702),
- AOM_ICDF(32768) },
- {AOM_ICDF(10945), AOM_ICDF(11374), AOM_ICDF(22496), AOM_ICDF(22687),
- AOM_ICDF(32768) },
- {AOM_ICDF(9153), AOM_ICDF(9435), AOM_ICDF(22299), AOM_ICDF(22411),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(269), AOM_ICDF(13236), AOM_ICDF(13293),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(26305), AOM_ICDF(28703), AOM_ICDF(32352), AOM_ICDF(32435),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20673), AOM_ICDF(23490), AOM_ICDF(31517), AOM_ICDF(31680),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(14017), AOM_ICDF(15251), AOM_ICDF(27458), AOM_ICDF(27702),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10945), AOM_ICDF(11374), AOM_ICDF(22496), AOM_ICDF(22687),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9153), AOM_ICDF(9435), AOM_ICDF(22299), AOM_ICDF(22411),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(269), AOM_ICDF(13236), AOM_ICDF(13293),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(27713), AOM_ICDF(29770), AOM_ICDF(32522), AOM_ICDF(32575),
- AOM_ICDF(32768) },
- {AOM_ICDF(21569), AOM_ICDF(24342), AOM_ICDF(31785), AOM_ICDF(31919),
- AOM_ICDF(32768) },
- {AOM_ICDF(15297), AOM_ICDF(16497), AOM_ICDF(28367), AOM_ICDF(28569),
- AOM_ICDF(32768) },
- {AOM_ICDF(17601), AOM_ICDF(17828), AOM_ICDF(24444), AOM_ICDF(24582),
- AOM_ICDF(32768) },
- {AOM_ICDF(6977), AOM_ICDF(7035), AOM_ICDF(16901), AOM_ICDF(16947),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(384), AOM_ICDF(32706), AOM_ICDF(32707),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(27713), AOM_ICDF(29770), AOM_ICDF(32522), AOM_ICDF(32575),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21569), AOM_ICDF(24342), AOM_ICDF(31785), AOM_ICDF(31919),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15297), AOM_ICDF(16497), AOM_ICDF(28367), AOM_ICDF(28569),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17601), AOM_ICDF(17828), AOM_ICDF(24444), AOM_ICDF(24582),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6977), AOM_ICDF(7035), AOM_ICDF(16901), AOM_ICDF(16947),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(384), AOM_ICDF(32706), AOM_ICDF(32707),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(28737), AOM_ICDF(30879), AOM_ICDF(32667), AOM_ICDF(32695),
- AOM_ICDF(32768) },
- {AOM_ICDF(22593), AOM_ICDF(26241), AOM_ICDF(32073), AOM_ICDF(32207),
- AOM_ICDF(32768) },
- {AOM_ICDF(16577), AOM_ICDF(19148), AOM_ICDF(28436), AOM_ICDF(28906),
- AOM_ICDF(32768) },
- {AOM_ICDF(12993), AOM_ICDF(14005), AOM_ICDF(23151), AOM_ICDF(23630),
- AOM_ICDF(32768) },
- {AOM_ICDF(7617), AOM_ICDF(9188), AOM_ICDF(22797), AOM_ICDF(23313),
- AOM_ICDF(32768) },
- {AOM_ICDF(2625), AOM_ICDF(2680), AOM_ICDF(8218), AOM_ICDF(8338),
- AOM_ICDF(32768) } } },
+ { AOM_ICDF(28737), AOM_ICDF(30879), AOM_ICDF(32667), AOM_ICDF(32695),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22593), AOM_ICDF(26241), AOM_ICDF(32073), AOM_ICDF(32207),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16577), AOM_ICDF(19148), AOM_ICDF(28436), AOM_ICDF(28906),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12993), AOM_ICDF(14005), AOM_ICDF(23151), AOM_ICDF(23630),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7617), AOM_ICDF(9188), AOM_ICDF(22797), AOM_ICDF(23313),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2625), AOM_ICDF(2680), AOM_ICDF(8218), AOM_ICDF(8338),
+ AOM_ICDF(32768) } } },
{ // Inter
{ // Band 0
{ AOM_ICDF(28864), AOM_ICDF(29988), AOM_ICDF(32423), AOM_ICDF(32766),
@@ -4668,70 +1428,70 @@ static const coeff_cdf_model default_coef_head_cdf_8x8[PLANE_TYPES] = {
{ AOM_ICDF(5440), AOM_ICDF(19618), AOM_ICDF(25332), AOM_ICDF(32393),
AOM_ICDF(32491), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(23745), AOM_ICDF(29427), AOM_ICDF(32751), AOM_ICDF(32757),
- AOM_ICDF(32768) },
- {AOM_ICDF(23745), AOM_ICDF(28704), AOM_ICDF(32716), AOM_ICDF(32731),
- AOM_ICDF(32768) },
- {AOM_ICDF(23105), AOM_ICDF(27943), AOM_ICDF(32524), AOM_ICDF(32587),
- AOM_ICDF(32768) },
- {AOM_ICDF(21057), AOM_ICDF(24773), AOM_ICDF(29589), AOM_ICDF(30282),
- AOM_ICDF(32768) },
- {AOM_ICDF(12609), AOM_ICDF(14823), AOM_ICDF(23831), AOM_ICDF(24713),
- AOM_ICDF(32768) },
- {AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(23745), AOM_ICDF(29427), AOM_ICDF(32751), AOM_ICDF(32757),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(23745), AOM_ICDF(28704), AOM_ICDF(32716), AOM_ICDF(32731),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(23105), AOM_ICDF(27943), AOM_ICDF(32524), AOM_ICDF(32587),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21057), AOM_ICDF(24773), AOM_ICDF(29589), AOM_ICDF(30282),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12609), AOM_ICDF(14823), AOM_ICDF(23831), AOM_ICDF(24713),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(25025), AOM_ICDF(30203), AOM_ICDF(32754), AOM_ICDF(32759),
- AOM_ICDF(32768) },
- {AOM_ICDF(23617), AOM_ICDF(28361), AOM_ICDF(32715), AOM_ICDF(32729),
- AOM_ICDF(32768) },
- {AOM_ICDF(17985), AOM_ICDF(21562), AOM_ICDF(31354), AOM_ICDF(31543),
- AOM_ICDF(32768) },
- {AOM_ICDF(12353), AOM_ICDF(18915), AOM_ICDF(28742), AOM_ICDF(29548),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(289), AOM_ICDF(16545), AOM_ICDF(16593),
- AOM_ICDF(32768) },
- {AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(25025), AOM_ICDF(30203), AOM_ICDF(32754), AOM_ICDF(32759),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(23617), AOM_ICDF(28361), AOM_ICDF(32715), AOM_ICDF(32729),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17985), AOM_ICDF(21562), AOM_ICDF(31354), AOM_ICDF(31543),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12353), AOM_ICDF(18915), AOM_ICDF(28742), AOM_ICDF(29548),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(289), AOM_ICDF(16545), AOM_ICDF(16593),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(26433), AOM_ICDF(30892), AOM_ICDF(32757), AOM_ICDF(32761),
- AOM_ICDF(32768) },
- {AOM_ICDF(24513), AOM_ICDF(29274), AOM_ICDF(32721), AOM_ICDF(32735),
- AOM_ICDF(32768) },
- {AOM_ICDF(20161), AOM_ICDF(24040), AOM_ICDF(32055), AOM_ICDF(32171),
- AOM_ICDF(32768) },
- {AOM_ICDF(21953), AOM_ICDF(24678), AOM_ICDF(27382), AOM_ICDF(28734),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(5750), AOM_ICDF(14739), AOM_ICDF(14792),
- AOM_ICDF(32768) },
- {AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(26433), AOM_ICDF(30892), AOM_ICDF(32757), AOM_ICDF(32761),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(24513), AOM_ICDF(29274), AOM_ICDF(32721), AOM_ICDF(32735),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20161), AOM_ICDF(24040), AOM_ICDF(32055), AOM_ICDF(32171),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21953), AOM_ICDF(24678), AOM_ICDF(27382), AOM_ICDF(28734),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(5750), AOM_ICDF(14739), AOM_ICDF(14792),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(27457), AOM_ICDF(31485), AOM_ICDF(32759), AOM_ICDF(32763),
- AOM_ICDF(32768) },
- {AOM_ICDF(24129), AOM_ICDF(29502), AOM_ICDF(32752), AOM_ICDF(32757),
- AOM_ICDF(32768) },
- {AOM_ICDF(19009), AOM_ICDF(25452), AOM_ICDF(32473), AOM_ICDF(32544),
- AOM_ICDF(32768) },
- {AOM_ICDF(32705), AOM_ICDF(32706), AOM_ICDF(32737), AOM_ICDF(32738),
- AOM_ICDF(32768) },
- {AOM_ICDF(5313), AOM_ICDF(5366), AOM_ICDF(14376), AOM_ICDF(14430),
- AOM_ICDF(32768) },
- {AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(27457), AOM_ICDF(31485), AOM_ICDF(32759), AOM_ICDF(32763),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(24129), AOM_ICDF(29502), AOM_ICDF(32752), AOM_ICDF(32757),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(19009), AOM_ICDF(25452), AOM_ICDF(32473), AOM_ICDF(32544),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(32705), AOM_ICDF(32706), AOM_ICDF(32737), AOM_ICDF(32738),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5313), AOM_ICDF(5366), AOM_ICDF(14376), AOM_ICDF(14430),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(27841), AOM_ICDF(32288), AOM_ICDF(32759), AOM_ICDF(32764),
- AOM_ICDF(32768) },
- {AOM_ICDF(19137), AOM_ICDF(30271), AOM_ICDF(32742), AOM_ICDF(32753),
- AOM_ICDF(32768) },
- {AOM_ICDF(18625), AOM_ICDF(27739), AOM_ICDF(29979), AOM_ICDF(31099),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(7486), AOM_ICDF(20238), AOM_ICDF(21009),
- AOM_ICDF(32768) },
- {AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
- AOM_ICDF(32768) },
- {AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
- AOM_ICDF(32768) } } } }
+ { AOM_ICDF(27841), AOM_ICDF(32288), AOM_ICDF(32759), AOM_ICDF(32764),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(19137), AOM_ICDF(30271), AOM_ICDF(32742), AOM_ICDF(32753),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18625), AOM_ICDF(27739), AOM_ICDF(29979), AOM_ICDF(31099),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(7486), AOM_ICDF(20238), AOM_ICDF(21009),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
+ AOM_ICDF(32768) } } } }
};
static const coeff_cdf_model default_coef_head_cdf_16x16[PLANE_TYPES] = {
{ // Y plane
@@ -4744,70 +1504,70 @@ static const coeff_cdf_model default_coef_head_cdf_16x16[PLANE_TYPES] = {
{ AOM_ICDF(192), AOM_ICDF(3443), AOM_ICDF(3759), AOM_ICDF(9011),
AOM_ICDF(9685), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(12481), AOM_ICDF(13958), AOM_ICDF(24487), AOM_ICDF(24997),
- AOM_ICDF(32768) },
- {AOM_ICDF(11457), AOM_ICDF(13075), AOM_ICDF(23820), AOM_ICDF(24406),
- AOM_ICDF(32768) },
- {AOM_ICDF(9793), AOM_ICDF(11127), AOM_ICDF(21775), AOM_ICDF(22387),
- AOM_ICDF(32768) },
- {AOM_ICDF(7745), AOM_ICDF(8457), AOM_ICDF(18155), AOM_ICDF(18655),
- AOM_ICDF(32768) },
- {AOM_ICDF(5441), AOM_ICDF(5668), AOM_ICDF(13180), AOM_ICDF(13467),
- AOM_ICDF(32768) },
- {AOM_ICDF(2497), AOM_ICDF(2520), AOM_ICDF(6340), AOM_ICDF(6417),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(12481), AOM_ICDF(13958), AOM_ICDF(24487), AOM_ICDF(24997),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11457), AOM_ICDF(13075), AOM_ICDF(23820), AOM_ICDF(24406),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9793), AOM_ICDF(11127), AOM_ICDF(21775), AOM_ICDF(22387),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7745), AOM_ICDF(8457), AOM_ICDF(18155), AOM_ICDF(18655),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5441), AOM_ICDF(5668), AOM_ICDF(13180), AOM_ICDF(13467),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2497), AOM_ICDF(2520), AOM_ICDF(6340), AOM_ICDF(6417),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(19521), AOM_ICDF(20572), AOM_ICDF(28965), AOM_ICDF(29177),
- AOM_ICDF(32768) },
- {AOM_ICDF(15425), AOM_ICDF(16741), AOM_ICDF(27247), AOM_ICDF(27554),
- AOM_ICDF(32768) },
- {AOM_ICDF(11969), AOM_ICDF(12690), AOM_ICDF(23872), AOM_ICDF(24141),
- AOM_ICDF(32768) },
- {AOM_ICDF(9281), AOM_ICDF(9678), AOM_ICDF(19970), AOM_ICDF(20207),
- AOM_ICDF(32768) },
- {AOM_ICDF(6081), AOM_ICDF(6266), AOM_ICDF(14682), AOM_ICDF(14876),
- AOM_ICDF(32768) },
- {AOM_ICDF(2753), AOM_ICDF(2779), AOM_ICDF(7150), AOM_ICDF(7225),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(19521), AOM_ICDF(20572), AOM_ICDF(28965), AOM_ICDF(29177),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15425), AOM_ICDF(16741), AOM_ICDF(27247), AOM_ICDF(27554),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11969), AOM_ICDF(12690), AOM_ICDF(23872), AOM_ICDF(24141),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9281), AOM_ICDF(9678), AOM_ICDF(19970), AOM_ICDF(20207),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6081), AOM_ICDF(6266), AOM_ICDF(14682), AOM_ICDF(14876),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2753), AOM_ICDF(2779), AOM_ICDF(7150), AOM_ICDF(7225),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(22337), AOM_ICDF(23293), AOM_ICDF(30630), AOM_ICDF(30753),
- AOM_ICDF(32768) },
- {AOM_ICDF(16321), AOM_ICDF(17427), AOM_ICDF(28368), AOM_ICDF(28570),
- AOM_ICDF(32768) },
- {AOM_ICDF(11457), AOM_ICDF(11907), AOM_ICDF(23570), AOM_ICDF(23741),
- AOM_ICDF(32768) },
- {AOM_ICDF(7233), AOM_ICDF(7331), AOM_ICDF(17258), AOM_ICDF(17334),
- AOM_ICDF(32768) },
- {AOM_ICDF(4033), AOM_ICDF(4070), AOM_ICDF(10375), AOM_ICDF(10441),
- AOM_ICDF(32768) },
- {AOM_ICDF(1601), AOM_ICDF(1619), AOM_ICDF(4706), AOM_ICDF(4788),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(22337), AOM_ICDF(23293), AOM_ICDF(30630), AOM_ICDF(30753),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16321), AOM_ICDF(17427), AOM_ICDF(28368), AOM_ICDF(28570),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11457), AOM_ICDF(11907), AOM_ICDF(23570), AOM_ICDF(23741),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7233), AOM_ICDF(7331), AOM_ICDF(17258), AOM_ICDF(17334),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4033), AOM_ICDF(4070), AOM_ICDF(10375), AOM_ICDF(10441),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1601), AOM_ICDF(1619), AOM_ICDF(4706), AOM_ICDF(4788),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(24769), AOM_ICDF(25536), AOM_ICDF(31660), AOM_ICDF(31722),
- AOM_ICDF(32768) },
- {AOM_ICDF(18113), AOM_ICDF(18886), AOM_ICDF(29420), AOM_ICDF(29534),
- AOM_ICDF(32768) },
- {AOM_ICDF(11201), AOM_ICDF(11412), AOM_ICDF(23207), AOM_ICDF(23291),
- AOM_ICDF(32768) },
- {AOM_ICDF(6977), AOM_ICDF(7033), AOM_ICDF(16599), AOM_ICDF(16646),
- AOM_ICDF(32768) },
- {AOM_ICDF(4033), AOM_ICDF(4070), AOM_ICDF(10375), AOM_ICDF(10441),
- AOM_ICDF(32768) },
- {AOM_ICDF(1601), AOM_ICDF(1620), AOM_ICDF(4827), AOM_ICDF(4909),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(24769), AOM_ICDF(25536), AOM_ICDF(31660), AOM_ICDF(31722),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18113), AOM_ICDF(18886), AOM_ICDF(29420), AOM_ICDF(29534),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11201), AOM_ICDF(11412), AOM_ICDF(23207), AOM_ICDF(23291),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6977), AOM_ICDF(7033), AOM_ICDF(16599), AOM_ICDF(16646),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4033), AOM_ICDF(4070), AOM_ICDF(10375), AOM_ICDF(10441),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1601), AOM_ICDF(1620), AOM_ICDF(4827), AOM_ICDF(4909),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(28353), AOM_ICDF(28831), AOM_ICDF(32502), AOM_ICDF(32517),
- AOM_ICDF(32768) },
- {AOM_ICDF(21441), AOM_ICDF(21869), AOM_ICDF(30977), AOM_ICDF(31017),
- AOM_ICDF(32768) },
- {AOM_ICDF(11969), AOM_ICDF(12088), AOM_ICDF(24116), AOM_ICDF(24158),
- AOM_ICDF(32768) },
- {AOM_ICDF(7489), AOM_ICDF(7547), AOM_ICDF(17413), AOM_ICDF(17458),
- AOM_ICDF(32768) },
- {AOM_ICDF(4545), AOM_ICDF(4585), AOM_ICDF(11325), AOM_ICDF(11388),
- AOM_ICDF(32768) },
- {AOM_ICDF(2113), AOM_ICDF(2133), AOM_ICDF(5526), AOM_ICDF(5606),
- AOM_ICDF(32768) } } },
+ { AOM_ICDF(28353), AOM_ICDF(28831), AOM_ICDF(32502), AOM_ICDF(32517),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21441), AOM_ICDF(21869), AOM_ICDF(30977), AOM_ICDF(31017),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11969), AOM_ICDF(12088), AOM_ICDF(24116), AOM_ICDF(24158),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7489), AOM_ICDF(7547), AOM_ICDF(17413), AOM_ICDF(17458),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4545), AOM_ICDF(4585), AOM_ICDF(11325), AOM_ICDF(11388),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2113), AOM_ICDF(2133), AOM_ICDF(5526), AOM_ICDF(5606),
+ AOM_ICDF(32768) } } },
    { // Inter
{ // Band 0
{ AOM_ICDF(2496), AOM_ICDF(8717), AOM_ICDF(17280), AOM_ICDF(28922),
@@ -4817,70 +1577,70 @@ static const coeff_cdf_model default_coef_head_cdf_16x16[PLANE_TYPES] = {
{ AOM_ICDF(448), AOM_ICDF(9240), AOM_ICDF(11886), AOM_ICDF(24124),
AOM_ICDF(24898), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(21057), AOM_ICDF(22896), AOM_ICDF(31877), AOM_ICDF(31953),
- AOM_ICDF(32768) },
- {AOM_ICDF(20673), AOM_ICDF(23151), AOM_ICDF(31706), AOM_ICDF(31825),
- AOM_ICDF(32768) },
- {AOM_ICDF(18753), AOM_ICDF(20519), AOM_ICDF(30497), AOM_ICDF(30668),
- AOM_ICDF(32768) },
- {AOM_ICDF(15425), AOM_ICDF(16608), AOM_ICDF(27789), AOM_ICDF(28027),
- AOM_ICDF(32768) },
- {AOM_ICDF(10305), AOM_ICDF(10977), AOM_ICDF(21405), AOM_ICDF(21749),
- AOM_ICDF(32768) },
- {AOM_ICDF(3649), AOM_ICDF(3812), AOM_ICDF(11213), AOM_ICDF(11445),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(21057), AOM_ICDF(22896), AOM_ICDF(31877), AOM_ICDF(31953),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20673), AOM_ICDF(23151), AOM_ICDF(31706), AOM_ICDF(31825),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18753), AOM_ICDF(20519), AOM_ICDF(30497), AOM_ICDF(30668),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15425), AOM_ICDF(16608), AOM_ICDF(27789), AOM_ICDF(28027),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10305), AOM_ICDF(10977), AOM_ICDF(21405), AOM_ICDF(21749),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3649), AOM_ICDF(3812), AOM_ICDF(11213), AOM_ICDF(11445),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(24001), AOM_ICDF(25899), AOM_ICDF(32307), AOM_ICDF(32360),
- AOM_ICDF(32768) },
- {AOM_ICDF(20929), AOM_ICDF(22941), AOM_ICDF(31775), AOM_ICDF(31867),
- AOM_ICDF(32768) },
- {AOM_ICDF(15169), AOM_ICDF(16734), AOM_ICDF(29228), AOM_ICDF(29425),
- AOM_ICDF(32768) },
- {AOM_ICDF(10561), AOM_ICDF(12047), AOM_ICDF(24918), AOM_ICDF(25324),
- AOM_ICDF(32768) },
- {AOM_ICDF(6977), AOM_ICDF(7929), AOM_ICDF(18311), AOM_ICDF(18918),
- AOM_ICDF(32768) },
- {AOM_ICDF(3649), AOM_ICDF(3760), AOM_ICDF(9962), AOM_ICDF(10162),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(24001), AOM_ICDF(25899), AOM_ICDF(32307), AOM_ICDF(32360),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20929), AOM_ICDF(22941), AOM_ICDF(31775), AOM_ICDF(31867),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15169), AOM_ICDF(16734), AOM_ICDF(29228), AOM_ICDF(29425),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10561), AOM_ICDF(12047), AOM_ICDF(24918), AOM_ICDF(25324),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6977), AOM_ICDF(7929), AOM_ICDF(18311), AOM_ICDF(18918),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3649), AOM_ICDF(3760), AOM_ICDF(9962), AOM_ICDF(10162),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(25793), AOM_ICDF(27526), AOM_ICDF(32565), AOM_ICDF(32591),
- AOM_ICDF(32768) },
- {AOM_ICDF(21825), AOM_ICDF(23885), AOM_ICDF(32064), AOM_ICDF(32135),
- AOM_ICDF(32768) },
- {AOM_ICDF(15041), AOM_ICDF(16286), AOM_ICDF(29203), AOM_ICDF(29360),
- AOM_ICDF(32768) },
- {AOM_ICDF(10433), AOM_ICDF(11058), AOM_ICDF(24349), AOM_ICDF(24538),
- AOM_ICDF(32768) },
- {AOM_ICDF(5569), AOM_ICDF(6016), AOM_ICDF(16460), AOM_ICDF(16794),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(25793), AOM_ICDF(27526), AOM_ICDF(32565), AOM_ICDF(32591),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21825), AOM_ICDF(23885), AOM_ICDF(32064), AOM_ICDF(32135),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15041), AOM_ICDF(16286), AOM_ICDF(29203), AOM_ICDF(29360),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10433), AOM_ICDF(11058), AOM_ICDF(24349), AOM_ICDF(24538),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5569), AOM_ICDF(6016), AOM_ICDF(16460), AOM_ICDF(16794),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(26433), AOM_ICDF(28398), AOM_ICDF(32682), AOM_ICDF(32696),
- AOM_ICDF(32768) },
- {AOM_ICDF(22977), AOM_ICDF(25086), AOM_ICDF(32367), AOM_ICDF(32412),
- AOM_ICDF(32768) },
- {AOM_ICDF(16577), AOM_ICDF(17928), AOM_ICDF(30144), AOM_ICDF(30275),
- AOM_ICDF(32768) },
- {AOM_ICDF(12481), AOM_ICDF(13352), AOM_ICDF(25993), AOM_ICDF(26211),
- AOM_ICDF(32768) },
- {AOM_ICDF(7745), AOM_ICDF(8069), AOM_ICDF(20501), AOM_ICDF(20657),
- AOM_ICDF(32768) },
- {AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(26433), AOM_ICDF(28398), AOM_ICDF(32682), AOM_ICDF(32696),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22977), AOM_ICDF(25086), AOM_ICDF(32367), AOM_ICDF(32412),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16577), AOM_ICDF(17928), AOM_ICDF(30144), AOM_ICDF(30275),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12481), AOM_ICDF(13352), AOM_ICDF(25993), AOM_ICDF(26211),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7745), AOM_ICDF(8069), AOM_ICDF(20501), AOM_ICDF(20657),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(27841), AOM_ICDF(29700), AOM_ICDF(32721), AOM_ICDF(32730),
- AOM_ICDF(32768) },
- {AOM_ICDF(23873), AOM_ICDF(26202), AOM_ICDF(32578), AOM_ICDF(32604),
- AOM_ICDF(32768) },
- {AOM_ICDF(17729), AOM_ICDF(19046), AOM_ICDF(30448), AOM_ICDF(30568),
- AOM_ICDF(32768) },
- {AOM_ICDF(13505), AOM_ICDF(14508), AOM_ICDF(26034), AOM_ICDF(26304),
- AOM_ICDF(32768) },
- {AOM_ICDF(10049), AOM_ICDF(10494), AOM_ICDF(19945), AOM_ICDF(20233),
- AOM_ICDF(32768) },
- {AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
- AOM_ICDF(32768) } } } },
+ { AOM_ICDF(27841), AOM_ICDF(29700), AOM_ICDF(32721), AOM_ICDF(32730),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(23873), AOM_ICDF(26202), AOM_ICDF(32578), AOM_ICDF(32604),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17729), AOM_ICDF(19046), AOM_ICDF(30448), AOM_ICDF(30568),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13505), AOM_ICDF(14508), AOM_ICDF(26034), AOM_ICDF(26304),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10049), AOM_ICDF(10494), AOM_ICDF(19945), AOM_ICDF(20233),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
+ AOM_ICDF(32768) } } } },
{ // UV plane
{ // Inter
{ // Band 0
@@ -4891,70 +1651,70 @@ static const coeff_cdf_model default_coef_head_cdf_16x16[PLANE_TYPES] = {
{ AOM_ICDF(2880), AOM_ICDF(9023), AOM_ICDF(11126), AOM_ICDF(20602),
AOM_ICDF(21713), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(20161), AOM_ICDF(24785), AOM_ICDF(31070), AOM_ICDF(31430),
- AOM_ICDF(32768) },
- {AOM_ICDF(17985), AOM_ICDF(22773), AOM_ICDF(30430), AOM_ICDF(30880),
- AOM_ICDF(32768) },
- {AOM_ICDF(15937), AOM_ICDF(18802), AOM_ICDF(28265), AOM_ICDF(28788),
- AOM_ICDF(32768) },
- {AOM_ICDF(11841), AOM_ICDF(13587), AOM_ICDF(24798), AOM_ICDF(25335),
- AOM_ICDF(32768) },
- {AOM_ICDF(8769), AOM_ICDF(9160), AOM_ICDF(19316), AOM_ICDF(19566),
- AOM_ICDF(32768) },
- {AOM_ICDF(5313), AOM_ICDF(5357), AOM_ICDF(12874), AOM_ICDF(12932),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(20161), AOM_ICDF(24785), AOM_ICDF(31070), AOM_ICDF(31430),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17985), AOM_ICDF(22773), AOM_ICDF(30430), AOM_ICDF(30880),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15937), AOM_ICDF(18802), AOM_ICDF(28265), AOM_ICDF(28788),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11841), AOM_ICDF(13587), AOM_ICDF(24798), AOM_ICDF(25335),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8769), AOM_ICDF(9160), AOM_ICDF(19316), AOM_ICDF(19566),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5313), AOM_ICDF(5357), AOM_ICDF(12874), AOM_ICDF(12932),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(24129), AOM_ICDF(26501), AOM_ICDF(31672), AOM_ICDF(31844),
- AOM_ICDF(32768) },
- {AOM_ICDF(19649), AOM_ICDF(21553), AOM_ICDF(30130), AOM_ICDF(30370),
- AOM_ICDF(32768) },
- {AOM_ICDF(11713), AOM_ICDF(13134), AOM_ICDF(25983), AOM_ICDF(26321),
- AOM_ICDF(32768) },
- {AOM_ICDF(9409), AOM_ICDF(9948), AOM_ICDF(21408), AOM_ICDF(21663),
- AOM_ICDF(32768) },
- {AOM_ICDF(5569), AOM_ICDF(5757), AOM_ICDF(14335), AOM_ICDF(14533),
- AOM_ICDF(32768) },
- {AOM_ICDF(2241), AOM_ICDF(2305), AOM_ICDF(13152), AOM_ICDF(13209),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(24129), AOM_ICDF(26501), AOM_ICDF(31672), AOM_ICDF(31844),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(19649), AOM_ICDF(21553), AOM_ICDF(30130), AOM_ICDF(30370),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11713), AOM_ICDF(13134), AOM_ICDF(25983), AOM_ICDF(26321),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9409), AOM_ICDF(9948), AOM_ICDF(21408), AOM_ICDF(21663),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5569), AOM_ICDF(5757), AOM_ICDF(14335), AOM_ICDF(14533),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2241), AOM_ICDF(2305), AOM_ICDF(13152), AOM_ICDF(13209),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(26817), AOM_ICDF(28135), AOM_ICDF(32130), AOM_ICDF(32209),
- AOM_ICDF(32768) },
- {AOM_ICDF(20161), AOM_ICDF(21412), AOM_ICDF(30331), AOM_ICDF(30481),
- AOM_ICDF(32768) },
- {AOM_ICDF(13377), AOM_ICDF(13798), AOM_ICDF(26065), AOM_ICDF(26176),
- AOM_ICDF(32768) },
- {AOM_ICDF(8129), AOM_ICDF(8290), AOM_ICDF(19920), AOM_ICDF(20008),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(5751), AOM_ICDF(14950), AOM_ICDF(15002),
- AOM_ICDF(32768) },
- {AOM_ICDF(5569), AOM_ICDF(5601), AOM_ICDF(11041), AOM_ICDF(11105),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(26817), AOM_ICDF(28135), AOM_ICDF(32130), AOM_ICDF(32209),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20161), AOM_ICDF(21412), AOM_ICDF(30331), AOM_ICDF(30481),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13377), AOM_ICDF(13798), AOM_ICDF(26065), AOM_ICDF(26176),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8129), AOM_ICDF(8290), AOM_ICDF(19920), AOM_ICDF(20008),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(5751), AOM_ICDF(14950), AOM_ICDF(15002),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5569), AOM_ICDF(5601), AOM_ICDF(11041), AOM_ICDF(11105),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(28225), AOM_ICDF(29079), AOM_ICDF(32387), AOM_ICDF(32426),
- AOM_ICDF(32768) },
- {AOM_ICDF(21185), AOM_ICDF(22046), AOM_ICDF(30982), AOM_ICDF(31061),
- AOM_ICDF(32768) },
- {AOM_ICDF(13377), AOM_ICDF(13595), AOM_ICDF(25762), AOM_ICDF(25824),
- AOM_ICDF(32768) },
- {AOM_ICDF(8001), AOM_ICDF(8123), AOM_ICDF(20530), AOM_ICDF(20590),
- AOM_ICDF(32768) },
- {AOM_ICDF(4289), AOM_ICDF(4322), AOM_ICDF(9907), AOM_ICDF(9974),
- AOM_ICDF(32768) },
- {AOM_ICDF(3393), AOM_ICDF(3412), AOM_ICDF(6663), AOM_ICDF(6739),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(28225), AOM_ICDF(29079), AOM_ICDF(32387), AOM_ICDF(32426),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21185), AOM_ICDF(22046), AOM_ICDF(30982), AOM_ICDF(31061),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13377), AOM_ICDF(13595), AOM_ICDF(25762), AOM_ICDF(25824),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8001), AOM_ICDF(8123), AOM_ICDF(20530), AOM_ICDF(20590),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4289), AOM_ICDF(4322), AOM_ICDF(9907), AOM_ICDF(9974),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3393), AOM_ICDF(3412), AOM_ICDF(6663), AOM_ICDF(6739),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(30529), AOM_ICDF(31014), AOM_ICDF(32651), AOM_ICDF(32664),
- AOM_ICDF(32768) },
- {AOM_ICDF(23489), AOM_ICDF(24268), AOM_ICDF(31627), AOM_ICDF(31682),
- AOM_ICDF(32768) },
- {AOM_ICDF(14017), AOM_ICDF(14239), AOM_ICDF(26653), AOM_ICDF(26707),
- AOM_ICDF(32768) },
- {AOM_ICDF(11201), AOM_ICDF(11317), AOM_ICDF(23122), AOM_ICDF(23169),
- AOM_ICDF(32768) },
- {AOM_ICDF(6721), AOM_ICDF(6768), AOM_ICDF(14810), AOM_ICDF(14863),
- AOM_ICDF(32768) },
- {AOM_ICDF(6593), AOM_ICDF(6632), AOM_ICDF(13188), AOM_ICDF(13245),
- AOM_ICDF(32768) } } },
+ { AOM_ICDF(30529), AOM_ICDF(31014), AOM_ICDF(32651), AOM_ICDF(32664),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(23489), AOM_ICDF(24268), AOM_ICDF(31627), AOM_ICDF(31682),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(14017), AOM_ICDF(14239), AOM_ICDF(26653), AOM_ICDF(26707),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11201), AOM_ICDF(11317), AOM_ICDF(23122), AOM_ICDF(23169),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6721), AOM_ICDF(6768), AOM_ICDF(14810), AOM_ICDF(14863),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6593), AOM_ICDF(6632), AOM_ICDF(13188), AOM_ICDF(13245),
+ AOM_ICDF(32768) } } },
{ // Inter
{ // Band 0
{ AOM_ICDF(29888), AOM_ICDF(30492), AOM_ICDF(32500), AOM_ICDF(32766),
@@ -4964,70 +1724,70 @@ static const coeff_cdf_model default_coef_head_cdf_16x16[PLANE_TYPES] = {
{ AOM_ICDF(5568), AOM_ICDF(17762), AOM_ICDF(25039), AOM_ICDF(31213),
AOM_ICDF(31651), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(26433), AOM_ICDF(29681), AOM_ICDF(32757), AOM_ICDF(32760),
- AOM_ICDF(32768) },
- {AOM_ICDF(24769), AOM_ICDF(28761), AOM_ICDF(32722), AOM_ICDF(32734),
- AOM_ICDF(32768) },
- {AOM_ICDF(22209), AOM_ICDF(26975), AOM_ICDF(32418), AOM_ICDF(32500),
- AOM_ICDF(32768) },
- {AOM_ICDF(16321), AOM_ICDF(21333), AOM_ICDF(28368), AOM_ICDF(29283),
- AOM_ICDF(32768) },
- {AOM_ICDF(12865), AOM_ICDF(14775), AOM_ICDF(22545), AOM_ICDF(23553),
- AOM_ICDF(32768) },
- {AOM_ICDF(12353), AOM_ICDF(12354), AOM_ICDF(12473), AOM_ICDF(12532),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(26433), AOM_ICDF(29681), AOM_ICDF(32757), AOM_ICDF(32760),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(24769), AOM_ICDF(28761), AOM_ICDF(32722), AOM_ICDF(32734),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22209), AOM_ICDF(26975), AOM_ICDF(32418), AOM_ICDF(32500),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16321), AOM_ICDF(21333), AOM_ICDF(28368), AOM_ICDF(29283),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12865), AOM_ICDF(14775), AOM_ICDF(22545), AOM_ICDF(23553),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12353), AOM_ICDF(12354), AOM_ICDF(12473), AOM_ICDF(12532),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(27457), AOM_ICDF(30005), AOM_ICDF(32738), AOM_ICDF(32745),
- AOM_ICDF(32768) },
- {AOM_ICDF(24897), AOM_ICDF(27541), AOM_ICDF(32723), AOM_ICDF(32731),
- AOM_ICDF(32768) },
- {AOM_ICDF(15297), AOM_ICDF(19106), AOM_ICDF(30414), AOM_ICDF(30711),
- AOM_ICDF(32768) },
- {AOM_ICDF(6593), AOM_ICDF(8826), AOM_ICDF(19732), AOM_ICDF(20840),
- AOM_ICDF(32768) },
- {AOM_ICDF(4161), AOM_ICDF(4233), AOM_ICDF(16509), AOM_ICDF(16557),
- AOM_ICDF(32768) },
- {AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(27457), AOM_ICDF(30005), AOM_ICDF(32738), AOM_ICDF(32745),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(24897), AOM_ICDF(27541), AOM_ICDF(32723), AOM_ICDF(32731),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15297), AOM_ICDF(19106), AOM_ICDF(30414), AOM_ICDF(30711),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6593), AOM_ICDF(8826), AOM_ICDF(19732), AOM_ICDF(20840),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4161), AOM_ICDF(4233), AOM_ICDF(16509), AOM_ICDF(16557),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(28609), AOM_ICDF(30482), AOM_ICDF(32761), AOM_ICDF(32763),
- AOM_ICDF(32768) },
- {AOM_ICDF(25665), AOM_ICDF(27830), AOM_ICDF(32727), AOM_ICDF(32733),
- AOM_ICDF(32768) },
- {AOM_ICDF(21057), AOM_ICDF(23803), AOM_ICDF(30367), AOM_ICDF(30721),
- AOM_ICDF(32768) },
- {AOM_ICDF(10945), AOM_ICDF(21878), AOM_ICDF(32726), AOM_ICDF(32737),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(5750), AOM_ICDF(14739), AOM_ICDF(14792),
- AOM_ICDF(32768) },
- {AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(28609), AOM_ICDF(30482), AOM_ICDF(32761), AOM_ICDF(32763),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(25665), AOM_ICDF(27830), AOM_ICDF(32727), AOM_ICDF(32733),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21057), AOM_ICDF(23803), AOM_ICDF(30367), AOM_ICDF(30721),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10945), AOM_ICDF(21878), AOM_ICDF(32726), AOM_ICDF(32737),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(5750), AOM_ICDF(14739), AOM_ICDF(14792),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(28993), AOM_ICDF(30944), AOM_ICDF(32762), AOM_ICDF(32764),
- AOM_ICDF(32768) },
- {AOM_ICDF(26561), AOM_ICDF(28695), AOM_ICDF(32733), AOM_ICDF(32739),
- AOM_ICDF(32768) },
- {AOM_ICDF(17985), AOM_ICDF(19028), AOM_ICDF(31008), AOM_ICDF(31079),
- AOM_ICDF(32768) },
- {AOM_ICDF(7873), AOM_ICDF(8039), AOM_ICDF(19981), AOM_ICDF(20068),
- AOM_ICDF(32768) },
- {AOM_ICDF(5313), AOM_ICDF(5366), AOM_ICDF(14376), AOM_ICDF(14430),
- AOM_ICDF(32768) },
- {AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(28993), AOM_ICDF(30944), AOM_ICDF(32762), AOM_ICDF(32764),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(26561), AOM_ICDF(28695), AOM_ICDF(32733), AOM_ICDF(32739),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17985), AOM_ICDF(19028), AOM_ICDF(31008), AOM_ICDF(31079),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7873), AOM_ICDF(8039), AOM_ICDF(19981), AOM_ICDF(20068),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5313), AOM_ICDF(5366), AOM_ICDF(14376), AOM_ICDF(14430),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(30273), AOM_ICDF(32029), AOM_ICDF(32764), AOM_ICDF(32766),
- AOM_ICDF(32768) },
- {AOM_ICDF(28609), AOM_ICDF(30847), AOM_ICDF(32745), AOM_ICDF(32751),
- AOM_ICDF(32768) },
- {AOM_ICDF(21313), AOM_ICDF(24377), AOM_ICDF(31986), AOM_ICDF(32098),
- AOM_ICDF(32768) },
- {AOM_ICDF(32705), AOM_ICDF(32709), AOM_ICDF(32739), AOM_ICDF(32741),
- AOM_ICDF(32768) },
- {AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
- AOM_ICDF(32768) },
- {AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
- AOM_ICDF(32768) } } } }
+ { AOM_ICDF(30273), AOM_ICDF(32029), AOM_ICDF(32764), AOM_ICDF(32766),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(28609), AOM_ICDF(30847), AOM_ICDF(32745), AOM_ICDF(32751),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21313), AOM_ICDF(24377), AOM_ICDF(31986), AOM_ICDF(32098),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(32705), AOM_ICDF(32709), AOM_ICDF(32739), AOM_ICDF(32741),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
+ AOM_ICDF(32768) } } } }
};
static const coeff_cdf_model default_coef_head_cdf_32x32[PLANE_TYPES] = {
{ // Y plane
@@ -5040,70 +1800,70 @@ static const coeff_cdf_model default_coef_head_cdf_32x32[PLANE_TYPES] = {
{ AOM_ICDF(192), AOM_ICDF(3061), AOM_ICDF(3557), AOM_ICDF(8701),
AOM_ICDF(9762), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(11969), AOM_ICDF(15846), AOM_ICDF(25660), AOM_ICDF(26667),
- AOM_ICDF(32768) },
- {AOM_ICDF(11713), AOM_ICDF(15794), AOM_ICDF(25737), AOM_ICDF(26760),
- AOM_ICDF(32768) },
- {AOM_ICDF(9281), AOM_ICDF(12675), AOM_ICDF(23181), AOM_ICDF(24351),
- AOM_ICDF(32768) },
- {AOM_ICDF(7105), AOM_ICDF(8757), AOM_ICDF(18383), AOM_ICDF(19437),
- AOM_ICDF(32768) },
- {AOM_ICDF(4289), AOM_ICDF(4579), AOM_ICDF(11353), AOM_ICDF(11792),
- AOM_ICDF(32768) },
- {AOM_ICDF(1857), AOM_ICDF(1874), AOM_ICDF(4695), AOM_ICDF(4777),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(11969), AOM_ICDF(15846), AOM_ICDF(25660), AOM_ICDF(26667),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11713), AOM_ICDF(15794), AOM_ICDF(25737), AOM_ICDF(26760),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9281), AOM_ICDF(12675), AOM_ICDF(23181), AOM_ICDF(24351),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7105), AOM_ICDF(8757), AOM_ICDF(18383), AOM_ICDF(19437),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4289), AOM_ICDF(4579), AOM_ICDF(11353), AOM_ICDF(11792),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1857), AOM_ICDF(1874), AOM_ICDF(4695), AOM_ICDF(4777),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(20929), AOM_ICDF(22297), AOM_ICDF(29370), AOM_ICDF(29646),
- AOM_ICDF(32768) },
- {AOM_ICDF(17473), AOM_ICDF(18985), AOM_ICDF(28079), AOM_ICDF(28413),
- AOM_ICDF(32768) },
- {AOM_ICDF(13121), AOM_ICDF(14064), AOM_ICDF(24902), AOM_ICDF(25217),
- AOM_ICDF(32768) },
- {AOM_ICDF(9793), AOM_ICDF(10214), AOM_ICDF(20069), AOM_ICDF(20329),
- AOM_ICDF(32768) },
- {AOM_ICDF(5825), AOM_ICDF(5987), AOM_ICDF(13350), AOM_ICDF(13559),
- AOM_ICDF(32768) },
- {AOM_ICDF(2241), AOM_ICDF(2260), AOM_ICDF(5520), AOM_ICDF(5600),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(20929), AOM_ICDF(22297), AOM_ICDF(29370), AOM_ICDF(29646),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17473), AOM_ICDF(18985), AOM_ICDF(28079), AOM_ICDF(28413),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13121), AOM_ICDF(14064), AOM_ICDF(24902), AOM_ICDF(25217),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9793), AOM_ICDF(10214), AOM_ICDF(20069), AOM_ICDF(20329),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5825), AOM_ICDF(5987), AOM_ICDF(13350), AOM_ICDF(13559),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2241), AOM_ICDF(2260), AOM_ICDF(5520), AOM_ICDF(5600),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(25921), AOM_ICDF(26891), AOM_ICDF(31632), AOM_ICDF(31729),
- AOM_ICDF(32768) },
- {AOM_ICDF(18241), AOM_ICDF(19463), AOM_ICDF(29222), AOM_ICDF(29419),
- AOM_ICDF(32768) },
- {AOM_ICDF(11585), AOM_ICDF(12065), AOM_ICDF(23294), AOM_ICDF(23488),
- AOM_ICDF(32768) },
- {AOM_ICDF(6593), AOM_ICDF(6686), AOM_ICDF(16153), AOM_ICDF(16234),
- AOM_ICDF(32768) },
- {AOM_ICDF(3137), AOM_ICDF(3170), AOM_ICDF(8751), AOM_ICDF(8821),
- AOM_ICDF(32768) },
- {AOM_ICDF(1345), AOM_ICDF(1359), AOM_ICDF(3739), AOM_ICDF(3824),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(25921), AOM_ICDF(26891), AOM_ICDF(31632), AOM_ICDF(31729),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(18241), AOM_ICDF(19463), AOM_ICDF(29222), AOM_ICDF(29419),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11585), AOM_ICDF(12065), AOM_ICDF(23294), AOM_ICDF(23488),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6593), AOM_ICDF(6686), AOM_ICDF(16153), AOM_ICDF(16234),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3137), AOM_ICDF(3170), AOM_ICDF(8751), AOM_ICDF(8821),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1345), AOM_ICDF(1359), AOM_ICDF(3739), AOM_ICDF(3824),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(27713), AOM_ICDF(28504), AOM_ICDF(32068), AOM_ICDF(32132),
- AOM_ICDF(32768) },
- {AOM_ICDF(19265), AOM_ICDF(20354), AOM_ICDF(29789), AOM_ICDF(29943),
- AOM_ICDF(32768) },
- {AOM_ICDF(11201), AOM_ICDF(11538), AOM_ICDF(22701), AOM_ICDF(22848),
- AOM_ICDF(32768) },
- {AOM_ICDF(6337), AOM_ICDF(6424), AOM_ICDF(15268), AOM_ICDF(15353),
- AOM_ICDF(32768) },
- {AOM_ICDF(3649), AOM_ICDF(3681), AOM_ICDF(9052), AOM_ICDF(9121),
- AOM_ICDF(32768) },
- {AOM_ICDF(1601), AOM_ICDF(1618), AOM_ICDF(4584), AOM_ICDF(4667),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(27713), AOM_ICDF(28504), AOM_ICDF(32068), AOM_ICDF(32132),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(19265), AOM_ICDF(20354), AOM_ICDF(29789), AOM_ICDF(29943),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11201), AOM_ICDF(11538), AOM_ICDF(22701), AOM_ICDF(22848),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6337), AOM_ICDF(6424), AOM_ICDF(15268), AOM_ICDF(15353),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3649), AOM_ICDF(3681), AOM_ICDF(9052), AOM_ICDF(9121),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1601), AOM_ICDF(1618), AOM_ICDF(4584), AOM_ICDF(4667),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(30913), AOM_ICDF(31044), AOM_ICDF(32635), AOM_ICDF(32640),
- AOM_ICDF(32768) },
- {AOM_ICDF(22081), AOM_ICDF(22261), AOM_ICDF(30452), AOM_ICDF(30477),
- AOM_ICDF(32768) },
- {AOM_ICDF(10561), AOM_ICDF(10625), AOM_ICDF(21535), AOM_ICDF(21568),
- AOM_ICDF(32768) },
- {AOM_ICDF(6081), AOM_ICDF(6130), AOM_ICDF(14369), AOM_ICDF(14423),
- AOM_ICDF(32768) },
- {AOM_ICDF(3777), AOM_ICDF(3809), AOM_ICDF(9156), AOM_ICDF(9225),
- AOM_ICDF(32768) },
- {AOM_ICDF(1857), AOM_ICDF(1875), AOM_ICDF(4936), AOM_ICDF(5018),
- AOM_ICDF(32768) } } },
+ { AOM_ICDF(30913), AOM_ICDF(31044), AOM_ICDF(32635), AOM_ICDF(32640),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22081), AOM_ICDF(22261), AOM_ICDF(30452), AOM_ICDF(30477),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10561), AOM_ICDF(10625), AOM_ICDF(21535), AOM_ICDF(21568),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6081), AOM_ICDF(6130), AOM_ICDF(14369), AOM_ICDF(14423),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3777), AOM_ICDF(3809), AOM_ICDF(9156), AOM_ICDF(9225),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1857), AOM_ICDF(1875), AOM_ICDF(4936), AOM_ICDF(5018),
+ AOM_ICDF(32768) } } },
{ // Intra
{ // Band 0
{ AOM_ICDF(4672), AOM_ICDF(6927), AOM_ICDF(23534), AOM_ICDF(29846),
@@ -5113,70 +1873,70 @@ static const coeff_cdf_model default_coef_head_cdf_32x32[PLANE_TYPES] = {
{ AOM_ICDF(1344), AOM_ICDF(5588), AOM_ICDF(12166), AOM_ICDF(20966),
AOM_ICDF(23504), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(19393), AOM_ICDF(22016), AOM_ICDF(31280), AOM_ICDF(31444),
- AOM_ICDF(32768) },
- {AOM_ICDF(21185), AOM_ICDF(24329), AOM_ICDF(31706), AOM_ICDF(31865),
- AOM_ICDF(32768) },
- {AOM_ICDF(20673), AOM_ICDF(23240), AOM_ICDF(31186), AOM_ICDF(31379),
- AOM_ICDF(32768) },
- {AOM_ICDF(17857), AOM_ICDF(20035), AOM_ICDF(29594), AOM_ICDF(29889),
- AOM_ICDF(32768) },
- {AOM_ICDF(13633), AOM_ICDF(14929), AOM_ICDF(24883), AOM_ICDF(25337),
- AOM_ICDF(32768) },
- {AOM_ICDF(7873), AOM_ICDF(8416), AOM_ICDF(17452), AOM_ICDF(17886),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(19393), AOM_ICDF(22016), AOM_ICDF(31280), AOM_ICDF(31444),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21185), AOM_ICDF(24329), AOM_ICDF(31706), AOM_ICDF(31865),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20673), AOM_ICDF(23240), AOM_ICDF(31186), AOM_ICDF(31379),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17857), AOM_ICDF(20035), AOM_ICDF(29594), AOM_ICDF(29889),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13633), AOM_ICDF(14929), AOM_ICDF(24883), AOM_ICDF(25337),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7873), AOM_ICDF(8416), AOM_ICDF(17452), AOM_ICDF(17886),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(25665), AOM_ICDF(27145), AOM_ICDF(32256), AOM_ICDF(32314),
- AOM_ICDF(32768) },
- {AOM_ICDF(21057), AOM_ICDF(22826), AOM_ICDF(31465), AOM_ICDF(31576),
- AOM_ICDF(32768) },
- {AOM_ICDF(13633), AOM_ICDF(14885), AOM_ICDF(27873), AOM_ICDF(28088),
- AOM_ICDF(32768) },
- {AOM_ICDF(8769), AOM_ICDF(9515), AOM_ICDF(21941), AOM_ICDF(22248),
- AOM_ICDF(32768) },
- {AOM_ICDF(6209), AOM_ICDF(6594), AOM_ICDF(15598), AOM_ICDF(15950),
- AOM_ICDF(32768) },
- {AOM_ICDF(1985), AOM_ICDF(2014), AOM_ICDF(6855), AOM_ICDF(6931),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(25665), AOM_ICDF(27145), AOM_ICDF(32256), AOM_ICDF(32314),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21057), AOM_ICDF(22826), AOM_ICDF(31465), AOM_ICDF(31576),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13633), AOM_ICDF(14885), AOM_ICDF(27873), AOM_ICDF(28088),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8769), AOM_ICDF(9515), AOM_ICDF(21941), AOM_ICDF(22248),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(6209), AOM_ICDF(6594), AOM_ICDF(15598), AOM_ICDF(15950),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(1985), AOM_ICDF(2014), AOM_ICDF(6855), AOM_ICDF(6931),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(26817), AOM_ICDF(27824), AOM_ICDF(32362), AOM_ICDF(32399),
- AOM_ICDF(32768) },
- {AOM_ICDF(21185), AOM_ICDF(22321), AOM_ICDF(31389), AOM_ICDF(31466),
- AOM_ICDF(32768) },
- {AOM_ICDF(13761), AOM_ICDF(14154), AOM_ICDF(27163), AOM_ICDF(27245),
- AOM_ICDF(32768) },
- {AOM_ICDF(8897), AOM_ICDF(9011), AOM_ICDF(20600), AOM_ICDF(20659),
- AOM_ICDF(32768) },
- {AOM_ICDF(4673), AOM_ICDF(4774), AOM_ICDF(15044), AOM_ICDF(15131),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(26817), AOM_ICDF(27824), AOM_ICDF(32362), AOM_ICDF(32399),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21185), AOM_ICDF(22321), AOM_ICDF(31389), AOM_ICDF(31466),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13761), AOM_ICDF(14154), AOM_ICDF(27163), AOM_ICDF(27245),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8897), AOM_ICDF(9011), AOM_ICDF(20600), AOM_ICDF(20659),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4673), AOM_ICDF(4774), AOM_ICDF(15044), AOM_ICDF(15131),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(28865), AOM_ICDF(29687), AOM_ICDF(32655), AOM_ICDF(32667),
- AOM_ICDF(32768) },
- {AOM_ICDF(23233), AOM_ICDF(24218), AOM_ICDF(32080), AOM_ICDF(32118),
- AOM_ICDF(32768) },
- {AOM_ICDF(15041), AOM_ICDF(15444), AOM_ICDF(28787), AOM_ICDF(28845),
- AOM_ICDF(32768) },
- {AOM_ICDF(9921), AOM_ICDF(10248), AOM_ICDF(22818), AOM_ICDF(22944),
- AOM_ICDF(32768) },
- {AOM_ICDF(7745), AOM_ICDF(7866), AOM_ICDF(16591), AOM_ICDF(16702),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(28865), AOM_ICDF(29687), AOM_ICDF(32655), AOM_ICDF(32667),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(23233), AOM_ICDF(24218), AOM_ICDF(32080), AOM_ICDF(32118),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15041), AOM_ICDF(15444), AOM_ICDF(28787), AOM_ICDF(28845),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9921), AOM_ICDF(10248), AOM_ICDF(22818), AOM_ICDF(22944),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7745), AOM_ICDF(7866), AOM_ICDF(16591), AOM_ICDF(16702),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(31169), AOM_ICDF(31559), AOM_ICDF(32741), AOM_ICDF(32744),
- AOM_ICDF(32768) },
- {AOM_ICDF(24769), AOM_ICDF(25583), AOM_ICDF(32347), AOM_ICDF(32370),
- AOM_ICDF(32768) },
- {AOM_ICDF(15937), AOM_ICDF(16169), AOM_ICDF(29120), AOM_ICDF(29152),
- AOM_ICDF(32768) },
- {AOM_ICDF(7489), AOM_ICDF(7578), AOM_ICDF(22647), AOM_ICDF(22677),
- AOM_ICDF(32768) },
- {AOM_ICDF(7617), AOM_ICDF(7689), AOM_ICDF(19849), AOM_ICDF(19887),
- AOM_ICDF(32768) },
- {AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
- AOM_ICDF(32768) } } } },
+ { AOM_ICDF(31169), AOM_ICDF(31559), AOM_ICDF(32741), AOM_ICDF(32744),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(24769), AOM_ICDF(25583), AOM_ICDF(32347), AOM_ICDF(32370),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15937), AOM_ICDF(16169), AOM_ICDF(29120), AOM_ICDF(29152),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7489), AOM_ICDF(7578), AOM_ICDF(22647), AOM_ICDF(22677),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7617), AOM_ICDF(7689), AOM_ICDF(19849), AOM_ICDF(19887),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2113), AOM_ICDF(2183), AOM_ICDF(7202), AOM_ICDF(7377),
+ AOM_ICDF(32768) } } } },
{ // UV plane
{ // Inter
{ // Band 0
@@ -5187,70 +1947,70 @@ static const coeff_cdf_model default_coef_head_cdf_32x32[PLANE_TYPES] = {
{ AOM_ICDF(1344), AOM_ICDF(3989), AOM_ICDF(18125), AOM_ICDF(25340),
AOM_ICDF(27820), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(15937), AOM_ICDF(29000), AOM_ICDF(32210), AOM_ICDF(32434),
- AOM_ICDF(32768) },
- {AOM_ICDF(12353), AOM_ICDF(26626), AOM_ICDF(31533), AOM_ICDF(31993),
- AOM_ICDF(32768) },
- {AOM_ICDF(11457), AOM_ICDF(29187), AOM_ICDF(30896), AOM_ICDF(31750),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(21278), AOM_ICDF(28169), AOM_ICDF(29764),
- AOM_ICDF(32768) },
- {AOM_ICDF(7489), AOM_ICDF(8855), AOM_ICDF(13365), AOM_ICDF(15620),
- AOM_ICDF(32768) },
- {AOM_ICDF(4289), AOM_ICDF(4833), AOM_ICDF(8572), AOM_ICDF(10108),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(15937), AOM_ICDF(29000), AOM_ICDF(32210), AOM_ICDF(32434),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(12353), AOM_ICDF(26626), AOM_ICDF(31533), AOM_ICDF(31993),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11457), AOM_ICDF(29187), AOM_ICDF(30896), AOM_ICDF(31750),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(21278), AOM_ICDF(28169), AOM_ICDF(29764),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(7489), AOM_ICDF(8855), AOM_ICDF(13365), AOM_ICDF(15620),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4289), AOM_ICDF(4833), AOM_ICDF(8572), AOM_ICDF(10108),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(25025), AOM_ICDF(30783), AOM_ICDF(32603), AOM_ICDF(32666),
- AOM_ICDF(32768) },
- {AOM_ICDF(24385), AOM_ICDF(29586), AOM_ICDF(31803), AOM_ICDF(32142),
- AOM_ICDF(32768) },
- {AOM_ICDF(22337), AOM_ICDF(23002), AOM_ICDF(27573), AOM_ICDF(27903),
- AOM_ICDF(32768) },
- {AOM_ICDF(10945), AOM_ICDF(12336), AOM_ICDF(21900), AOM_ICDF(22590),
- AOM_ICDF(32768) },
- {AOM_ICDF(8257), AOM_ICDF(8830), AOM_ICDF(19986), AOM_ICDF(20298),
- AOM_ICDF(32768) },
- {AOM_ICDF(10945), AOM_ICDF(10990), AOM_ICDF(18660), AOM_ICDF(18701),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(25025), AOM_ICDF(30783), AOM_ICDF(32603), AOM_ICDF(32666),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(24385), AOM_ICDF(29586), AOM_ICDF(31803), AOM_ICDF(32142),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22337), AOM_ICDF(23002), AOM_ICDF(27573), AOM_ICDF(27903),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10945), AOM_ICDF(12336), AOM_ICDF(21900), AOM_ICDF(22590),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(8257), AOM_ICDF(8830), AOM_ICDF(19986), AOM_ICDF(20298),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10945), AOM_ICDF(10990), AOM_ICDF(18660), AOM_ICDF(18701),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(29761), AOM_ICDF(31473), AOM_ICDF(32693), AOM_ICDF(32715),
- AOM_ICDF(32768) },
- {AOM_ICDF(20417), AOM_ICDF(24512), AOM_ICDF(31394), AOM_ICDF(31650),
- AOM_ICDF(32768) },
- {AOM_ICDF(11713), AOM_ICDF(13283), AOM_ICDF(25819), AOM_ICDF(26206),
- AOM_ICDF(32768) },
- {AOM_ICDF(13121), AOM_ICDF(14099), AOM_ICDF(21909), AOM_ICDF(22514),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(248), AOM_ICDF(9546), AOM_ICDF(9614),
- AOM_ICDF(32768) },
- {AOM_ICDF(2497), AOM_ICDF(2524), AOM_ICDF(7050), AOM_ICDF(7125),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(29761), AOM_ICDF(31473), AOM_ICDF(32693), AOM_ICDF(32715),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(20417), AOM_ICDF(24512), AOM_ICDF(31394), AOM_ICDF(31650),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(11713), AOM_ICDF(13283), AOM_ICDF(25819), AOM_ICDF(26206),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13121), AOM_ICDF(14099), AOM_ICDF(21909), AOM_ICDF(22514),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(248), AOM_ICDF(9546), AOM_ICDF(9614),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2497), AOM_ICDF(2524), AOM_ICDF(7050), AOM_ICDF(7125),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(30657), AOM_ICDF(31885), AOM_ICDF(32691), AOM_ICDF(32715),
- AOM_ICDF(32768) },
- {AOM_ICDF(19393), AOM_ICDF(26050), AOM_ICDF(31698), AOM_ICDF(31988),
- AOM_ICDF(32768) },
- {AOM_ICDF(15809), AOM_ICDF(15863), AOM_ICDF(24985), AOM_ICDF(25008),
- AOM_ICDF(32768) },
- {AOM_ICDF(23489), AOM_ICDF(28138), AOM_ICDF(32751), AOM_ICDF(32756),
- AOM_ICDF(32768) },
- {AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
- AOM_ICDF(32768) },
- {AOM_ICDF(2369), AOM_ICDF(2395), AOM_ICDF(6822), AOM_ICDF(6898),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(30657), AOM_ICDF(31885), AOM_ICDF(32691), AOM_ICDF(32715),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(19393), AOM_ICDF(26050), AOM_ICDF(31698), AOM_ICDF(31988),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(15809), AOM_ICDF(15863), AOM_ICDF(24985), AOM_ICDF(25008),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(23489), AOM_ICDF(28138), AOM_ICDF(32751), AOM_ICDF(32756),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16449), AOM_ICDF(16450), AOM_ICDF(16545), AOM_ICDF(16593),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2369), AOM_ICDF(2395), AOM_ICDF(6822), AOM_ICDF(6898),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(32705), AOM_ICDF(32744), AOM_ICDF(32766), AOM_ICDF(32767),
- AOM_ICDF(32768) },
- {AOM_ICDF(21953), AOM_ICDF(24962), AOM_ICDF(32156), AOM_ICDF(32246),
- AOM_ICDF(32768) },
- {AOM_ICDF(13121), AOM_ICDF(15358), AOM_ICDF(26284), AOM_ICDF(26835),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(7417), AOM_ICDF(20132), AOM_ICDF(20885),
- AOM_ICDF(32768) },
- {AOM_ICDF(4417), AOM_ICDF(4939), AOM_ICDF(15104), AOM_ICDF(15535),
- AOM_ICDF(32768) },
- {AOM_ICDF(2625), AOM_ICDF(2680), AOM_ICDF(8218), AOM_ICDF(8338),
- AOM_ICDF(32768) } } },
+ { AOM_ICDF(32705), AOM_ICDF(32744), AOM_ICDF(32766), AOM_ICDF(32767),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21953), AOM_ICDF(24962), AOM_ICDF(32156), AOM_ICDF(32246),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(13121), AOM_ICDF(15358), AOM_ICDF(26284), AOM_ICDF(26835),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(7417), AOM_ICDF(20132), AOM_ICDF(20885),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4417), AOM_ICDF(4939), AOM_ICDF(15104), AOM_ICDF(15535),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2625), AOM_ICDF(2680), AOM_ICDF(8218), AOM_ICDF(8338),
+ AOM_ICDF(32768) } } },
{ // Inter
{ // Band 0
{ AOM_ICDF(25280), AOM_ICDF(25678), AOM_ICDF(32446), AOM_ICDF(32622),
@@ -5260,84 +2020,74 @@ static const coeff_cdf_model default_coef_head_cdf_32x32[PLANE_TYPES] = {
{ AOM_ICDF(3264), AOM_ICDF(5170), AOM_ICDF(21779), AOM_ICDF(24026),
AOM_ICDF(27905), AOM_ICDF(32768) } },
{ // Band 1
- {AOM_ICDF(24257), AOM_ICDF(30554), AOM_ICDF(32719), AOM_ICDF(32738),
- AOM_ICDF(32768) },
- {AOM_ICDF(17217), AOM_ICDF(27413), AOM_ICDF(32617), AOM_ICDF(32667),
- AOM_ICDF(32768) },
- {AOM_ICDF(22977), AOM_ICDF(27600), AOM_ICDF(32482), AOM_ICDF(32552),
- AOM_ICDF(32768) },
- {AOM_ICDF(16833), AOM_ICDF(24360), AOM_ICDF(30746), AOM_ICDF(31293),
- AOM_ICDF(32768) },
- {AOM_ICDF(17089), AOM_ICDF(20060), AOM_ICDF(28880), AOM_ICDF(29370),
- AOM_ICDF(32768) },
- {AOM_ICDF(10945), AOM_ICDF(11009), AOM_ICDF(21900), AOM_ICDF(21932),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(24257), AOM_ICDF(30554), AOM_ICDF(32719), AOM_ICDF(32738),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17217), AOM_ICDF(27413), AOM_ICDF(32617), AOM_ICDF(32667),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22977), AOM_ICDF(27600), AOM_ICDF(32482), AOM_ICDF(32552),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16833), AOM_ICDF(24360), AOM_ICDF(30746), AOM_ICDF(31293),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(17089), AOM_ICDF(20060), AOM_ICDF(28880), AOM_ICDF(29370),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(10945), AOM_ICDF(11009), AOM_ICDF(21900), AOM_ICDF(21932),
+ AOM_ICDF(32768) } },
{ // Band 2
- {AOM_ICDF(27201), AOM_ICDF(30217), AOM_ICDF(32736), AOM_ICDF(32745),
- AOM_ICDF(32768) },
- {AOM_ICDF(22721), AOM_ICDF(27676), AOM_ICDF(32749), AOM_ICDF(32754),
- AOM_ICDF(32768) },
- {AOM_ICDF(5057), AOM_ICDF(12431), AOM_ICDF(25246), AOM_ICDF(26620),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(321), AOM_ICDF(22016), AOM_ICDF(22048),
- AOM_ICDF(32768) },
- {AOM_ICDF(5313), AOM_ICDF(5363), AOM_ICDF(13839), AOM_ICDF(13894),
- AOM_ICDF(32768) },
- {AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(27201), AOM_ICDF(30217), AOM_ICDF(32736), AOM_ICDF(32745),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22721), AOM_ICDF(27676), AOM_ICDF(32749), AOM_ICDF(32754),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5057), AOM_ICDF(12431), AOM_ICDF(25246), AOM_ICDF(26620),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(321), AOM_ICDF(22016), AOM_ICDF(22048),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5313), AOM_ICDF(5363), AOM_ICDF(13839), AOM_ICDF(13894),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2625), AOM_ICDF(2652), AOM_ICDF(7276), AOM_ICDF(7351),
+ AOM_ICDF(32768) } },
{ // Band 3
- {AOM_ICDF(27713), AOM_ICDF(30739), AOM_ICDF(32759), AOM_ICDF(32762),
- AOM_ICDF(32768) },
- {AOM_ICDF(26177), AOM_ICDF(30430), AOM_ICDF(32756), AOM_ICDF(32760),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(384), AOM_ICDF(32706), AOM_ICDF(32707),
- AOM_ICDF(32768) },
- {AOM_ICDF(9409), AOM_ICDF(9528), AOM_ICDF(21591), AOM_ICDF(21646),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) },
- {AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(27713), AOM_ICDF(30739), AOM_ICDF(32759), AOM_ICDF(32762),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(26177), AOM_ICDF(30430), AOM_ICDF(32756), AOM_ICDF(32760),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(384), AOM_ICDF(32706), AOM_ICDF(32707),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(9409), AOM_ICDF(9528), AOM_ICDF(21591), AOM_ICDF(21646),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2881), AOM_ICDF(2913), AOM_ICDF(8427), AOM_ICDF(8498),
+ AOM_ICDF(32768) } },
{ // Band 4
- {AOM_ICDF(28993), AOM_ICDF(31156), AOM_ICDF(32747), AOM_ICDF(32753),
- AOM_ICDF(32768) },
- {AOM_ICDF(25153), AOM_ICDF(28701), AOM_ICDF(32754), AOM_ICDF(32758),
- AOM_ICDF(32768) },
- {AOM_ICDF(16449), AOM_ICDF(16544), AOM_ICDF(32737), AOM_ICDF(32738),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(321), AOM_ICDF(22016), AOM_ICDF(22048),
- AOM_ICDF(32768) },
- {AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
- AOM_ICDF(32768) },
- {AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
- AOM_ICDF(32768) } },
+ { AOM_ICDF(28993), AOM_ICDF(31156), AOM_ICDF(32747), AOM_ICDF(32753),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(25153), AOM_ICDF(28701), AOM_ICDF(32754), AOM_ICDF(32758),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(16449), AOM_ICDF(16544), AOM_ICDF(32737), AOM_ICDF(32738),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(321), AOM_ICDF(22016), AOM_ICDF(22048),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(193), AOM_ICDF(194), AOM_ICDF(384), AOM_ICDF(479),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(2753), AOM_ICDF(2789), AOM_ICDF(8909), AOM_ICDF(8979),
+ AOM_ICDF(32768) } },
{ // Band 5
- {AOM_ICDF(30785), AOM_ICDF(32088), AOM_ICDF(32765), AOM_ICDF(32766),
- AOM_ICDF(32768) },
- {AOM_ICDF(22977), AOM_ICDF(26623), AOM_ICDF(32750), AOM_ICDF(32754),
- AOM_ICDF(32768) },
- {AOM_ICDF(21953), AOM_ICDF(21954), AOM_ICDF(22017), AOM_ICDF(22049),
- AOM_ICDF(32768) },
- {AOM_ICDF(5697), AOM_ICDF(7486), AOM_ICDF(20238), AOM_ICDF(21009),
- AOM_ICDF(32768) },
- {AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
- AOM_ICDF(32768) },
- {AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
- AOM_ICDF(32768) } } } }
+ { AOM_ICDF(30785), AOM_ICDF(32088), AOM_ICDF(32765), AOM_ICDF(32766),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(22977), AOM_ICDF(26623), AOM_ICDF(32750), AOM_ICDF(32754),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(21953), AOM_ICDF(21954), AOM_ICDF(22017), AOM_ICDF(22049),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(5697), AOM_ICDF(7486), AOM_ICDF(20238), AOM_ICDF(21009),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(4929), AOM_ICDF(5579), AOM_ICDF(16402), AOM_ICDF(16866),
+ AOM_ICDF(32768) },
+ { AOM_ICDF(3009), AOM_ICDF(3246), AOM_ICDF(10158), AOM_ICDF(10533),
+ AOM_ICDF(32768) } } } }
};
/* clang-format on */
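
Each five-entry row in the tables above is one cumulative distribution over the head tokens for a single (plane, ref, band, context) slot, scaled so that the final entry is 32768 (2^15). Per-token probabilities are recovered as differences of adjacent cumulative values; AOM_ICDF may store these values inverted depending on the entropy-coder build, so the identity mapping below is an assumption, not the library's definition. A minimal standalone sketch:

#include <stdint.h>
#include <stdio.h>

/* Assumption: identity mapping, as in a non-inverting build. */
#define AOM_ICDF(x) (x)

/* Recover per-token probabilities (in units of 1/32768) from one
   five-entry head-CDF row by differencing adjacent cumulative values. */
static void icdf_row_to_probs(const uint16_t row[5], uint16_t probs[5]) {
  uint16_t prev = 0;
  for (int i = 0; i < 5; ++i) {
    probs[i] = (uint16_t)(row[i] - prev); /* mass assigned to token i */
    prev = row[i];
  }
}

int main(void) {
  /* First row of Band 2 above. */
  const uint16_t row[5] = { AOM_ICDF(24001), AOM_ICDF(25899), AOM_ICDF(32307),
                            AOM_ICDF(32360), AOM_ICDF(32768) };
  uint16_t probs[5];
  icdf_row_to_probs(row, probs);
  for (int i = 0; i < 5; ++i) printf("p[%d] = %u/32768\n", i, probs[i]);
  return 0;
}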
-
-static void extend_to_full_distribution(aom_prob *probs, aom_prob p) {
- assert(p != 0);
- memcpy(probs, av1_pareto8_full[p - 1], MODEL_NODES * sizeof(aom_prob));
-}
-
-void av1_model_to_full_probs(const aom_prob *model, aom_prob *full) {
- if (full != model)
- memcpy(full, model, sizeof(aom_prob) * UNCONSTRAINED_NODES);
- extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]);
-}
+#endif // !CONFIG_Q_ADAPT_PROBS
static void build_tail_cdfs(aom_cdf_prob cdf_tail[CDF_SIZE(ENTROPY_TOKENS)],
aom_cdf_prob cdf_head[CDF_SIZE(ENTROPY_TOKENS)],
@@ -5364,122 +2114,21 @@ static void build_tail_cdfs(aom_cdf_prob cdf_tail[CDF_SIZE(ENTROPY_TOKENS)],
}
}
-static void build_head_cdfs(const aom_prob *pdf_model,
- const aom_prob *blockz_model,
- aom_cdf_prob cdf_head[ENTROPY_TOKENS + 1]) {
- int i, p, p1, p2, phead[6], prob_NZ, prob_EOB_1, prob_EOB_2p, prob_NEOB_1,
- prob_NEOB_2p;
- int prob8_blocknz;
- // This is the first coefficient position, which gets an extended CDF
- const int is_dc = blockz_model != NULL;
- const int last_head_val = HEAD_TOKENS - 1 + is_dc;
-
- assert(pdf_model != NULL);
- assert(pdf_model[2] != 0);
-
- /* FIXME: maintain true CDF counts. */
-
- /* Values are 0=BLOCK_ZERO, 1=ZERO_TOKEN, 2=ONE_TOKEN_EOB,
- 3=ONE_TOKEN_NEOB, 4=TWO_TOKEN_PLUS_EOB, 5=TWO_TOKEN_PLUS_NEOB
- */
- // Block zero probability
- if (is_dc) {
- phead[0] =
- ((*blockz_model) << (CDF_PROB_BITS - 8)) + (1 << (CDF_PROB_BITS - 9));
- phead[0] = AOMMIN(CDF_PROB_TOP - (HEAD_TOKENS + 1), AOMMAX(1, phead[0]));
- }
-
- // Will scale the remaining probabilities by the probability of the block
- // being non-zero
- prob8_blocknz = is_dc ? (256 - *blockz_model) : 256;
-
- // Probability of zero
- phead[is_dc + ZERO_TOKEN] =
- (pdf_model[1] << (CDF_PROB_BITS - 8)) + (1 << (CDF_PROB_BITS - 9));
-
- // Will scale the non-zero values
- prob_NZ = CDF_PROB_TOP - phead[is_dc + ZERO_TOKEN];
-
- // Will scale the EOBs by the probability of an EOB_TOKEN ..
- prob_EOB_1 =
- (pdf_model[0] << (CDF_PROB_BITS - 8)) + (1 << (CDF_PROB_BITS - 9));
- // .. use a lower probability of EOB for larger values
- prob_EOB_2p = prob_EOB_1 / 2;
-
- prob_NEOB_1 = CDF_PROB_TOP - prob_EOB_1;
- prob_NEOB_2p = CDF_PROB_TOP - prob_EOB_2p;
- if (prob_NZ == 0 || prob_NZ == CDF_PROB_TOP) abort();
- if (prob_EOB_1 == 0 || prob_EOB_1 == CDF_PROB_TOP) abort();
- if (prob_EOB_2p == 0 || prob_EOB_2p == CDF_PROB_TOP) abort();
-
- // ONE_CONTEXT_NODE prob
- p = (pdf_model[2] << (CDF_PROB_BITS - 8)) + (1 << (CDF_PROB_BITS - 9));
- // Scale by the non-zero factor to get the probability of token = 1
- p1 = ROUND_POWER_OF_TWO(prob_NZ * p, 15);
-
- // Scale by the EOB factors
- phead[is_dc + ONE_TOKEN_EOB] = ROUND_POWER_OF_TWO(p1 * prob_EOB_1, 15);
- phead[is_dc + ONE_TOKEN_NEOB] = ROUND_POWER_OF_TWO(p1 * prob_NEOB_1, 15);
-
- // Probability that the token is 2 or more
- p2 = CDF_PROB_TOP - p1 - phead[is_dc + ZERO_TOKEN];
-
- phead[is_dc + TWO_TOKEN_PLUS_EOB] = ROUND_POWER_OF_TWO(p2 * prob_EOB_2p, 15);
- phead[is_dc + TWO_TOKEN_PLUS_NEOB] =
- ROUND_POWER_OF_TWO(p2 * prob_NEOB_2p, 15);
-
- // Now use the block non-zero prob to scale the values
- for (i = is_dc; i < last_head_val; ++i) {
- phead[i] = (prob8_blocknz * phead[i] + 128) >> 8;
- }
-
- for (i = 0; i < last_head_val; ++i) {
- int c0;
- c0 = i > 0 ? AOM_ICDF(cdf_head[i - 1]) : 0;
- p = AOMMAX(1, AOMMIN(CDF_PROB_TOP - (last_head_val - i) - c0, phead[i]));
- cdf_head[i] = AOM_ICDF(c0 + p);
- }
- cdf_head[last_head_val] = AOM_ICDF(CDF_PROB_TOP);
-}
-
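
The build_head_cdfs routine deleted above does all of its probability algebra in 15-bit fixed point: an 8-bit model probability is lifted into the CDF domain with a half-bin offset, and the joint probability of two independent events is formed with ROUND_POWER_OF_TWO(a * b, 15). A self-contained sketch of just these two primitives (CDF_PROB_BITS = 15 is taken from the code above; the helper names are mine):

#include <stdint.h>

#define CDF_PROB_BITS 15
#define CDF_PROB_TOP (1 << CDF_PROB_BITS)
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

/* Rescale an 8-bit probability (1..255) to the 15-bit CDF domain,
   centering it within its former quantization bin. */
static int prob8_to_prob15(int p8) {
  return (p8 << (CDF_PROB_BITS - 8)) + (1 << (CDF_PROB_BITS - 9));
}

/* P(A and B) for independent events with 15-bit probabilities. */
static int prob15_mul(int a, int b) {
  return ROUND_POWER_OF_TWO(a * b, CDF_PROB_BITS);
}

In these terms, phead[is_dc + ONE_TOKEN_EOB] in the deleted code is simply prob15_mul(p1, prob_EOB_1).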
+#if !CONFIG_Q_ADAPT_PROBS
+// FIXME: Optimize for TX_2X2 and TX_64X64.
static void av1_default_coef_cdfs(FRAME_CONTEXT *fc) {
- int i, j, k, l;
- for (i = 0; i < PLANE_TYPES; ++i)
- for (j = 0; j < REF_TYPES; ++j)
- for (k = 0; k < COEF_BANDS; ++k)
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
#if CONFIG_CHROMA_2X2
- av1_copy(fc->coef_head_cdfs[TX_2X2][i][j][k][l],
- default_coef_head_cdf_4x4[i][j][k][l]);
-#endif
- av1_copy(fc->coef_head_cdfs[TX_4X4][i][j][k][l],
- default_coef_head_cdf_4x4[i][j][k][l]);
- av1_copy(fc->coef_head_cdfs[TX_8X8][i][j][k][l],
- default_coef_head_cdf_8x8[i][j][k][l]);
- av1_copy(fc->coef_head_cdfs[TX_16X16][i][j][k][l],
- default_coef_head_cdf_16x16[i][j][k][l]);
- av1_copy(fc->coef_head_cdfs[TX_32X32][i][j][k][l],
- default_coef_head_cdf_32x32[i][j][k][l]);
+ av1_copy(fc->coef_head_cdfs[TX_2X2], default_coef_head_cdf_4x4);
+#endif // CONFIG_CHROMA_2X2
+ av1_copy(fc->coef_head_cdfs[TX_4X4], default_coef_head_cdf_4x4);
+ av1_copy(fc->coef_head_cdfs[TX_8X8], default_coef_head_cdf_8x8);
+ av1_copy(fc->coef_head_cdfs[TX_16X16], default_coef_head_cdf_16x16);
+ av1_copy(fc->coef_head_cdfs[TX_32X32], default_coef_head_cdf_32x32);
#if CONFIG_TX64X64
- av1_copy(fc->coef_head_cdfs[TX_64X64][i][j][k][l],
- default_coef_head_cdf_32x32[i][j][k][l]);
-#endif
- }
-}
-
-void av1_coef_head_cdfs(FRAME_CONTEXT *fc) {
- TX_SIZE t;
- int i, j, k, l;
- for (t = 0; t < TX_SIZES; ++t)
- for (i = 0; i < PLANE_TYPES; ++i)
- for (j = 0; j < REF_TYPES; ++j)
- for (k = 0; k < COEF_BANDS; ++k)
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- build_head_cdfs(fc->coef_probs[t][i][j][k][l],
- k == 0 ? &fc->blockzero_probs[t][i][j][l] : NULL,
- fc->coef_head_cdfs[t][i][j][k][l]);
- }
+ av1_copy(fc->coef_head_cdfs[TX_64X64], default_coef_head_cdf_32x32);
+#endif // CONFIG_TX64X64
}
+#endif // !CONFIG_Q_ADAPT_PROBS
void av1_coef_pareto_cdfs(FRAME_CONTEXT *fc) {
/* Build the tail based on a Pareto distribution */
@@ -5496,83 +2145,33 @@ void av1_coef_pareto_cdfs(FRAME_CONTEXT *fc) {
void av1_default_coef_probs(AV1_COMMON *cm) {
#if CONFIG_Q_ADAPT_PROBS
- const int index = AOMMIN(
- ROUND_POWER_OF_TWO(cm->base_qindex, 8 - QCTX_BIN_BITS), QCTX_BINS - 1);
- av1_copy(cm->fc->coef_probs, default_qctx_coef_probs[index]);
-#else
+ const int index = AOMMIN(TOKEN_CDF_Q_CTXS - 1, cm->base_qindex / 64);
#if CONFIG_CHROMA_2X2
- av1_copy(cm->fc->coef_probs[TX_2X2], default_coef_probs_4x4);
-#endif
- av1_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4);
- av1_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8);
- av1_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16);
- av1_copy(cm->fc->coef_probs[TX_32X32], default_coef_probs_32x32);
+ av1_copy(cm->fc->coef_head_cdfs[TX_2X2],
+ (*av1_default_qctx_coef_cdfs[index])[TX_4X4]);
+#endif // CONFIG_CHROMA_2X2
+ av1_copy(cm->fc->coef_head_cdfs[TX_4X4],
+ (*av1_default_qctx_coef_cdfs[index])[TX_4X4]);
+ av1_copy(cm->fc->coef_head_cdfs[TX_8X8],
+ (*av1_default_qctx_coef_cdfs[index])[TX_8X8]);
+ av1_copy(cm->fc->coef_head_cdfs[TX_16X16],
+ (*av1_default_qctx_coef_cdfs[index])[TX_16X16]);
+ av1_copy(cm->fc->coef_head_cdfs[TX_32X32],
+ (*av1_default_qctx_coef_cdfs[index])[TX_32X32]);
#if CONFIG_TX64X64
- av1_copy(cm->fc->coef_probs[TX_64X64], default_coef_probs_64x64);
+ av1_copy(cm->fc->coef_head_cdfs[TX_64X64],
+ (*av1_default_qctx_coef_cdfs[index])[TX_32X32]);
#endif // CONFIG_TX64X64
-#endif // CONFIG_Q_ADAPT_PROBS
- av1_copy(cm->fc->blockzero_probs, av1_default_blockzero_probs);
+#else
/* Load the head tokens */
av1_default_coef_cdfs(cm->fc);
+#endif // CONFIG_Q_ADAPT_PROBS
av1_coef_pareto_cdfs(cm->fc);
}
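
With CONFIG_Q_ADAPT_PROBS, the defaults are now quantizer-dependent: the 8-bit base_qindex is bucketed into TOKEN_CDF_Q_CTXS = 4 bins of width 64, and the resulting index selects one pre-trained CDF set. The bucketing is easy to sanity-check in isolation (plain C; the wrapper function name is hypothetical):

#include <assert.h>

#define TOKEN_CDF_Q_CTXS 4
#define AOMMIN(a, b) ((a) < (b) ? (a) : (b))

/* base_qindex is 0..255, so the quotient is already 0..3; the AOMMIN
   clamp only matters if a larger qindex were ever passed in. */
static int qctx_index(int base_qindex) {
  return AOMMIN(TOKEN_CDF_Q_CTXS - 1, base_qindex / 64);
}

int main(void) {
  assert(qctx_index(0) == 0);
  assert(qctx_index(63) == 0);
  assert(qctx_index(64) == 1);
  assert(qctx_index(255) == 3);
  return 0;
}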
-#if !CONFIG_LV_MAP
-static void adapt_coef_probs(AV1_COMMON *cm, TX_SIZE tx_size,
- unsigned int count_sat,
- unsigned int update_factor) {
- const FRAME_CONTEXT *pre_fc = cm->pre_fc;
- av1_coeff_probs_model *const probs = cm->fc->coef_probs[tx_size];
- const av1_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size];
- const av1_coeff_count_model *const counts =
- (const av1_coeff_count_model *)cm->counts.coef[tx_size];
- const unsigned int(*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
- (const unsigned int(*)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS])
- cm->counts.eob_branch[tx_size];
- const av1_blockz_probs_model *const pre_blockz_probs =
- pre_fc->blockzero_probs[tx_size];
- av1_blockz_probs_model *const blockz_probs = cm->fc->blockzero_probs[tx_size];
- const av1_blockz_count_model *const blockz_counts =
- (const av1_blockz_count_model *)&cm->counts.blockz_count[tx_size][0];
- int i, j, k, l, m;
-#if CONFIG_RECT_TX
- assert(!is_rect_tx(tx_size));
-#endif // CONFIG_RECT_TX
-
- for (i = 0; i < PLANE_TYPES; ++i)
- for (j = 0; j < REF_TYPES; ++j)
- for (k = 0; k < COEF_BANDS; ++k)
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- const int n0 = counts[i][j][k][l][ZERO_TOKEN];
- const int n1 = counts[i][j][k][l][ONE_TOKEN];
- const int n2 = counts[i][j][k][l][TWO_TOKEN];
- const int neob = counts[i][j][k][l][EOB_MODEL_TOKEN];
- const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = {
- { neob, eob_counts[i][j][k][l] - neob }, { n0, n1 + n2 }, { n1, n2 }
- };
- for (m = 0; m < UNCONSTRAINED_NODES; ++m)
- probs[i][j][k][l][m] =
- av1_merge_probs(pre_probs[i][j][k][l][m], branch_ct[m],
- count_sat, update_factor);
- }
-
- for (i = 0; i < PLANE_TYPES; ++i) {
- for (j = 0; j < REF_TYPES; ++j) {
- for (k = 0; k < BLOCKZ_CONTEXTS; ++k) {
- const int n0 = blockz_counts[i][j][k][0];
- const int n1 = blockz_counts[i][j][k][1];
- const unsigned int branch_ct[2] = { n0, n1 };
- blockz_probs[i][j][k] = av1_merge_probs(
- pre_blockz_probs[i][j][k], branch_ct, count_sat, update_factor);
- }
- }
- }
-}
-#endif // !CONFIG_LV_MAP
-
+#if CONFIG_LV_MAP
void av1_adapt_coef_probs(AV1_COMMON *cm) {
unsigned int count_sat, update_factor;
-
if (!frame_is_intra_only(cm) && cm->last_frame_type == KEY_FRAME) {
update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */
count_sat = COEF_COUNT_SAT_AFTER_KEY;
@@ -5580,15 +2179,9 @@ void av1_adapt_coef_probs(AV1_COMMON *cm) {
update_factor = COEF_MAX_UPDATE_FACTOR;
count_sat = COEF_COUNT_SAT;
}
-
-#if CONFIG_LV_MAP
av1_adapt_txb_probs(cm, count_sat, update_factor);
-#else
- TX_SIZE tx_size;
- for (tx_size = 0; tx_size < TX_SIZES; tx_size++)
- adapt_coef_probs(cm, tx_size, count_sat, update_factor);
-#endif
}
+#endif // CONFIG_LV_MAP
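
Whether via av1_adapt_txb_probs or the deleted adapt_coef_probs above, each binary node ends up updated by the same rule: blend the previous frame's probability toward the maximum-likelihood estimate from the current frame's counts, with a step size that grows with the (saturated) sample count. The sketch below is modeled on the merge_probs helper used elsewhere in this tree, reconstructed from memory, so treat the exact clamping and rounding as assumptions:

#include <stdint.h>

typedef uint8_t aom_prob;

#define AOMMIN(a, b) ((a) < (b) ? (a) : (b))

/* Maximum-likelihood estimate of P(branch taken), clamped to [1, 255]. */
static aom_prob get_binary_prob(unsigned int n0, unsigned int n1) {
  const unsigned int den = n0 + n1;
  unsigned int p;
  if (den == 0) return 128;
  p = (256 * n0 + den / 2) / den;
  if (p < 1) p = 1;
  if (p > 255) p = 255;
  return (aom_prob)p;
}

/* Blend pre_prob toward the new estimate; more observed symbols (up to
   count_sat) permit a larger step, capped at max_update_factor/256. */
static aom_prob merge_probs(aom_prob pre_prob, const unsigned int ct[2],
                            unsigned int count_sat,
                            unsigned int max_update_factor) {
  const aom_prob prob = get_binary_prob(ct[0], ct[1]);
  const unsigned int count = AOMMIN(ct[0] + ct[1], count_sat);
  const unsigned int factor = max_update_factor * count / count_sat;
  return (aom_prob)((pre_prob * (256 - factor) + prob * factor + 128) >> 8);
}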
static void av1_average_cdf(aom_cdf_prob *cdf_ptr[], aom_cdf_prob *fc_cdf_ptr,
int cdf_size, const int num_tiles) {
@@ -5623,8 +2216,25 @@ void av1_average_tile_coef_cdfs(FRAME_CONTEXT *fc, FRAME_CONTEXT *ec_ctxs[],
aom_cdf_prob *fc_cdf_ptr;
+#if CONFIG_LV_MAP
+ AVERAGE_TILE_CDFS(txb_skip_cdf)
+ AVERAGE_TILE_CDFS(nz_map_cdf)
+ AVERAGE_TILE_CDFS(eob_flag_cdf)
+ AVERAGE_TILE_CDFS(dc_sign_cdf)
+ AVERAGE_TILE_CDFS(coeff_base_cdf)
+ AVERAGE_TILE_CDFS(coeff_lps_cdf)
+#if BR_NODE
+ AVERAGE_TILE_CDFS(coeff_br_cdf)
+#endif
+#if CONFIG_CTX1D
+ AVERAGE_TILE_CDFS(eob_mode_cdf)
+ AVERAGE_TILE_CDFS(empty_line_cdf)
+ AVERAGE_TILE_CDFS(hv_eob_cdf)
+#endif
+#else
AVERAGE_TILE_CDFS(coef_head_cdfs)
AVERAGE_TILE_CDFS(coef_tail_cdfs)
+#endif
}
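
Each AVERAGE_TILE_CDFS(...) invocation in these functions expands, via av1_average_cdf above, into an elementwise average of one CDF table across the per-tile entropy contexts, written back into the shared frame context. Stripped of the macro plumbing, the core operation looks like the following simplified sketch (the real code also walks nested array dimensions and keeps the averaged CDFs exactly normalized, which plain integer division does not guarantee):

#include <stdint.h>

typedef uint16_t aom_cdf_prob;

/* Elementwise mean of num_tiles CDF tables, each cdf_size entries long. */
static void average_cdf(aom_cdf_prob *const cdf_ptr[], aom_cdf_prob *out,
                        int cdf_size, int num_tiles) {
  for (int i = 0; i < cdf_size; ++i) {
    unsigned int sum = 0;
    for (int j = 0; j < num_tiles; ++j) sum += cdf_ptr[j][i];
    out[i] = (aom_cdf_prob)(sum / num_tiles);
  }
}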
void av1_average_tile_mv_cdfs(FRAME_CONTEXT *fc, FRAME_CONTEXT *ec_ctxs[],
@@ -5645,6 +2255,7 @@ void av1_average_tile_mv_cdfs(FRAME_CONTEXT *fc, FRAME_CONTEXT *ec_ctxs[],
AVERAGE_TILE_CDFS(nmvc[j].comps[k].hp_cdf)
AVERAGE_TILE_CDFS(nmvc[j].comps[k].class0_hp_cdf)
AVERAGE_TILE_CDFS(nmvc[j].comps[k].class0_cdf)
+ AVERAGE_TILE_CDFS(nmvc[j].comps[k].bits_cdf)
#endif
}
}
@@ -5668,17 +2279,16 @@ void av1_average_tile_intra_cdfs(FRAME_CONTEXT *fc, FRAME_CONTEXT *ec_ctxs[],
AVERAGE_TILE_CDFS(uv_mode_cdf)
#if CONFIG_CFL
+ AVERAGE_TILE_CDFS(cfl_sign_cdf)
AVERAGE_TILE_CDFS(cfl_alpha_cdf)
#endif
AVERAGE_TILE_CDFS(partition_cdf)
-#if CONFIG_DELTA_Q
AVERAGE_TILE_CDFS(delta_q_cdf)
#if CONFIG_EXT_DELTA_Q
AVERAGE_TILE_CDFS(delta_lf_cdf)
#endif
-#endif
#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
AVERAGE_TILE_CDFS(intra_filter_cdf)
#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
@@ -5688,13 +2298,26 @@ void av1_average_tile_intra_cdfs(FRAME_CONTEXT *fc, FRAME_CONTEXT *ec_ctxs[],
#if CONFIG_VAR_TX
AVERAGE_TILE_CDFS(txfm_partition_cdf)
#endif
-#if CONFIG_PALETTE
+#endif // CONFIG_NEW_MULTISYMBOL
AVERAGE_TILE_CDFS(palette_y_size_cdf)
AVERAGE_TILE_CDFS(palette_uv_size_cdf)
AVERAGE_TILE_CDFS(palette_y_color_index_cdf)
AVERAGE_TILE_CDFS(palette_uv_color_index_cdf)
+#if CONFIG_MRC_TX
+ AVERAGE_TILE_CDFS(mrc_mask_intra_cdf)
+#endif // CONFIG_MRC_TX
+#if CONFIG_NEW_MULTISYMBOL
+ AVERAGE_TILE_CDFS(palette_y_mode_cdf)
+ AVERAGE_TILE_CDFS(palette_uv_mode_cdf)
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
+ AVERAGE_TILE_CDFS(quarter_tx_size_cdf)
#endif
-#endif // CONFIG_NEW_MULTISYMBOL
+#endif
+#if CONFIG_LPF_SB
+ AVERAGE_TILE_CDFS(lpf_reuse_cdf)
+ AVERAGE_TILE_CDFS(lpf_delta_cdf)
+ AVERAGE_TILE_CDFS(lpf_sign_cdf)
+#endif // CONFIG_LPF_SB
}
void av1_average_tile_inter_cdfs(AV1_COMMON *cm, FRAME_CONTEXT *fc,
@@ -5707,6 +2330,7 @@ void av1_average_tile_inter_cdfs(AV1_COMMON *cm, FRAME_CONTEXT *fc,
#if CONFIG_NEW_MULTISYMBOL
AVERAGE_TILE_CDFS(comp_inter_cdf)
#if CONFIG_EXT_REFS
+ AVERAGE_TILE_CDFS(comp_ref_cdf)
AVERAGE_TILE_CDFS(comp_bwdref_cdf)
#endif
#endif
@@ -5718,14 +2342,19 @@ void av1_average_tile_inter_cdfs(AV1_COMMON *cm, FRAME_CONTEXT *fc,
AVERAGE_TILE_CDFS(zeromv_cdf)
AVERAGE_TILE_CDFS(refmv_cdf)
AVERAGE_TILE_CDFS(drl_cdf)
+#if CONFIG_EXT_COMP_REFS
+ AVERAGE_TILE_CDFS(uni_comp_ref_cdf)
+ AVERAGE_TILE_CDFS(comp_ref_type_cdf)
+#endif
#endif
-// FIXME: cdfs not defined for super_tx
+ // FIXME: cdfs not defined for super_tx
-#if CONFIG_EXT_INTER
AVERAGE_TILE_CDFS(inter_compound_mode_cdf)
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
AVERAGE_TILE_CDFS(compound_type_cdf)
+#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
#if CONFIG_COMPOUND_SINGLEREF
AVERAGE_TILE_CDFS(inter_singleref_comp_mode_cdf)
#endif
@@ -5737,7 +2366,6 @@ void av1_average_tile_inter_cdfs(AV1_COMMON *cm, FRAME_CONTEXT *fc,
#endif
AVERAGE_TILE_CDFS(interintra_mode_cdf)
#endif
-#endif // CONFIG_EXT_INTER
/* NB: kf_y_cdf is discarded after use, so no need
for backwards update */
@@ -5755,6 +2383,14 @@ void av1_average_tile_inter_cdfs(AV1_COMMON *cm, FRAME_CONTEXT *fc,
#endif
#endif
#endif
+#if CONFIG_MRC_TX
+ AVERAGE_TILE_CDFS(mrc_mask_inter_cdf)
+#endif // CONFIG_MRC_TX
+#if CONFIG_LPF_SB
+ AVERAGE_TILE_CDFS(lpf_reuse_cdf)
+ AVERAGE_TILE_CDFS(lpf_delta_cdf)
+ AVERAGE_TILE_CDFS(lpf_sign_cdf)
+#endif // CONFIG_LPF_SB
}
#if CONFIG_PVQ
diff --git a/third_party/aom/av1/common/entropy.h b/third_party/aom/av1/common/entropy.h
index 190b792b5..679aae837 100644
--- a/third_party/aom/av1/common/entropy.h
+++ b/third_party/aom/av1/common/entropy.h
@@ -28,8 +28,7 @@ extern "C" {
#define GROUP_DIFF_UPDATE_PROB 252
#if CONFIG_Q_ADAPT_PROBS
-#define QCTX_BIN_BITS 2
-#define QCTX_BINS (1 << QCTX_BIN_BITS)
+#define TOKEN_CDF_Q_CTXS 4
#endif // CONFIG_Q_ADAPT_PROBS
// Coefficient token alphabet
@@ -61,8 +60,25 @@ extern "C" {
#if CONFIG_LV_MAP
#define TXB_SKIP_CONTEXTS 13
-#define SIG_COEF_CONTEXTS 20
+
+#if CONFIG_CTX1D
+#define EOB_COEF_CONTEXTS_2D 25
+#define EOB_COEF_CONTEXTS_1D 25
+#define EOB_COEF_CONTEXTS \
+ (EOB_COEF_CONTEXTS_2D + EOB_COEF_CONTEXTS_1D + EOB_COEF_CONTEXTS_1D)
+#else // CONFIG_CTX1D
#define EOB_COEF_CONTEXTS 25
+#endif // CONFIG_CTX1D
+
+#if CONFIG_EXT_TX
+#define SIG_COEF_CONTEXTS_2D 16
+#define SIG_COEF_CONTEXTS_1D 16
+#define SIG_COEF_CONTEXTS \
+ (SIG_COEF_CONTEXTS_2D + SIG_COEF_CONTEXTS_1D + SIG_COEF_CONTEXTS_1D)
+#else // CONFIG_EXT_TX
+#define SIG_COEF_CONTEXTS_2D 16
+#define SIG_COEF_CONTEXTS 16
+#endif // CONFIG_EXT_TX
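
Spelled out, the EXT_TX arithmetic is SIG_COEF_CONTEXTS = 16 + 16 + 16 = 48: one bank of 16 significance contexts for each TX_CLASS (2-D, horizontal, vertical) declared further down in this hunk, while the non-EXT_TX build keeps only the 16 2-D contexts.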
#define COEFF_BASE_CONTEXTS 42
#define DC_SIGN_CONTEXTS 3
@@ -71,10 +87,26 @@ extern "C" {
#define LEVEL_CONTEXTS (BR_TMP_OFFSET * BR_REF_CAT)
#define NUM_BASE_LEVELS 2
-#define COEFF_BASE_RANGE (15 - NUM_BASE_LEVELS)
+#define COEFF_BASE_RANGE (16 - NUM_BASE_LEVELS)
+#define BASE_RANGE_SETS 3
#define COEFF_CONTEXT_BITS 6
#define COEFF_CONTEXT_MASK ((1 << COEFF_CONTEXT_BITS) - 1)
+
+#define BASE_CONTEXT_POSITION_NUM 12
+
+#if CONFIG_CTX1D
+#define EMPTY_LINE_CONTEXTS 5
+#define HV_EOB_CONTEXTS 24
+#endif // CONFIG_CTX1D
+
+typedef enum TX_CLASS {
+ TX_CLASS_2D = 0,
+ TX_CLASS_HORIZ = 1,
+ TX_CLASS_VERT = 2,
+ TX_CLASSES = 3,
+} TX_CLASS;
+
#endif
DECLARE_ALIGNED(16, extern const uint8_t, av1_pt_energy_class[ENTROPY_TOKENS]);
@@ -169,26 +201,19 @@ static INLINE int av1_get_cat6_extrabits_size(TX_SIZE tx_size,
distinct bands). */
#define COEFF_CONTEXTS 6
-#define BLOCKZ_CONTEXTS 3
#define COEFF_CONTEXTS0 3 // for band 0
#define BAND_COEFF_CONTEXTS(band) \
((band) == 0 ? COEFF_CONTEXTS0 : COEFF_CONTEXTS)
-// #define ENTROPY_STATS
-
-typedef unsigned int av1_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
- [ENTROPY_TOKENS];
-typedef unsigned int av1_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
- [ENTROPY_NODES][2];
-
#define SUBEXP_PARAM 4 /* Subexponential code parameter */
#define MODULUS_PARAM 13 /* Modulus parameter */
struct AV1Common;
struct frame_contexts;
void av1_default_coef_probs(struct AV1Common *cm);
+#if CONFIG_LV_MAP
void av1_adapt_coef_probs(struct AV1Common *cm);
-void av1_adapt_coef_cdfs(struct AV1Common *cm, struct frame_contexts *pre_fc);
+#endif // CONFIG_LV_MAP
// This is the index in the scan order beyond which all coefficients for
// 8x8 transform and above are in the top band.
@@ -221,26 +246,13 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
#define UNCONSTRAINED_NODES 3
-#define PIVOT_NODE 2 // which node is pivot
-
#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
#define TAIL_NODES (MODEL_NODES + 1)
extern const aom_tree_index av1_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
extern const aom_prob av1_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
-typedef aom_prob av1_coeff_probs_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
- [UNCONSTRAINED_NODES];
-
-typedef unsigned int av1_coeff_count_model[REF_TYPES][COEF_BANDS]
- [COEFF_CONTEXTS]
- [UNCONSTRAINED_NODES + 1];
-
-void av1_model_to_full_probs(const aom_prob *model, aom_prob *full);
-
typedef aom_cdf_prob coeff_cdf_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
[CDF_SIZE(ENTROPY_TOKENS)];
-typedef aom_prob av1_blockz_probs_model[REF_TYPES][BLOCKZ_CONTEXTS];
-typedef unsigned int av1_blockz_count_model[REF_TYPES][BLOCKZ_CONTEXTS][2];
extern const aom_cdf_prob av1_pareto8_token_probs[COEFF_PROB_MODELS]
[ENTROPY_TOKENS - 2];
extern const aom_cdf_prob av1_pareto8_tail_probs[COEFF_PROB_MODELS]
@@ -314,6 +326,16 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8) |
*(const uint64_t *)(l + 16) | *(const uint64_t *)(l + 24));
break;
+ case TX_32X64:
+ above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
+ left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8) |
+ *(const uint64_t *)(l + 16) | *(const uint64_t *)(l + 24));
+ break;
+ case TX_64X32:
+ above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8) |
+ *(const uint64_t *)(a + 16) | *(const uint64_t *)(a + 24));
+ left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
+ break;
#endif // CONFIG_TX64X64
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
case TX_4X16:
@@ -384,6 +406,14 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
break;
+ case TX_32X64:
+ above_ec = !!*(const uint64_t *)a;
+ left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
+ break;
+ case TX_64X32:
+ above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
+ left_ec = !!*(const uint64_t *)l;
+ break;
#endif // CONFIG_TX64X64
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
case TX_4X16:
@@ -414,7 +444,7 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128
#if CONFIG_ADAPT_SCAN
-#define ADAPT_SCAN_PROB_PRECISION 16
+#define ADAPT_SCAN_PROB_PRECISION 10
// 1/8 update rate
#define ADAPT_SCAN_UPDATE_LOG_RATE 3
#define ADAPT_SCAN_UPDATE_RATE \
diff --git a/third_party/aom/av1/common/entropymode.c b/third_party/aom/av1/common/entropymode.c
index 9faa03e69..207f1e245 100644
--- a/third_party/aom/av1/common/entropymode.c
+++ b/third_party/aom/av1/common/entropymode.c
@@ -15,8 +15,11 @@
#include "av1/common/scan.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/seg_common.h"
+#if CONFIG_LV_MAP
+#include "av1/common/txb_common.h"
+#endif
#if CONFIG_LV_MAP
+#include "av1/common/txb_common.h"
const aom_prob default_txb_skip[TX_SIZES][TXB_SKIP_CONTEXTS] = {
#if CONFIG_CHROMA_2X2
{ 252, 71, 126, 184, 178, 218, 251, 49, 133, 221, 27, 92, 197 },
@@ -168,40 +172,260 @@ const aom_prob default_coeff_base
};
const aom_prob default_nz_map[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS] = {
+#if CONFIG_EXT_TX
#if CONFIG_CHROMA_2X2
+ { { 56, 137, 82, 136, 83, 187, 124, 65,
+ 215, 118, 155, 97, 160, 111, 71, 55,
+
+ 142, 156, 91, 226, 107, 231, 146, 65,
+ 105, 91, 232, 97, 185, 121, 90, 74,
+
+ 153, 195, 123, 154, 106, 196, 143, 67,
+ 232, 125, 121, 105, 159, 113, 88, 66 },
+ { 50, 124, 89, 135, 116, 189, 150, 81,
+ 202, 126, 130, 107, 149, 110, 85, 67,
+
+ 139, 174, 112, 200, 94, 206, 146, 71,
+ 163, 164, 212, 99, 177, 143, 125, 85,
+
+ 151, 181, 126, 168, 135, 186, 143, 94,
+ 207, 129, 142, 135, 145, 112, 98, 81 } },
+#endif
+ { { 56, 137, 82, 136, 83, 187, 124, 65,
+ 215, 118, 155, 97, 160, 111, 71, 55,
+
+ 142, 156, 91, 226, 107, 231, 146, 65,
+ 105, 91, 232, 97, 185, 121, 90, 74,
+
+ 153, 195, 123, 154, 106, 196, 143, 67,
+ 232, 125, 121, 105, 159, 113, 88, 66 },
+ { 50, 124, 89, 135, 116, 189, 150, 81,
+ 202, 126, 130, 107, 149, 110, 85, 67,
+
+ 139, 174, 112, 200, 94, 206, 146, 71,
+ 163, 164, 212, 99, 177, 143, 125, 85,
+
+ 151, 181, 126, 168, 135, 186, 143, 94,
+ 207, 129, 142, 135, 145, 112, 98, 81 } },
+ { { 57, 156, 91, 162, 99, 212, 149, 81,
+ 223, 128, 182, 121, 216, 163, 119, 94,
+
+ 139, 183, 100, 206, 98, 242, 163, 79,
+ 200, 127, 234, 112, 230, 169, 115, 90,
+
+ 156, 190, 130, 172, 117, 209, 163, 80,
+ 217, 145, 182, 135, 204, 163, 120, 88 },
+ { 48, 133, 102, 143, 119, 190, 170, 109,
+ 197, 127, 176, 137, 214, 168, 130, 119,
+
+ 139, 185, 129, 210, 84, 237, 177, 75,
+ 182, 165, 216, 121, 206, 177, 147, 102,
+
+ 159, 192, 153, 182, 139, 203, 160, 125,
+ 193, 161, 176, 142, 173, 145, 131, 114 } },
+ { { 33, 148, 81, 149, 84, 219, 152, 76,
+ 229, 127, 205, 120, 234, 170, 123, 88,
+
+ 134, 197, 101, 213, 91, 244, 169, 85,
+ 220, 141, 234, 123, 242, 183, 130, 94,
+
+ 141, 184, 121, 173, 98, 213, 156, 85,
+ 204, 156, 197, 119, 212, 174, 127, 92 },
+ { 14, 75, 45, 98, 83, 197, 150, 90,
+ 235, 124, 242, 155, 246, 187, 143, 103,
+
+ 78, 185, 111, 255, 116, 255, 224, 171,
+ 185, 157, 255, 85, 219, 122, 128, 128,
+
+ 117, 187, 102, 181, 132, 233, 197, 93,
+ 207, 135, 191, 107, 222, 175, 130, 47 } },
{
- { 34, 103, 61, 106, 62, 160, 112, 54, 173, 121,
- 75, 157, 92, 75, 157, 129, 94, 65, 52, 37 },
- { 52, 124, 84, 136, 107, 197, 161, 82, 183, 151,
- 109, 153, 140, 103, 152, 134, 109, 81, 69, 50 },
+ { 14, 79, 44, 86, 59, 178, 124, 63,
+ 244, 106, 233, 117, 252, 185, 132, 92,
+
+ 85, 225, 47, 236, 103, 255, 190, 116,
+ 235, 114, 247, 123, 250, 174, 122, 110,
+
+ 109, 197, 78, 177, 76, 242, 148, 68,
+ 236, 123, 231, 103, 247, 171, 122, 91 },
+ { 11, 40, 27, 92, 78, 183, 171, 70,
+ 216, 74, 251, 146, 252, 213, 171, 148,
+
+ 85, 225, 47, 236, 103, 255, 190, 116,
+ 235, 114, 247, 123, 250, 174, 122, 110,
+
+ 109, 197, 78, 177, 76, 242, 148, 68,
+ 236, 123, 231, 103, 247, 171, 122, 91 },
+ },
+#else // CONFIG_EXT_TX
+#if CONFIG_CHROMA_2X2
+ {
+ {
+ 34, 103, 61, 106, 62, 160, 112, 54, 173, 121, 157, 92, 157, 129, 94,
+ 65,
+ },
+
+ {
+ 52, 124, 84, 136, 107, 197, 161, 82, 183, 151, 153, 140, 152, 134,
+ 109, 81,
+ },
},
#endif
{
- { 34, 103, 61, 106, 62, 160, 112, 54, 173, 121,
- 75, 157, 92, 75, 157, 129, 94, 65, 52, 37 },
- { 52, 124, 84, 136, 107, 197, 161, 82, 183, 151,
- 109, 153, 140, 103, 152, 134, 109, 81, 69, 50 },
+ {
+ 34, 103, 61, 106, 62, 160, 112, 54, 173, 121, 157, 92, 157, 129, 94,
+ 65,
+ },
+
+ {
+ 52, 124, 84, 136, 107, 197, 161, 82, 183, 151, 153, 140, 152, 134,
+ 109, 81,
+ },
},
{
- { 34, 127, 74, 124, 74, 204, 153, 76, 226, 162,
- 92, 207, 126, 91, 227, 192, 149, 108, 85, 55 },
- { 43, 136, 115, 158, 130, 212, 187, 112, 231, 180,
- 130, 202, 164, 130, 236, 204, 168, 139, 112, 114 },
+ {
+ 34, 127, 74, 124, 74, 204, 153, 76, 226, 162, 207, 126, 227, 192, 149,
+ 108,
+ },
+
+ {
+ 43, 136, 115, 158, 130, 212, 187, 112, 231, 180, 202, 164, 236, 204,
+ 168, 139,
+ },
},
{
- { 25, 117, 70, 120, 77, 215, 171, 102, 234, 156,
- 105, 235, 155, 109, 247, 220, 176, 127, 92, 72 },
- { 24, 88, 49, 100, 62, 202, 148, 62, 237, 178,
- 102, 233, 168, 105, 244, 198, 162, 127, 103, 71 },
+ {
+ 25, 117, 70, 120, 77, 215, 171, 102, 234, 156, 235, 155, 247, 220,
+ 176, 127,
+ },
+
+ {
+ 24, 88, 49, 100, 62, 202, 148, 62, 237, 178, 233, 168, 244, 198, 162,
+ 127,
+ },
},
{
- { 11, 54, 17, 69, 26, 128, 125, 56, 232, 130,
- 60, 237, 121, 66, 250, 168, 134, 114, 93, 53 },
- { 21, 52, 32, 95, 64, 171, 152, 70, 247, 159,
- 81, 252, 177, 100, 252, 221, 192, 143, 195, 146 },
+ {
+ 11, 54, 17, 69, 26, 128, 125, 56, 232, 130, 237, 121, 250, 168, 134,
+ 114,
+ },
+
+ {
+ 21, 52, 32, 95, 64, 171, 152, 70, 247, 159, 252, 177, 252, 221, 192,
+ 143,
+ },
},
+#endif // CONFIG_EXT_TX
};
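A note on the shape of default_nz_map: each [tx_size][plane] entry in the CONFIG_EXT_TX branch above carries 48 probabilities, printed as three 16-value groups, while the !CONFIG_EXT_TX branch carries 16. Since TX_CLASSES is 3 elsewhere in this file, the EXT_TX layout is presumably one 16-context group per transform class. A sketch of the implied indexing; nz_map_ctx and its arguments are assumed names, not aom identifiers:

    /* ctx = tx_class * 16 + pos_ctx with 0 <= tx_class < 3 and
     * 0 <= pos_ctx < 16, so 3 * 16 = 48 = SIG_COEF_CONTEXTS. */
    static int nz_map_ctx(int tx_class, int pos_ctx) {
      return tx_class * 16 + pos_ctx;
    }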
+#if CONFIG_CTX1D
+const aom_prob default_eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS] = {
+#if CONFIG_CHROMA_2X2
+ { { 220, 225, 220, 216, 233, 225, 189, 178, 222, 199, 164, 112, 207,
+ 171, 115, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 147, 125, 104, 36, 117, 107, 26, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 156, 124, 128, 128, 146, 68, 128, 128, 131, 17, 128, 128, 64,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+
+ { 146, 150, 142, 144, 178, 167, 131, 116, 150, 123, 107, 63, 119,
+ 89, 74, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 117, 127, 105, 69, 53, 56, 30, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 128, 86, 128, 128, 140, 72, 128, 128, 120, 44, 128, 128, 80,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
+ { { 237, 242, 242, 219, 192, 246, 246, 243, 233, 184, 155, 234, 217,
+ 188, 152, 195, 167, 114, 89, 128, 128, 128, 128, 128, 128,
+
+ 180, 173, 154, 133, 112, 147, 145, 142, 102, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 198, 173, 130, 200, 128, 208, 182, 160, 106, 171, 128, 144, 128,
+ 128, 128, 124, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+
+ { 140, 170, 162, 111, 94, 182, 195, 165, 153, 110, 81, 178, 169,
+ 158, 83, 133, 85, 85, 38, 128, 128, 128, 128, 128, 128,
+
+ 112, 127, 107, 87, 31, 57, 49, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 160, 143, 99, 126, 128, 164, 133, 126, 59, 71, 128, 138, 128,
+ 128, 128, 99, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
+#endif
+ { { 220, 225, 220, 216, 233, 225, 189, 178, 222, 199, 164, 112, 207,
+ 171, 115, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 147, 125, 104, 36, 117, 107, 26, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 156, 124, 128, 128, 146, 68, 128, 128, 131, 17, 128, 128, 64,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+
+ { 146, 150, 142, 144, 178, 167, 131, 116, 150, 123, 107, 63, 119,
+ 89, 74, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 117, 127, 105, 69, 53, 56, 30, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 128, 86, 128, 128, 140, 72, 128, 128, 120, 44, 128, 128, 80,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
+ { { 237, 242, 242, 219, 192, 246, 246, 243, 233, 184, 155, 234, 217,
+ 188, 152, 195, 167, 114, 89, 128, 128, 128, 128, 128, 128,
+
+ 180, 173, 154, 133, 112, 147, 145, 142, 102, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 198, 173, 130, 200, 128, 208, 182, 160, 106, 171, 128, 144, 128,
+ 128, 128, 124, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+
+ { 140, 170, 162, 111, 94, 182, 195, 165, 153, 110, 81, 178, 169,
+ 158, 83, 133, 85, 85, 38, 128, 128, 128, 128, 128, 128,
+
+ 112, 127, 107, 87, 31, 57, 49, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 160, 143, 99, 126, 128, 164, 133, 126, 59, 71, 128, 138, 128,
+ 128, 128, 99, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
+ { { 229, 241, 243, 245, 247, 247, 251, 248, 235, 210, 247, 235, 208,
+ 166, 245, 247, 244, 182, 236, 229, 180, 136, 128, 128, 128,
+
+ 191, 197, 96, 70, 199, 128, 128, 191, 174, 117, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 211, 183, 215, 188, 138, 209, 136, 128, 170, 128, 191, 128, 161,
+ 128, 182, 128, 128, 128, 164, 128, 128, 128, 128, 128, 128 },
+
+ { 106, 153, 182, 191, 186, 202, 211, 203, 166, 147, 205, 205, 195,
+ 128, 206, 212, 182, 109, 192, 154, 139, 79, 128, 128, 128,
+
+ 112, 133, 128, 255, 128, 128, 128, 130, 154, 98, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 144, 185, 169, 199, 85, 183, 128, 128, 64, 128, 146, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
+ { { 169, 203, 224, 222, 220, 228, 229, 223, 234, 247, 242, 230, 222,
+ 238, 246, 234, 196, 245, 249, 245, 192, 240, 235, 199, 161,
+
+ 176, 148, 158, 77, 178, 128, 128, 158, 128, 128, 196, 208, 155,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 232, 187, 191, 221, 116, 217, 154, 128, 203, 128, 128, 192, 128,
+ 201, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+
+ { 133, 182, 215, 204, 176, 220, 182, 168, 187, 197, 181, 145, 75,
+ 164, 136, 51, 57, 156, 128, 128, 128, 85, 128, 128, 128,
+
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } },
+};
+#else // CONFIG_CTX1D
const aom_prob default_eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS] = {
#if CONFIG_CHROMA_2X2
{
@@ -236,61 +460,290 @@ const aom_prob default_eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS] = {
236, 186, 182, 57, 209, 140, 128, 85, 184, 110, 128, 128 },
},
};
+#endif // CONFIG_CTX1D
const aom_prob default_coeff_lps[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS] = {
#if CONFIG_CHROMA_2X2
- {
- { 164, 128, 134, 165, 128, 137, 168, 128, 97, 136, 167, 128,
- 182, 205, 143, 172, 200, 145, 173, 193, 103, 137, 170, 191,
- 198, 214, 162, 187, 209, 162, 187, 207, 128, 156, 183, 201,
- 219, 230, 204, 210, 225, 201, 209, 225, 187, 190, 203, 214 },
- { 106, 128, 98, 126, 128, 87, 122, 128, 54, 89, 131, 128,
- 142, 180, 123, 154, 189, 115, 149, 175, 79, 115, 157, 182,
- 175, 197, 147, 174, 199, 145, 174, 201, 89, 135, 173, 194,
- 212, 222, 206, 203, 223, 188, 201, 220, 128, 144, 202, 206 },
- },
+ { { 96, 128, 86, 122, 128, 84, 125, 128, 88, 99, 126, 128,
+ 135, 159, 99, 130, 134, 100, 128, 144, 70, 97, 128, 139,
+ 157, 168, 127, 148, 162, 121, 149, 157, 118, 127, 143, 157,
+ 178, 186, 168, 171, 183, 165, 169, 180, 180, 169, 166, 177 },
+ { 81, 128, 72, 95, 128, 64, 98, 128, 42, 66, 101, 128,
+ 129, 163, 97, 122, 130, 91, 119, 141, 70, 94, 118, 166,
+ 157, 168, 117, 143, 151, 111, 144, 154, 76, 113, 128, 158,
+ 177, 185, 165, 167, 179, 155, 166, 179, 110, 137, 115, 165 } },
#endif
- {
- { 164, 128, 134, 165, 128, 137, 168, 128, 97, 136, 167, 128,
- 182, 205, 143, 172, 200, 145, 173, 193, 103, 137, 170, 191,
- 198, 214, 162, 187, 209, 162, 187, 207, 128, 156, 183, 201,
- 219, 230, 204, 210, 225, 201, 209, 225, 187, 190, 203, 214 },
- { 106, 128, 98, 126, 128, 87, 122, 128, 54, 89, 131, 128,
- 142, 180, 123, 154, 189, 115, 149, 175, 79, 115, 157, 182,
- 175, 197, 147, 174, 199, 145, 174, 201, 89, 135, 173, 194,
- 212, 222, 206, 203, 223, 188, 201, 220, 128, 144, 202, 206 },
- },
- {
- { 171, 128, 123, 169, 128, 121, 165, 128, 82, 125, 168, 128,
- 191, 213, 143, 177, 199, 136, 170, 194, 95, 135, 171, 195,
- 206, 222, 166, 191, 212, 154, 184, 207, 115, 149, 180, 204,
- 223, 237, 196, 215, 231, 186, 209, 228, 158, 178, 201, 222 },
- { 115, 128, 115, 146, 128, 91, 147, 128, 55, 93, 139, 128,
- 147, 190, 141, 176, 201, 123, 156, 173, 68, 114, 156, 195,
- 186, 205, 153, 191, 214, 141, 179, 205, 107, 132, 166, 184,
- 215, 225, 200, 212, 230, 102, 207, 222, 128, 119, 200, 212 },
- },
- {
- { 185, 128, 134, 198, 128, 128, 195, 128, 58, 110, 162, 128,
- 208, 227, 154, 196, 206, 144, 188, 209, 83, 130, 168, 198,
- 219, 232, 167, 205, 222, 158, 196, 216, 107, 143, 178, 204,
- 233, 244, 202, 226, 238, 191, 217, 234, 153, 178, 200, 223 },
- { 160, 128, 154, 197, 128, 129, 178, 128, 53, 112, 157, 128,
- 185, 214, 169, 196, 221, 134, 179, 186, 82, 131, 168, 194,
- 204, 220, 176, 209, 221, 173, 194, 209, 107, 154, 181, 203,
- 230, 241, 202, 226, 237, 185, 223, 234, 162, 187, 203, 222 },
- },
- {
- { 177, 128, 165, 226, 128, 152, 219, 128, 45, 129, 188, 128,
- 198, 218, 179, 220, 228, 163, 214, 220, 72, 134, 181, 206,
- 216, 225, 177, 218, 231, 158, 213, 223, 112, 150, 185, 210,
- 245, 251, 204, 234, 247, 195, 231, 243, 163, 186, 213, 235 },
- { 161, 128, 174, 205, 128, 146, 182, 128, 59, 125, 179, 128,
- 183, 208, 199, 220, 239, 184, 213, 217, 71, 141, 196, 217,
- 213, 219, 215, 230, 237, 171, 224, 238, 112, 173, 193, 221,
- 239, 246, 168, 243, 249, 93, 241, 247, 128, 195, 216, 233 },
- },
+ { { 96, 128, 86, 122, 128, 84, 125, 128, 88, 99, 126, 128,
+ 135, 159, 99, 130, 134, 100, 128, 144, 70, 97, 128, 139,
+ 157, 168, 127, 148, 162, 121, 149, 157, 118, 127, 143, 157,
+ 178, 186, 168, 171, 183, 165, 169, 180, 180, 169, 166, 177 },
+ { 81, 128, 72, 95, 128, 64, 98, 128, 42, 66, 101, 128,
+ 129, 163, 97, 122, 130, 91, 119, 141, 70, 94, 118, 166,
+ 157, 168, 117, 143, 151, 111, 144, 154, 76, 113, 128, 158,
+ 177, 185, 165, 167, 179, 155, 166, 179, 110, 137, 115, 165 } },
+ { { 102, 128, 79, 125, 128, 74, 121, 128, 61, 98, 128, 128,
+ 141, 164, 96, 132, 150, 90, 128, 153, 62, 100, 128, 153,
+ 162, 172, 120, 146, 162, 113, 142, 154, 96, 113, 138, 155,
+ 181, 188, 151, 170, 179, 147, 167, 181, 158, 157, 163, 176 },
+ { 103, 128, 80, 116, 128, 66, 94, 128, 35, 65, 109, 128,
+ 134, 163, 104, 137, 154, 92, 128, 104, 58, 94, 129, 132,
+ 156, 173, 137, 149, 165, 104, 143, 143, 112, 101, 133, 159,
+ 176, 186, 134, 172, 175, 155, 169, 177, 255, 107, 137, 168 } },
+ { { 125, 128, 85, 157, 128, 82, 155, 128, 42, 83, 116, 128,
+ 155, 174, 101, 144, 155, 93, 140, 155, 57, 92, 124, 149,
+ 173, 178, 114, 148, 161, 111, 145, 161, 77, 101, 131, 153,
+ 190, 191, 140, 169, 183, 140, 169, 179, 108, 122, 150, 171 },
+ { 136, 128, 108, 163, 128, 96, 140, 128, 48, 90, 85, 128,
+ 144, 164, 113, 158, 179, 107, 159, 128, 43, 75, 133, 160,
+ 157, 184, 144, 160, 189, 154, 152, 184, 128, 124, 137, 140,
+ 188, 196, 148, 170, 178, 128, 177, 159, 128, 179, 135, 135 } },
+ { { 133, 128, 110, 153, 128, 101, 157, 128, 49, 91, 134, 128,
+ 151, 168, 129, 158, 162, 112, 154, 168, 63, 99, 130, 158,
+ 171, 178, 128, 160, 173, 111, 155, 171, 86, 108, 143, 159,
+ 194, 196, 162, 177, 185, 123, 172, 181, 101, 132, 156, 178 },
+ { 133, 128, 129, 144, 128, 116, 135, 128, 43, 101, 100, 128,
+ 140, 163, 158, 173, 205, 128, 165, 171, 128, 128, 210, 163,
+ 172, 184, 192, 176, 201, 183, 177, 190, 128, 192, 199, 144,
+ 192, 192, 1, 196, 192, 255, 171, 178, 255, 128, 171, 179 } }
+};
+#if BR_NODE
+const aom_prob
+ default_coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS][LEVEL_CONTEXTS] = {
+#if CONFIG_CHROMA_2X2
+ { { { 62, 128, 54, 116, 128, 51, 97, 128, 59, 68, 107, 128,
+ 119, 158, 68, 115, 131, 65, 112, 138, 34, 71, 118, 137,
+ 171, 184, 110, 152, 178, 105, 146, 172, 89, 111, 145, 173,
+ 214, 226, 201, 198, 214, 196, 193, 210, 239, 196, 186, 202 },
+ { 41, 128, 58, 52, 128, 51, 61, 128, 92, 54, 48, 128,
+ 67, 113, 36, 55, 75, 30, 56, 72, 12, 25, 50, 79,
+ 94, 131, 37, 75, 108, 42, 78, 103, 5, 31, 67, 103,
+ 172, 192, 131, 135, 167, 129, 136, 165, 149, 144, 120, 149 },
+ { 35, 128, 74, 50, 128, 63, 59, 128, 87, 74, 38, 128,
+ 32, 53, 23, 34, 50, 18, 30, 41, 15, 13, 18, 18,
+ 52, 74, 18, 29, 36, 18, 31, 47, 51, 9, 15, 27,
+ 96, 134, 85, 70, 93, 96, 79, 100, 108, 100, 55, 65 } },
+ { { 52, 128, 35, 79, 128, 29, 66, 128, 12, 30, 57, 128,
+ 113, 156, 64, 107, 172, 54, 103, 145, 23, 57, 96, 110,
+ 165, 184, 95, 138, 166, 95, 141, 184, 55, 80, 133, 165,
+ 212, 222, 134, 175, 206, 158, 177, 197, 102, 61, 154, 190 },
+ { 36, 128, 18, 26, 128, 15, 29, 128, 4, 6, 30, 128,
+ 63, 113, 25, 44, 66, 22, 40, 67, 9, 14, 34, 55,
+ 90, 125, 26, 66, 82, 29, 73, 88, 1, 26, 34, 67,
+ 158, 179, 70, 121, 134, 69, 111, 129, 1, 85, 54, 105 },
+ { 24, 128, 8, 31, 128, 15, 16, 128, 1, 1, 1, 128,
+ 32, 39, 16, 18, 43, 5, 17, 13, 1, 1, 22, 1,
+ 37, 65, 26, 20, 28, 16, 15, 24, 128, 1, 1, 1,
+ 83, 107, 57, 56, 74, 34, 29, 73, 128, 1, 37, 47 } } },
+#endif
+ { { { 62, 128, 54, 116, 128, 51, 97, 128, 59, 68, 107, 128,
+ 119, 158, 68, 115, 131, 65, 112, 138, 34, 71, 118, 137,
+ 171, 184, 110, 152, 178, 105, 146, 172, 89, 111, 145, 173,
+ 214, 226, 201, 198, 214, 196, 193, 210, 239, 196, 186, 202 },
+ { 41, 128, 58, 52, 128, 51, 61, 128, 92, 54, 48, 128,
+ 67, 113, 36, 55, 75, 30, 56, 72, 12, 25, 50, 79,
+ 94, 131, 37, 75, 108, 42, 78, 103, 5, 31, 67, 103,
+ 172, 192, 131, 135, 167, 129, 136, 165, 149, 144, 120, 149 },
+ { 35, 128, 74, 50, 128, 63, 59, 128, 87, 74, 38, 128,
+ 32, 53, 23, 34, 50, 18, 30, 41, 15, 13, 18, 18,
+ 52, 74, 18, 29, 36, 18, 31, 47, 51, 9, 15, 27,
+ 96, 134, 85, 70, 93, 96, 79, 100, 108, 100, 55, 65 } },
+ { { 52, 128, 35, 79, 128, 29, 66, 128, 12, 30, 57, 128,
+ 113, 156, 64, 107, 172, 54, 103, 145, 23, 57, 96, 110,
+ 165, 184, 95, 138, 166, 95, 141, 184, 55, 80, 133, 165,
+ 212, 222, 134, 175, 206, 158, 177, 197, 102, 61, 154, 190 },
+ { 36, 128, 18, 26, 128, 15, 29, 128, 4, 6, 30, 128,
+ 63, 113, 25, 44, 66, 22, 40, 67, 9, 14, 34, 55,
+ 90, 125, 26, 66, 82, 29, 73, 88, 1, 26, 34, 67,
+ 158, 179, 70, 121, 134, 69, 111, 129, 1, 85, 54, 105 },
+ { 24, 128, 8, 31, 128, 15, 16, 128, 1, 1, 1, 128,
+ 32, 39, 16, 18, 43, 5, 17, 13, 1, 1, 22, 1,
+ 37, 65, 26, 20, 28, 16, 15, 24, 128, 1, 1, 1,
+ 83, 107, 57, 56, 74, 34, 29, 73, 128, 1, 37, 47 } } },
+ { { { 72, 128, 45, 113, 128, 38, 100, 128, 26, 63, 112, 128,
+ 134, 177, 65, 121, 148, 57, 111, 143, 27, 68, 116, 152,
+ 181, 198, 98, 148, 173, 84, 136, 168, 53, 89, 134, 170,
+ 218, 230, 173, 194, 216, 160, 188, 213, 199, 177, 183, 204 },
+ { 54, 128, 34, 55, 128, 32, 53, 128, 66, 45, 54, 128,
+ 81, 128, 33, 59, 102, 26, 55, 80, 7, 23, 49, 91,
+ 116, 145, 36, 79, 107, 35, 73, 102, 12, 28, 57, 95,
+ 170, 201, 102, 133, 173, 105, 127, 173, 166, 132, 114, 149 },
+ { 40, 128, 25, 30, 128, 21, 31, 128, 24, 17, 24, 128,
+ 51, 67, 19, 28, 40, 17, 25, 42, 15, 13, 19, 19,
+ 61, 77, 19, 30, 48, 13, 33, 50, 11, 15, 21, 30,
+ 103, 147, 37, 69, 111, 37, 66, 105, 18, 18, 36, 76 } },
+ { { 74, 128, 42, 99, 128, 32, 57, 128, 9, 28, 76, 128,
+ 115, 187, 70, 118, 120, 52, 109, 128, 19, 60, 93, 100,
+ 178, 197, 119, 147, 179, 92, 137, 178, 37, 87, 110, 158,
+ 216, 227, 169, 186, 201, 128, 178, 204, 1, 96, 155, 217 },
+ { 59, 128, 26, 34, 128, 11, 20, 128, 7, 8, 24, 128,
+ 73, 125, 38, 74, 96, 23, 61, 79, 15, 9, 23, 110,
+ 96, 151, 49, 79, 164, 22, 70, 65, 1, 1, 9, 69,
+ 156, 196, 73, 105, 181, 17, 126, 155, 128, 1, 90, 111 },
+ { 42, 128, 10, 11, 128, 13, 1, 128, 1, 1, 1, 128,
+ 55, 63, 13, 17, 85, 1, 16, 64, 1, 1, 1, 1,
+ 62, 58, 32, 21, 53, 1, 37, 91, 128, 128, 1, 1,
+ 81, 133, 51, 48, 79, 1, 25, 81, 128, 128, 1, 54 } } },
+ { { { 103, 128, 52, 163, 128, 46, 155, 128, 12, 45, 97, 128,
+ 162, 196, 69, 140, 170, 60, 130, 158, 21, 58, 109, 150,
+ 205, 214, 93, 149, 178, 79, 143, 179, 38, 71, 120, 159,
+ 231, 240, 150, 192, 218, 140, 188, 220, 84, 112, 159, 196 },
+ { 93, 128, 42, 143, 128, 41, 132, 128, 6, 15, 40, 128,
+ 113, 172, 39, 99, 113, 33, 91, 94, 5, 15, 42, 83,
+ 148, 172, 37, 91, 130, 28, 81, 121, 9, 20, 47, 87,
+ 201, 223, 75, 139, 183, 77, 132, 176, 23, 41, 82, 147 },
+ { 92, 128, 45, 123, 128, 28, 88, 128, 1, 8, 20, 128,
+ 85, 94, 39, 95, 83, 33, 81, 61, 4, 5, 17, 25,
+ 84, 109, 17, 59, 76, 11, 46, 62, 1, 4, 13, 35,
+ 139, 184, 25, 86, 129, 25, 71, 123, 26, 13, 31, 84 } },
+ { { 123, 128, 82, 169, 128, 62, 139, 128, 1, 28, 77, 128,
+ 139, 167, 92, 170, 146, 76, 149, 255, 19, 68, 160, 73,
+ 190, 209, 171, 165, 218, 57, 152, 209, 128, 61, 122, 164,
+ 237, 240, 146, 210, 227, 128, 224, 220, 128, 128, 196, 199 },
+ { 130, 128, 52, 141, 128, 32, 101, 128, 128, 1, 85, 128,
+ 94, 155, 71, 121, 255, 30, 116, 85, 1, 8, 58, 255,
+ 105, 169, 110, 101, 132, 1, 77, 142, 128, 1, 54, 96,
+ 166, 214, 224, 154, 198, 255, 153, 230, 128, 85, 100, 146 },
+ { 103, 128, 26, 83, 128, 20, 47, 128, 128, 128, 1, 128,
+ 91, 90, 19, 76, 128, 1, 42, 1, 128, 255, 64, 128,
+ 74, 77, 1, 72, 68, 128, 13, 77, 128, 128, 64, 1,
+ 71, 147, 37, 99, 171, 1, 104, 151, 128, 1, 1, 96 } } },
+ { { { 113, 128, 79, 165, 128, 69, 149, 128, 14, 55, 116, 128,
+ 163, 202, 104, 169, 205, 82, 159, 180, 22, 64, 121, 165,
+ 207, 216, 113, 177, 215, 95, 166, 195, 35, 77, 132, 179,
+ 241, 244, 173, 207, 233, 128, 202, 227, 92, 121, 169, 209 },
+ { 114, 128, 67, 136, 128, 54, 132, 128, 6, 26, 62, 128,
+ 85, 129, 85, 146, 173, 64, 129, 140, 7, 19, 65, 92,
+ 139, 169, 42, 147, 186, 40, 129, 170, 18, 18, 65, 117,
+ 213, 230, 74, 172, 213, 69, 165, 196, 1, 40, 103, 170 },
+ { 101, 128, 61, 134, 128, 52, 97, 128, 1, 14, 26, 128,
+ 79, 72, 71, 135, 152, 56, 114, 117, 1, 10, 24, 58,
+ 64, 66, 60, 133, 148, 16, 126, 123, 1, 32, 26, 56,
+ 143, 197, 51, 141, 176, 59, 132, 162, 128, 17, 47, 106 } },
+ { { 115, 128, 112, 135, 128, 89, 130, 128, 15, 49, 89, 128,
+ 143, 238, 154, 203, 255, 138, 172, 255, 1, 98, 196, 255,
+ 185, 203, 255, 211, 255, 192, 217, 235, 128, 128, 171, 255,
+ 233, 233, 255, 247, 255, 1, 239, 245, 1, 128, 255, 255 },
+ { 75, 128, 76, 118, 128, 35, 74, 128, 1, 13, 23, 128,
+ 63, 138, 114, 164, 140, 91, 128, 128, 128, 1, 138, 64,
+ 96, 128, 255, 175, 236, 85, 166, 209, 128, 1, 128, 146,
+ 196, 217, 1, 204, 206, 128, 212, 221, 128, 128, 128, 219 },
+ { 49, 128, 36, 62, 128, 37, 56, 128, 128, 1, 1, 128,
+ 45, 37, 68, 102, 128, 90, 56, 1, 128, 128, 37, 1,
+ 26, 27, 128, 126, 128, 255, 63, 142, 128, 128, 1, 1,
+ 125, 159, 128, 173, 212, 128, 85, 189, 128, 128, 255, 171 } } }
+ };
+#endif // BR_NODE
+#if CONFIG_CTX1D
+static const aom_prob default_eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES] = {
+#if CONFIG_CHROMA_2X2
+ { { 128, 176, 157 }, { 128, 222, 198 } },
+#endif
+ { { 128, 176, 157 }, { 128, 222, 198 } },
+ { { 128, 35, 56 }, { 128, 203, 225 } },
+ { { 128, 55, 136 }, { 128, 230, 253 } },
+ { { 128, 101, 188 }, { 128, 128, 128 } }
};
+static const aom_prob default_empty_line[TX_SIZES][PLANE_TYPES][TX_CLASSES]
+ [EMPTY_LINE_CONTEXTS] = {
+#if CONFIG_CHROMA_2X2
+ { { { 128, 128, 128, 128, 128 },
+ { 142, 153, 211, 205, 128 },
+ { 162, 142, 203, 197, 128 } },
+ { { 128, 128, 128, 128, 128 },
+ { 133, 116, 178, 123, 128 },
+ { 139, 109, 159, 115, 128 } } },
+#endif
+ { { { 128, 128, 128, 128, 128 },
+ { 142, 153, 211, 205, 128 },
+ { 162, 142, 203, 197, 128 } },
+ { { 128, 128, 128, 128, 128 },
+ { 133, 116, 178, 123, 128 },
+ { 139, 109, 159, 115, 128 } } },
+ { { { 128, 128, 128, 128, 128 },
+ { 185, 130, 183, 204, 227 },
+ { 171, 81, 177, 200, 221 } },
+ { { 128, 128, 128, 128, 128 },
+ { 180, 127, 175, 189, 213 },
+ { 120, 74, 129, 134, 156 } } },
+ { { { 128, 128, 128, 128, 128 },
+ { 202, 82, 183, 214, 248 },
+ { 144, 41, 163, 185, 203 } },
+ { { 128, 128, 128, 128, 128 },
+ { 151, 93, 171, 224, 160 },
+ { 128, 51, 171, 128, 1 } } },
+ { { { 128, 128, 128, 128, 128 },
+ { 154, 48, 174, 210, 233 },
+ { 123, 16, 148, 189, 197 } },
+ { { 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128 } } }
+ };
+static const aom_prob
+ default_hv_eob[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_EOB_CONTEXTS] = {
+#if CONFIG_CHROMA_2X2
+ { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 151, 173, 114, 128, 128, 128, 128, 128, 128, 162, 198, 128,
+ 128, 128, 128, 128, 182, 198, 109, 128, 128, 128, 128, 128 },
+ { 152, 173, 119, 128, 128, 128, 128, 128, 128, 164, 193, 128,
+ 128, 128, 128, 128, 198, 209, 121, 128, 128, 128, 128, 128 } },
+ { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 123, 143, 70, 128, 128, 128, 128, 128, 128, 127, 154, 128,
+ 128, 128, 128, 128, 176, 148, 36, 128, 128, 128, 128, 128 },
+ { 132, 152, 73, 128, 128, 128, 128, 128, 128, 127, 159, 128,
+ 128, 128, 128, 128, 186, 181, 48, 128, 128, 128, 128, 128 } } },
+#endif
+ { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 151, 173, 114, 128, 128, 128, 128, 128, 128, 162, 198, 128,
+ 128, 128, 128, 128, 182, 198, 109, 128, 128, 128, 128, 128 },
+ { 152, 173, 119, 128, 128, 128, 128, 128, 128, 164, 193, 128,
+ 128, 128, 128, 128, 198, 209, 121, 128, 128, 128, 128, 128 } },
+ { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 123, 143, 70, 128, 128, 128, 128, 128, 128, 127, 154, 128,
+ 128, 128, 128, 128, 176, 148, 36, 128, 128, 128, 128, 128 },
+ { 132, 152, 73, 128, 128, 128, 128, 128, 128, 127, 159, 128,
+ 128, 128, 128, 128, 186, 181, 48, 128, 128, 128, 128, 128 } } },
+ { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 109, 105, 78, 44, 128, 128, 128, 128, 128, 146, 185, 221,
+ 128, 128, 128, 128, 199, 188, 134, 69, 128, 128, 128, 128 },
+ { 124, 127, 115, 82, 128, 128, 128, 128, 128, 162, 198, 224,
+ 128, 128, 128, 128, 206, 214, 177, 135, 128, 128, 128, 128 } },
+ { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 95, 102, 65, 14, 128, 128, 128, 128, 128, 132, 164, 199,
+ 128, 128, 128, 128, 162, 163, 66, 27, 128, 128, 128, 128 },
+ { 83, 141, 97, 38, 128, 128, 128, 128, 128, 154, 132, 184,
+ 128, 128, 128, 128, 194, 218, 112, 63, 128, 128, 128, 128 } } },
+ { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 117, 107, 86, 61, 51, 104, 128, 128, 128, 160, 198, 238,
+ 252, 251, 128, 128, 221, 223, 209, 186, 99, 81, 128, 128 },
+ { 118, 122, 121, 100, 91, 97, 128, 128, 128, 168, 190, 214,
+ 233, 235, 128, 128, 197, 216, 177, 165, 147, 126, 128, 128 } },
+ { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 109, 102, 63, 51, 255, 85, 128, 128, 128, 163, 131, 175,
+ 128, 128, 128, 128, 183, 102, 40, 1, 128, 128, 128, 128 },
+ { 255, 255, 1, 1, 128, 1, 128, 128, 128, 1, 128, 128,
+ 128, 128, 128, 128, 255, 1, 128, 128, 128, 128, 128, 128 } } },
+ { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 114, 108, 83, 61, 53, 28, 77, 177, 128, 161, 187, 218,
+ 240, 237, 228, 234, 200, 207, 167, 136, 98, 78, 183, 128 },
+ { 117, 138, 116, 77, 75, 85, 26, 1, 128, 197, 162, 200,
+ 184, 212, 225, 236, 189, 225, 168, 124, 144, 171, 128, 128 } },
+ { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } }
+ };
+#endif // CONFIG_CTX1D
#endif // CONFIG_LV_MAP
#if CONFIG_EXT_PARTITION_TYPES
@@ -312,10 +765,10 @@ static const aom_prob
{ 52, 79, 25, 128, 128, 85, 128, 85, 128 }, // l split, a not split
{ 17, 14, 12, 128, 128, 85, 128, 85, 128 }, // a/l both split
// 64x64 -> 32x32
- { 222, 34, 30, 128, 128, 128, 255, 128, 255 }, // a/l both not split
- { 72, 16, 44, 128, 128, 128, 255, 128, 255 }, // a split, l not split
- { 58, 32, 12, 128, 128, 128, 255, 128, 255 }, // l split, a not split
- { 10, 7, 6, 128, 128, 128, 255, 128, 255 }, // a/l both split
+ { 222, 34, 30, 128, 128, 85, 128, 85, 128 }, // a/l both not split
+ { 72, 16, 44, 128, 128, 85, 128, 85, 128 }, // a split, l not split
+ { 58, 32, 12, 128, 128, 85, 128, 85, 128 }, // l split, a not split
+ { 10, 7, 6, 128, 128, 85, 128, 85, 128 }, // a/l both split
#if CONFIG_EXT_PARTITION
// 128x128 -> 64x64
{ 222, 34, 30, 128, 128, 128, 255, 128, 255 }, // a/l both not split
@@ -435,7 +888,6 @@ static const aom_cdf_prob default_drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)] = {
};
#endif
-#if CONFIG_EXT_INTER
static const aom_prob default_inter_compound_mode_probs
[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1] = {
{ 154, 167, 233, 165, 143, 170, 167 }, // 0 = both zero mv
@@ -516,7 +968,11 @@ static const aom_prob
#if CONFIG_EXT_PARTITION
{ 255, 1 }, { 255, 1 }, { 255, 1 },
#endif // CONFIG_EXT_PARTITION
- { 208, 128 }, { 208, 128 }, { 208, 128 }, { 208, 128 },
+ { 208, 128 }, { 208, 128 }, { 208, 128 }, { 208, 128 }, { 208, 1 },
+ { 208, 1 },
+#if CONFIG_EXT_PARTITION
+ { 208, 1 }, { 208, 1 }
+#endif // CONFIG_EXT_PARTITION
};
#elif !CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
static const aom_prob
@@ -529,7 +985,10 @@ static const aom_prob
#if CONFIG_EXT_PARTITION
{ 255 }, { 255 }, { 255 },
#endif // CONFIG_EXT_PARTITION
- { 208 }, { 208 }, { 208 }, { 208 },
+ { 208 }, { 208 }, { 208 }, { 208 }, { 255 }, { 255 },
+#if CONFIG_EXT_PARTITION
+ { 255 }, { 255 }
+#endif // CONFIG_EXT_PARTITION
};
#elif CONFIG_COMPOUND_SEGMENT && !CONFIG_WEDGE
static const aom_prob
@@ -542,7 +1001,10 @@ static const aom_prob
#if CONFIG_EXT_PARTITION
{ 255 }, { 255 }, { 255 },
#endif // CONFIG_EXT_PARTITION
- { 208 }, { 208 }, { 208 }, { 208 },
+ { 208 }, { 208 }, { 208 }, { 208 }, { 208 }, { 208 },
+#if CONFIG_EXT_PARTITION
+ { 208 }, { 208 }
+#endif // CONFIG_EXT_PARTITION
};
#else
static const aom_prob default_compound_type_probs[BLOCK_SIZES_ALL]
@@ -575,10 +1037,16 @@ static const aom_cdf_prob
{ AOM_ICDF(32640), AOM_ICDF(32641), AOM_ICDF(32768), 0 },
{ AOM_ICDF(32640), AOM_ICDF(32641), AOM_ICDF(32768), 0 },
#endif // CONFIG_EXT_PARTITION
- { 16384, 8192, 0, 0 },
- { 16384, 8192, 0, 0 },
- { 16384, 8192, 0, 0 },
- { 16384, 8192, 0, 0 },
+ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26624), AOM_ICDF(26648), AOM_ICDF(32768), 0 }, // 208, 1
+ { AOM_ICDF(26624), AOM_ICDF(26648), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(26624), AOM_ICDF(26648), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26624), AOM_ICDF(26648), AOM_ICDF(32768), 0 },
+#endif
};
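The "// 208, 1" comment marks how these rows derive from the old 8-bit tree probabilities: each aom_prob p takes p/256 of the probability mass remaining at its node, and the CDF stores the running total out of 32768. A self-contained sketch under that assumption; tree_probs_to_icdf is a hypothetical name, not an aom API:

    #include <stdint.h>

    /* Fold n 8-bit node probabilities into n+1 cumulative values out
     * of 32768, rounding each step to the nearest integer. */
    static void tree_probs_to_icdf(const uint8_t *p, int n, uint16_t *cdf) {
      unsigned c = 0, rem = 32768;
      for (int i = 0; i < n; ++i) {
        unsigned step = (rem * p[i] + 128) >> 8; /* rem * p / 256 */
        c += step;
        rem -= step;
        cdf[i] = (uint16_t)c;
      }
      cdf[n] = 32768;
    }

It reproduces the table: { 208, 1 } gives { 26624, 26648, 32768 } (the rows commented "208, 1") and { 128, 128 } gives { 16384, 24576, 32768 } (the AOM_ICDF(16384)/AOM_ICDF(24576) rows).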
#elif !CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
static const aom_cdf_prob
@@ -610,6 +1078,12 @@ static const aom_cdf_prob
{ AOM_ICDF(32640), AOM_ICDF(32768), 0 },
{ AOM_ICDF(32640), AOM_ICDF(32768), 0 },
{ AOM_ICDF(32640), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
+#endif // CONFIG_EXT_PARTITION
};
#elif CONFIG_COMPOUND_SEGMENT && !CONFIG_WEDGE
static const aom_cdf_prob
@@ -637,14 +1111,17 @@ static const aom_cdf_prob
{ AOM_ICDF(32640), AOM_ICDF(32768), 0 },
{ AOM_ICDF(32640), AOM_ICDF(32768), 0 },
#endif // CONFIG_EXT_PARTITION
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26624), AOM_ICDF(32768), 0 }, // 208
+ { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26624), AOM_ICDF(32768), 0 },
+#endif // CONFIG_EXT_PARTITION
};
-#else
-static const aom_cdf_prob default_compound_type_cdf[BLOCK_SIZES_ALL]
- [CDF_SIZE(COMPOUND_TYPES)];
#endif // CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
#if CONFIG_INTERINTRA
@@ -681,9 +1158,12 @@ static const aom_prob default_wedge_interintra_prob[BLOCK_SIZES_ALL] = {
#endif
128, 128, 128, 194, 213, 217, 222, 224, 226, 220, 128, 128, 128,
#if CONFIG_EXT_PARTITION
- 208, 208, 208,
+ 255, 255, 255,
+#endif // CONFIG_EXT_PARTITION
+ 208, 208, 208, 208, 255, 255,
+#if CONFIG_EXT_PARTITION
+ 255, 255
#endif // CONFIG_EXT_PARTITION
- 208, 208, 208, 208,
};
#if CONFIG_NEW_MULTISYMBOL
@@ -708,55 +1188,160 @@ static const aom_cdf_prob
{ AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
#if CONFIG_EXT_PARTITION
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
#endif // CONFIG_EXT_PARTITION
{ AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+#endif // CONFIG_EXT_PARTITION
};
#endif // CONFIG_NEW_MULTISYMBOL
#endif // CONFIG_INTERINTRA
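The binary CDF tables in this region encode the same values as their aom_prob twins via AOM_ICDF(p * 128), since 32768 / 256 = 128. So the 208-to-255 change lands identically in both representations:

    P(0) = 208 * 128 / 32768 = 208/256   (old rows)
    P(0) = 255 * 128 / 32768 = 255/256   (new AOM_ICDF(255 * 128) rows)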
-#endif // CONFIG_EXT_INTER
#if CONFIG_NCOBMC_ADAPT_WEIGHT
+#ifdef TWO_MODE
const aom_tree_index av1_ncobmc_mode_tree[TREE_SIZE(MAX_NCOBMC_MODES)] = {
- -NO_OVERLAP, 2, -NCOBMC_MODE_1, 4,
- -NCOBMC_MODE_2, 6, -NCOBMC_MODE_3, 8,
- -NCOBMC_MODE_4, 10, -NCOBMC_MODE_5, 12,
- -NCOBMC_MODE_6, 14, -NCOBMC_MODE_7, -NCOBMC_MODE_8
+ -NCOBMC_MODE_0, -NCOBMC_MODE_1
};
+#else
+const aom_tree_index av1_ncobmc_mode_tree[TREE_SIZE(MAX_NCOBMC_MODES)] = {
+ -NCOBMC_MODE_0, 2,
+ -NCOBMC_MODE_1, 4,
+ -NCOBMC_MODE_2, 6,
+ -NCOBMC_MODE_3, 8,
+ -NCOBMC_MODE_4, 10,
+ -NCOBMC_MODE_5, 12,
+ -NCOBMC_MODE_6, -NCOBMC_MODE_7
+};
+#endif // TWO_MODE
// TODO(weitinglin): find default prob
+// right now all modes are set with equal probabilities
static const aom_prob
default_ncobmc_mode_prob[ADAPT_OVERLAP_BLOCKS][MAX_NCOBMC_MODES - 1] = {
- { 23, 37, 37, 38, 65, 71, 81, 86 }, // 8x8
- { 28, 32, 37, 43, 51, 64, 85, 128 }, // 16X16 equal prob
- { 86, 22, 32, 25, 10, 40, 97, 65 }, // 32X32
- { 28, 32, 37, 43, 51, 64, 85, 128 } // 64X64 equal prob
+#ifdef TWO_MODE
+ { 127 }, { 127 }, { 127 }, { 127 }
+#else
+ { 32, 36, 43, 51, 64, 85, 128 }, // 8x8
+ { 32, 36, 43, 51, 64, 85, 128 }, // 16X16
+ { 32, 36, 43, 51, 64, 85, 128 }, // 32X32
+ { 32, 36, 43, 51, 64, 85, 128 } // 64X64
+#endif // TWO_MODE
};
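The "equal probabilities" defaults follow from the tree layout: the left leaf at node i should take 1/(8 - i) of the mass remaining there, i.e. an aom_prob of roughly 256 / (8 - i):

    256/8 = 32, 256/7 ~ 36, 256/6 ~ 43, 256/5 ~ 51,
    256/4 = 64, 256/3 ~ 85, 256/2 = 128

which is exactly the { 32, 36, 43, 51, 64, 85, 128 } rows above. The CDF table below is the exact uniform version (k * 32768 / 8 = 4096 * k), and the TWO_MODE entry { 127 } maps the same way: 127 * 128 = 16256.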
static const aom_cdf_prob
default_ncobmc_mode_cdf[ADAPT_OVERLAP_BLOCKS][CDF_SIZE(MAX_NCOBMC_MODES)] =
- { { AOM_ICDF(127), AOM_ICDF(4207), AOM_ICDF(8287), AOM_ICDF(12367),
- AOM_ICDF(16447), AOM_ICDF(20527), AOM_ICDF(24607), AOM_ICDF(28687),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(127), AOM_ICDF(4207), AOM_ICDF(8287), AOM_ICDF(12367),
- AOM_ICDF(16447), AOM_ICDF(20527), AOM_ICDF(24607), AOM_ICDF(28687),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(127), AOM_ICDF(4207), AOM_ICDF(8287), AOM_ICDF(12367),
- AOM_ICDF(16447), AOM_ICDF(20527), AOM_ICDF(24607), AOM_ICDF(28687),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(127), AOM_ICDF(4207), AOM_ICDF(8287), AOM_ICDF(12367),
- AOM_ICDF(16447), AOM_ICDF(20527), AOM_ICDF(24607), AOM_ICDF(28687),
- AOM_ICDF(32768), 0 } };
+#ifdef TWO_MODE
+ { { AOM_ICDF(16256), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16256), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16256), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16256), AOM_ICDF(32768), 0 } };
+#else
+ { { AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(12288), AOM_ICDF(16384),
+ AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(12288), AOM_ICDF(16384),
+ AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(12288), AOM_ICDF(16384),
+ AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(12288), AOM_ICDF(16384),
+ AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672), AOM_ICDF(32768),
+ 0 } };
+#endif // TWO_MODE
#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-
// Change this section appropriately once warped motion is supported
#if CONFIG_MOTION_VAR && !CONFIG_WARPED_MOTION
-#if !CONFIG_NCOBMC_ADAPT_WEIGHT
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
+ -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, -NCOBMC_ADAPT_WEIGHT,
+};
+static const aom_prob
+ default_motion_mode_prob[BLOCK_SIZES_ALL][MOTION_MODES - 1] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
+ { 255, 255 },
+ { 255, 255 },
+ { 255, 255 },
+#endif
+ { 255, 255 },
+ { 255, 255 },
+ { 255, 255 },
+ /** Only these nine block sizes allow ncobmc_adapt_weight **/
+ { 45, 207 },
+ { 42, 211 },
+ { 34, 207 },
+ { 181, 123 },
+ { 129, 141 },
+ { 15, 209 },
+ { 231, 122 },
+ { 195, 190 },
+ { 168, 190 },
+ /** ----------------------------------------------------- **/
+ { 244, 255 },
+#if CONFIG_EXT_PARTITION
+ { 252, 255 },
+ { 252, 255 },
+ { 252, 255 },
+#endif // CONFIG_EXT_PARTITION
+ { 255, 200 },
+ { 255, 200 },
+ { 255, 200 },
+ { 255, 200 },
+#if CONFIG_EXT_PARTITION
+ { 252, 255 },
+ { 252, 200 },
+ { 252, 200 },
+#endif // CONFIG_EXT_PARTITION
+ };
+static const aom_cdf_prob
+ default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
+#endif
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0, 0 },
+ /** Only these nine block sizes allow ncobmc_adapt_weight **/
+ { AOM_ICDF(5702), AOM_ICDF(27555), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(5408), AOM_ICDF(27964), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4330), AOM_ICDF(27298), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(23107), AOM_ICDF(27760), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16490), AOM_ICDF(25461), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(1959), AOM_ICDF(27153), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(29530), AOM_ICDF(31073), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(25057), AOM_ICDF(30840), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(21588), AOM_ICDF(29940), AOM_ICDF(32768), 0 },
+ /** ----------------------------------------------------- **/
+ { AOM_ICDF(244 * 128), AOM_ICDF(32768), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(32256), AOM_ICDF(32768), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32256), AOM_ICDF(32768), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32256), AOM_ICDF(32768), AOM_ICDF(32768), 0 },
+#endif
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+#endif
+ };
+#else // CONFIG_NCOBMC_ADAPT_WEIGHT
const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
-SIMPLE_TRANSLATION, -OBMC_CAUSAL
};
@@ -771,7 +1356,10 @@ static const aom_prob
#if CONFIG_EXT_PARTITION
{ 252 }, { 252 }, { 252 },
#endif // CONFIG_EXT_PARTITION
- { 208 }, { 208 }, { 208 }, { 208 },
+ { 208 }, { 208 }, { 208 }, { 208 }, { 208 }, { 208 },
+#if CONFIG_EXT_PARTITION
+ { 208 }, { 208 }
+#endif // CONFIG_EXT_PARTITION
};
static const aom_cdf_prob
@@ -803,62 +1391,14 @@ static const aom_cdf_prob
{ AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
- };
-
-#else
-// TODO(weitinglin): The default probability is copied from warped motion right
-// now as a place holder. It needs to be fined tuned after
-// NCOBMC_ADAPT_WEIGHT is actually implemented. Also needs to
-// change this section appropriately once warped motion is
-// supported.
-const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
- -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, -NCOBMC_ADAPT_WEIGHT,
-};
-static const aom_prob
- default_motion_mode_prob[BLOCK_SIZES_ALL][MOTION_MODES - 1] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { 255, 200 }, { 255, 200 }, { 255, 200 },
-#endif
- { 255, 200 }, { 255, 200 }, { 255, 200 }, { 151, 200 }, { 153, 200 },
- { 144, 200 }, { 178, 200 }, { 165, 200 }, { 160, 200 }, { 207, 200 },
- { 195, 200 }, { 168, 200 }, { 244, 200 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
#if CONFIG_EXT_PARTITION
- { 252, 200 }, { 252, 200 }, { 252, 200 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
#endif // CONFIG_EXT_PARTITION
- { 255, 200 }, { 255, 200 }, { 255, 200 }, { 255, 200 },
- };
-static const aom_cdf_prob
- default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)] = {
-#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7936), AOM_ICDF(19091), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4991), AOM_ICDF(19205), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4992), AOM_ICDF(19314), AOM_ICDF(32768), 0 },
- { AOM_ICDF(15104), AOM_ICDF(21590), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9855), AOM_ICDF(21043), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12800), AOM_ICDF(22238), AOM_ICDF(32768), 0 },
- { AOM_ICDF(24320), AOM_ICDF(26498), AOM_ICDF(32768), 0 },
- { AOM_ICDF(26496), AOM_ICDF(28995), AOM_ICDF(32768), 0 },
- { AOM_ICDF(25216), AOM_ICDF(28166), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30592), AOM_ICDF(31238), AOM_ICDF(32768), 0 },
-#if CONFIG_EXT_PARTITION
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32768), 0 },
-#endif
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
};
#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
-
#elif !CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
@@ -875,7 +1415,10 @@ static const aom_prob
#if CONFIG_EXT_PARTITION
{ 252 }, { 252 }, { 252 },
#endif // CONFIG_EXT_PARTITION
- { 208 }, { 208 }, { 208 }, { 208 },
+ { 208 }, { 208 }, { 208 }, { 208 }, { 208 }, { 208 },
+#if CONFIG_EXT_PARTITION
+ { 252 }, { 252 }
+#endif // CONFIG_EXT_PARTITION
};
static const aom_cdf_prob
@@ -907,10 +1450,122 @@ static const aom_cdf_prob
{ AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(255 * 128), AOM_ICDF(32768), 0 },
+#endif // CONFIG_EXT_PARTITION
};
#elif CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
+ -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, 4, -NCOBMC_ADAPT_WEIGHT, -WARPED_CAUSAL
+};
+static const aom_prob
+ default_motion_mode_prob[BLOCK_SIZES_ALL][MOTION_MODES - 1] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
+ { 128, 128, 255 }, { 128, 128, 128 }, { 128, 128, 128 },
+#endif
+ { 128, 128, 128 }, { 128, 128, 128 }, { 128, 128, 128 }, { 62, 115, 128 },
+ { 39, 131, 128 }, { 39, 132, 128 }, { 118, 94, 128 }, { 77, 125, 128 },
+ { 100, 121, 128 }, { 190, 66, 128 }, { 207, 102, 128 }, { 197, 100, 128 },
+ { 239, 76, 128 },
+#if CONFIG_EXT_PARTITION
+ { 252, 200, 128 }, { 252, 200, 128 }, { 252, 200, 128 },
+#endif // CONFIG_EXT_PARTITION
+ { 208, 200, 128 }, { 208, 200, 128 }, { 208, 200, 128 }, { 208, 200, 128 }
+};
+static const aom_cdf_prob
+ default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
+ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+#endif
+ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ /** Only these nine block sizes allow ncobmc_adapt_weight **/
+ { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(32768), 0 },
+ /***********************************************************/
+ { AOM_ICDF(30592), AOM_ICDF(31238), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+#endif
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32767), AOM_ICDF(32768), 0 }
+ };
+
+const aom_tree_index av1_ncobmc_tree[TREE_SIZE(OBMC_FAMILY_MODES)] = {
+ -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, -NCOBMC_ADAPT_WEIGHT
+};
+
+static const aom_prob
+ default_ncobmc_prob[BLOCK_SIZES_ALL][OBMC_FAMILY_MODES - 1] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
+ { 128, 255 }, { 128, 255 }, { 128, 255 },
+#endif
+ { 128, 255 }, { 128, 255 }, { 128, 255 }, { 45, 255 }, { 79, 255 },
+ { 75, 255 }, { 130, 255 }, { 141, 255 }, { 144, 255 }, { 208, 255 },
+ { 201, 255 }, { 186, 255 }, { 231, 255 },
+#if CONFIG_EXT_PARTITION
+ { 252, 255 }, { 252, 255 }, { 252, 255 },
+#endif // CONFIG_EXT_PARTITION
+ { 208, 255 }, { 208, 255 }, { 208, 255 }, { 208, 255 }
+ };
+
+static const aom_cdf_prob
+ default_ncobmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(OBMC_FAMILY_MODES)] = {
+#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
+ { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+#endif
+ { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ /** Only these nine block sizes allow ncobmc_adapt_weight **/
+ { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10922), AOM_ICDF(21845), AOM_ICDF(32768), 0 },
+ /***********************************************************/
+ { AOM_ICDF(231 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(252 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(252 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(252 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+#endif // CONFIG_EXT_PARTITION
+ { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(208 * 128), AOM_ICDF(32767), AOM_ICDF(32768), 0 }
+ };
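For the nine NCOBMC-capable sizes, the default_ncobmc_cdf rows are the uniform three-way split over the OBMC family, k * 32768 / 3 truncated:

    1 * 32768 / 3 = 10922,   2 * 32768 / 3 = 21845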
+#else
const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)] = {
-SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, -WARPED_CAUSAL,
};
@@ -926,7 +1581,11 @@ static const aom_prob
#if CONFIG_EXT_PARTITION
{ 252, 200 }, { 252, 200 }, { 252, 200 },
#endif // CONFIG_EXT_PARTITION
- { 208, 200 }, { 208, 200 }, { 208, 200 }, { 208, 200 },
+ { 208, 200 }, { 208, 200 }, { 208, 200 }, { 208, 200 }, { 208, 200 },
+ { 208, 200 },
+#if CONFIG_EXT_PARTITION
+ { 252, 200 }, { 252, 200 }
+#endif // CONFIG_EXT_PARTITION
};
static const aom_cdf_prob
default_motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)] = {
@@ -957,21 +1616,30 @@ static const aom_cdf_prob
{ AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
{ AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
{ AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32640), AOM_ICDF(32740), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32256), AOM_ICDF(32656), AOM_ICDF(32768), 0 },
+#endif
};
-
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
// Probability for the case that only 1 additional motion mode is allowed
static const aom_prob default_obmc_prob[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
128, 128, 128,
#endif
- 128, 128, 128, 45, 79, 75, 130, 141, 144, 208, 201, 186, 231,
+ 128, 128, 128, 45, 79, 75, 130, 141, 144, 208, 201, 186, 231,
#if CONFIG_EXT_PARTITION
252, 252, 252,
#endif // CONFIG_EXT_PARTITION
- 208, 208, 208, 208,
+ 208, 208, 208, 208, 208, 208,
+#if CONFIG_EXT_PARTITION
+ 252, 252
+#endif // CONFIG_EXT_PARTITION
};
-#if CONFIG_NEW_MULTISYMBOL
+#if CONFIG_NEW_MULTISYMBOL || CONFIG_NCOBMC_ADAPT_WEIGHT
static const aom_cdf_prob default_obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
{ AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
@@ -1000,59 +1668,46 @@ static const aom_cdf_prob default_obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = {
{ AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
+#if CONFIG_EXT_PARTITION
+ { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
+#endif // CONFIG_EXT_PARTITION
};
#endif // CONFIG_NEW_MULTISYMBOL
#endif
-#if CONFIG_DELTA_Q
static const aom_prob default_delta_q_probs[DELTA_Q_PROBS] = { 220, 220, 220 };
static const aom_cdf_prob default_delta_q_cdf[CDF_SIZE(DELTA_Q_PROBS + 1)] = {
AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0
};
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+static const aom_prob
+ default_delta_lf_multi_probs[FRAME_LF_COUNT][DELTA_LF_PROBS] = {
+ { 220, 220, 220 }, { 220, 220, 220 }, { 220, 220, 220 }, { 220, 220, 220 }
+ };
+static const aom_cdf_prob
+ default_delta_lf_multi_cdf[FRAME_LF_COUNT][CDF_SIZE(DELTA_LF_PROBS + 1)] = {
+ { AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0 }
+ };
+#endif // CONFIG_LOOPFILTER_LEVEL
static const aom_prob default_delta_lf_probs[DELTA_LF_PROBS] = { 220, 220,
220 };
static const aom_cdf_prob default_delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)] = {
AOM_ICDF(28160), AOM_ICDF(32120), AOM_ICDF(32677), AOM_ICDF(32768), 0
};
#endif
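The delta-q and delta-lf CDFs agree with their { 220, 220, 220 } prob tables under the same running-total conversion, rounding to nearest:

    32768 * 220 / 256                   = 28160
    28160 + (32768 - 28160) * 220 / 256 = 28160 + 3960 = 32120
    32120 + (32768 - 32120) * 220 / 256 = 32120 +  557 = 32677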
-#endif
-#if CONFIG_EXT_TX
-int av1_ext_tx_intra_ind[EXT_TX_SETS_INTRA][TX_TYPES];
-int av1_ext_tx_intra_inv[EXT_TX_SETS_INTRA][TX_TYPES];
-int av1_ext_tx_inter_ind[EXT_TX_SETS_INTER][TX_TYPES];
-int av1_ext_tx_inter_inv[EXT_TX_SETS_INTER][TX_TYPES];
-#endif
-#if CONFIG_ALT_INTRA
-#if CONFIG_SMOOTH_HV
-const int av1_intra_mode_ind[INTRA_MODES] = { 0, 2, 3, 6, 4, 5, 8,
- 9, 7, 10, 11, 12, 1 };
-const int av1_intra_mode_inv[INTRA_MODES] = { 0, 12, 1, 2, 4, 5, 3,
- 8, 6, 7, 9, 10, 11 };
-#else
-const int av1_intra_mode_ind[INTRA_MODES] = {
- 0, 2, 3, 6, 4, 5, 8, 9, 7, 10, 1
-};
-const int av1_intra_mode_inv[INTRA_MODES] = {
- 0, 10, 1, 2, 4, 5, 3, 8, 6, 7, 9
-};
-#endif // CONFIG_SMOOTH_HV
-#else
-const int av1_intra_mode_ind[INTRA_MODES] = { 0, 2, 3, 6, 4, 5, 8, 9, 7, 1 };
-const int av1_intra_mode_inv[INTRA_MODES] = { 0, 9, 1, 2, 4, 5, 3, 8, 6, 7 };
-#endif // CONFIG_ALT_INTRA
-
-#if CONFIG_EXT_INTER
/* clang-format off */
#if CONFIG_INTERINTRA
const aom_tree_index av1_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)] = {
-II_DC_PRED, 2, /* 0 = II_DC_NODE */
-#if CONFIG_ALT_INTRA
-II_SMOOTH_PRED, 4, /* 1 = II_SMOOTH_PRED */
-#else
- -II_TM_PRED, 4, /* 1 = II_TM_NODE */
-#endif
-II_V_PRED, -II_H_PRED /* 2 = II_V_NODE */
};
#endif // CONFIG_INTERINTRA
@@ -1105,7 +1760,6 @@ const aom_tree_index av1_compound_type_tree[TREE_SIZE(COMPOUND_TYPES)] = {
const aom_tree_index av1_compound_type_tree[TREE_SIZE(COMPOUND_TYPES)] = {};
#endif // CONFIG_COMPOUND_SEGMENT && CONFIG_WEDGE
/* clang-format on */
-#endif // CONFIG_EXT_INTER
const aom_tree_index av1_partition_tree[TREE_SIZE(PARTITION_TYPES)] = {
-PARTITION_NONE, 2, -PARTITION_HORZ, 4, -PARTITION_VERT, -PARTITION_SPLIT
@@ -1144,30 +1798,16 @@ static const aom_cdf_prob
#endif
static const aom_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = {
-#if !CONFIG_EXT_COMP_REFS
- 216, 170, 131, 92, 42
-#else // CONFIG_EXT_COMP_REFS
- 206, 182, 117, 104, 32
-#endif // !CONFIG_EXT_COMP_REFS
+ 190, 156, 91, 77, 22
};
#if CONFIG_NEW_MULTISYMBOL
-static const aom_cdf_prob
- default_comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE(2)] = {
-#if !CONFIG_EXT_COMP_REFS
- { AOM_ICDF(216 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(170 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(131 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(92 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(42 * 128), AOM_ICDF(32768), 0 }
-#else // CONFIG_EXT_COMP_REFS
- { AOM_ICDF(206 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(182 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(117 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(104 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(32 * 128), AOM_ICDF(32768), 0 }
-#endif // !CONFIG_EXT_COMP_REFS
- };
+static const aom_cdf_prob
+ default_comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE(2)] = {
+ { AOM_ICDF(24290), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(19956), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(11641), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(9804), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(2842), AOM_ICDF(32768), 0 }
+ };
#endif // CONFIG_NEW_MULTISYMBOL
#if CONFIG_EXT_COMP_REFS
@@ -1206,91 +1846,49 @@ static const aom_cdf_prob
#if CONFIG_EXT_REFS
static const aom_prob default_comp_ref_p[REF_CONTEXTS][FWD_REFS - 1] = {
-#if !CONFIG_EXT_COMP_REFS
- { 33, 16, 16 },
- { 77, 74, 74 },
- { 142, 142, 142 },
- { 172, 170, 170 },
- { 238, 247, 247 }
-#else // CONFIG_EXT_COMP_REFS
- { 21, 7, 5 },
- { 68, 20, 16 },
- { 128, 56, 36 },
- { 197, 111, 139 },
- { 238, 131, 136 }
-#endif // !CONFIG_EXT_COMP_REFS
+ { 28, 10, 8 },
+ { 77, 27, 26 },
+ { 127, 62, 56 },
+ { 186, 126, 160 },
+ { 236, 143, 172 }
};
static const aom_prob default_comp_bwdref_p[REF_CONTEXTS][BWD_REFS - 1] = {
-#if CONFIG_ALTREF2
- // TODO(zoeliu): ALTREF2 to work with EXT_COMP_REFS and NEW_MULTISYMBOL.
- { 50, 50 },
- { 130, 130 },
- { 210, 210 },
- { 128, 128 },
- { 128, 128 }
-#else // !CONFIG_ALTREF2
-#if !CONFIG_EXT_COMP_REFS
- { 16 }, { 74 }, { 142 }, { 170 }, { 247 }
-#else // CONFIG_EXT_COMP_REFS
- { 7 }, { 56 }, { 29 }, { 230 }, { 220 }
-#endif // CONFIG_EXT_COMP_REFS
-#endif // CONFIG_ALTREF2
+ { 22, 13 }, { 140, 124 }, { 241, 239 }, { 128, 128 }, { 128, 128 }
};
#if CONFIG_NEW_MULTISYMBOL
static const aom_cdf_prob
default_comp_ref_cdf[REF_CONTEXTS][FWD_REFS - 1][CDF_SIZE(2)] = {
-#if !CONFIG_EXT_COMP_REFS
- { { AOM_ICDF(33 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(77 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(74 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(74 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(142 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(142 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(142 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(172 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(170 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(170 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(238 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(247 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(247 * 128), AOM_ICDF(32768), 0 } }
-#else // CONFIG_EXT_COMP_REFS
- { { AOM_ICDF(21 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(68 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(20 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(56 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(36 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(197 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(111 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(139 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(238 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(131 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(136 * 128), AOM_ICDF(32768), 0 } }
-#endif // !CONFIG_EXT_COMP_REFS
+ { { AOM_ICDF(3556), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(1217), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(988), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(9857), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(3394), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(3303), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(16237), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(7946), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(7195), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(23826), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16124), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(20536), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(30195), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(18344), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(21980), AOM_ICDF(32768), 0 } }
};
static const aom_cdf_prob
default_comp_bwdref_cdf[REF_CONTEXTS][BWD_REFS - 1][CDF_SIZE(2)] = {
-#if !CONFIG_EXT_COMP_REFS
- { { AOM_ICDF(16 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(74 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(142 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(170 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(247 * 128), AOM_ICDF(32768), 0 } }
-#else // CONFIG_EXT_COMP_REFS
- { { AOM_ICDF(7 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(56 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(29 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(230 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(220 * 128), AOM_ICDF(32768), 0 } }
-#endif // !CONFIG_EXT_COMP_REFS
+ { { AOM_ICDF(2762), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(1614), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(17976), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(15912), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(30894), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(30639), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(32768), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32768), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(32768), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32768), AOM_ICDF(32768), 0 } }
};
#endif // CONFIG_NEW_MULTISYMBOL
@@ -1313,28 +1911,11 @@ static const aom_cdf_prob
static const aom_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = {
#if CONFIG_EXT_REFS
-#if CONFIG_ALTREF2
- // TODO(zoeliu): ALTREF2 to work with EXT_COMP_REFS and NEW_MULTISYMBOL.
- { 33, 50, 16, 16, 16, 50 },
- { 77, 130, 74, 74, 74, 130 },
- { 142, 210, 142, 142, 142, 210 },
- { 172, 128, 170, 170, 170, 128 },
- { 238, 128, 247, 247, 247, 128 }
-#else // !CONFIG_ALTREF2
-#if !CONFIG_EXT_COMP_REFS
- { 33, 16, 16, 16, 16 },
- { 77, 74, 74, 74, 74 },
- { 142, 142, 142, 142, 142 },
- { 172, 170, 170, 170, 170 },
- { 238, 247, 247, 247, 247 }
-#else // CONFIG_EXT_COMP_REFS
- { 36, 2, 28, 58, 9 },
- { 64, 22, 60, 122, 40 },
- { 153, 69, 126, 179, 71 },
- { 128, 174, 189, 216, 101 },
- { 233, 252, 228, 246, 200 }
-#endif // !CONFIG_EXT_COMP_REFS
-#endif // CONFIG_ALTREF2
+ { 36, 16, 32, 57, 11, 14 },
+ { 68, 128, 73, 128, 49, 124 },
+ { 136, 236, 127, 170, 81, 238 },
+ { 128, 128, 191, 211, 115, 128 },
+ { 224, 128, 230, 242, 208, 128 }
#else // !CONFIG_EXT_REFS
{ 31, 25 }, { 72, 80 }, { 147, 148 }, { 197, 191 }, { 235, 247 },
#endif // CONFIG_EXT_REFS
@@ -1344,60 +1925,37 @@ static const aom_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = {
static const aom_cdf_prob
default_single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1][CDF_SIZE(2)] = {
#if CONFIG_EXT_REFS
-#if !CONFIG_EXT_COMP_REFS
- { { AOM_ICDF(33 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(77 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(74 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(74 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(74 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(74 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(142 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(142 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(142 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(142 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(142 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(172 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(170 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(170 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(170 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(170 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(238 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(247 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(247 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(247 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(247 * 128), AOM_ICDF(32768), 0 } }
-#else // CONFIG_EXT_COMP_REFS
- { { AOM_ICDF(36 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(28 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(58 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(64 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(22 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(60 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(122 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(40 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(153 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(69 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(126 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(179 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(71 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(174 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(189 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(216 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(101 * 128), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(233 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(252 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(228 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(246 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(200 * 128), AOM_ICDF(32768), 0 } }
-#endif // !CONFIG_EXT_COMP_REFS
-#else // CONFIG_EXT_REFS
+ { { AOM_ICDF(4623), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(2110), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4132), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(7309), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(1392), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(1781), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(8659), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16372), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(9371), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16322), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(6216), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(15834), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(17353), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(30182), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16300), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(21702), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10365), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(30486), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(32768), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32768), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(24426), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26972), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(14760), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32768), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(28634), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32768), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(29425), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(30969), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26676), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(32768), AOM_ICDF(32768), 0 } }
+#else // !CONFIG_EXT_REFS
{ { AOM_ICDF(31 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(25 * 128), AOM_ICDF(32768), 0 } },
{ { AOM_ICDF(72 * 128), AOM_ICDF(32768), 0 },
@@ -1412,15 +1970,14 @@ static const aom_cdf_prob
};
#endif // CONFIG_NEW_MULTISYMBOL
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// TODO(zoeliu): Default values to be further adjusted based on the collected
// stats.
static const aom_prob default_comp_inter_mode_p[COMP_INTER_MODE_CONTEXTS] = {
40, 110, 160, 220
};
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_PALETTE
// TODO(huisu): tune these cdfs
const aom_cdf_prob
default_palette_y_size_cdf[PALETTE_BLOCK_SIZES][CDF_SIZE(PALETTE_SIZES)] = {
@@ -1504,6 +2061,61 @@ const aom_prob av1_default_palette_uv_mode_prob[PALETTE_UV_MODE_CONTEXTS] = {
253, 229
};
+#if CONFIG_NEW_MULTISYMBOL
+const aom_cdf_prob
+ default_palette_y_mode_cdf[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS]
+ [CDF_SIZE(2)] = {
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+#if CONFIG_EXT_PARTITION
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 180), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 100), AOM_ICDF(32768), 0 } },
+#endif // CONFIG_EXT_PARTITION
+ };
+
+const aom_cdf_prob
+ default_palette_uv_mode_cdf[PALETTE_UV_MODE_CONTEXTS][CDF_SIZE(2)] = {
+ { AOM_ICDF(128 * 253), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 229), AOM_ICDF(32768), 0 }
+ };
+
+#endif // CONFIG_NEW_MULTISYMBOL
+
const aom_cdf_prob default_palette_y_color_index_cdf
[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = {
{
@@ -1679,6 +2291,190 @@ const aom_cdf_prob default_palette_uv_color_index_cdf
0 },
}
};
+#if CONFIG_MRC_TX
+// TODO(sarahparker): Tune these cdfs
+const aom_cdf_prob default_mrc_mask_intra_cdf
+ [PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = {
+ {
+ { AOM_ICDF(29568), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ { AOM_ICDF(16384), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ { AOM_ICDF(8832), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ { AOM_ICDF(28672), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ { AOM_ICDF(31872), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ },
+ {
+ { AOM_ICDF(28032), AOM_ICDF(30326), AOM_ICDF(32768), 0, 0, 0, 0, 0,
+ 0 },
+ { AOM_ICDF(11647), AOM_ICDF(27405), AOM_ICDF(32768), 0, 0, 0, 0, 0,
+ 0 },
+ { AOM_ICDF(4352), AOM_ICDF(30659), AOM_ICDF(32768), 0, 0, 0, 0, 0,
+ 0 },
+ { AOM_ICDF(23552), AOM_ICDF(27800), AOM_ICDF(32768), 0, 0, 0, 0, 0,
+ 0 },
+ { AOM_ICDF(32256), AOM_ICDF(32504), AOM_ICDF(32768), 0, 0, 0, 0, 0,
+ 0 },
+ },
+ {
+ { AOM_ICDF(26112), AOM_ICDF(28374), AOM_ICDF(30039), AOM_ICDF(32768),
+ 0, 0, 0, 0, 0 },
+ { AOM_ICDF(9472), AOM_ICDF(22576), AOM_ICDF(27712), AOM_ICDF(32768),
+ 0, 0, 0, 0, 0 },
+ { AOM_ICDF(6656), AOM_ICDF(26138), AOM_ICDF(29608), AOM_ICDF(32768),
+ 0, 0, 0, 0, 0 },
+ { AOM_ICDF(19328), AOM_ICDF(23791), AOM_ICDF(28946), AOM_ICDF(32768),
+ 0, 0, 0, 0, 0 },
+ { AOM_ICDF(31744), AOM_ICDF(31984), AOM_ICDF(32336), AOM_ICDF(32768),
+ 0, 0, 0, 0, 0 },
+ },
+ {
+ { AOM_ICDF(27904), AOM_ICDF(29215), AOM_ICDF(30075), AOM_ICDF(31190),
+ AOM_ICDF(32768), 0, 0, 0, 0 },
+ { AOM_ICDF(9728), AOM_ICDF(22598), AOM_ICDF(26134), AOM_ICDF(29425),
+ AOM_ICDF(32768), 0, 0, 0, 0 },
+ { AOM_ICDF(2688), AOM_ICDF(30066), AOM_ICDF(31058), AOM_ICDF(31933),
+ AOM_ICDF(32768), 0, 0, 0, 0 },
+ { AOM_ICDF(22015), AOM_ICDF(25039), AOM_ICDF(27726), AOM_ICDF(29932),
+ AOM_ICDF(32768), 0, 0, 0, 0 },
+ { AOM_ICDF(32383), AOM_ICDF(32482), AOM_ICDF(32554), AOM_ICDF(32660),
+ AOM_ICDF(32768), 0, 0, 0, 0 },
+ },
+ {
+ { AOM_ICDF(24319), AOM_ICDF(26299), AOM_ICDF(27486), AOM_ICDF(28600),
+ AOM_ICDF(29804), AOM_ICDF(32768), 0, 0, 0 },
+ { AOM_ICDF(7935), AOM_ICDF(18217), AOM_ICDF(21116), AOM_ICDF(25440),
+ AOM_ICDF(28589), AOM_ICDF(32768), 0, 0, 0 },
+ { AOM_ICDF(6656), AOM_ICDF(25016), AOM_ICDF(27105), AOM_ICDF(28698),
+ AOM_ICDF(30399), AOM_ICDF(32768), 0, 0, 0 },
+ { AOM_ICDF(19967), AOM_ICDF(24117), AOM_ICDF(26550), AOM_ICDF(28566),
+ AOM_ICDF(30224), AOM_ICDF(32768), 0, 0, 0 },
+ { AOM_ICDF(31359), AOM_ICDF(31607), AOM_ICDF(31775), AOM_ICDF(31977),
+ AOM_ICDF(32258), AOM_ICDF(32768), 0, 0, 0 },
+ },
+ {
+ { AOM_ICDF(26368), AOM_ICDF(27768), AOM_ICDF(28588), AOM_ICDF(29274),
+ AOM_ICDF(29997), AOM_ICDF(30917), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(8960), AOM_ICDF(18260), AOM_ICDF(20810), AOM_ICDF(23986),
+ AOM_ICDF(26627), AOM_ICDF(28882), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(7295), AOM_ICDF(24111), AOM_ICDF(25836), AOM_ICDF(27515),
+ AOM_ICDF(29033), AOM_ICDF(30769), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(22016), AOM_ICDF(25208), AOM_ICDF(27305), AOM_ICDF(28159),
+ AOM_ICDF(29221), AOM_ICDF(30274), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(31744), AOM_ICDF(31932), AOM_ICDF(32050), AOM_ICDF(32199),
+ AOM_ICDF(32335), AOM_ICDF(32521), AOM_ICDF(32768), 0, 0 },
+ },
+ {
+ { AOM_ICDF(26624), AOM_ICDF(27872), AOM_ICDF(28599), AOM_ICDF(29153),
+ AOM_ICDF(29633), AOM_ICDF(30172), AOM_ICDF(30841), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(6655), AOM_ICDF(17569), AOM_ICDF(19587), AOM_ICDF(23345),
+ AOM_ICDF(25884), AOM_ICDF(28088), AOM_ICDF(29678), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(3584), AOM_ICDF(27296), AOM_ICDF(28429), AOM_ICDF(29158),
+ AOM_ICDF(30032), AOM_ICDF(30780), AOM_ICDF(31572), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(23551), AOM_ICDF(25855), AOM_ICDF(27070), AOM_ICDF(27893),
+ AOM_ICDF(28597), AOM_ICDF(29721), AOM_ICDF(30970), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(32128), AOM_ICDF(32173), AOM_ICDF(32245), AOM_ICDF(32337),
+ AOM_ICDF(32416), AOM_ICDF(32500), AOM_ICDF(32609), AOM_ICDF(32768),
+ 0 },
+ },
+ };
+
+const aom_cdf_prob default_mrc_mask_inter_cdf
+ [PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] = {
+ {
+ { AOM_ICDF(29568), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ { AOM_ICDF(16384), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ { AOM_ICDF(8832), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ { AOM_ICDF(28672), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ { AOM_ICDF(31872), AOM_ICDF(32768), 0, 0, 0, 0, 0, 0, 0 },
+ },
+ {
+ { AOM_ICDF(28032), AOM_ICDF(30326), AOM_ICDF(32768), 0, 0, 0, 0, 0,
+ 0 },
+ { AOM_ICDF(11647), AOM_ICDF(27405), AOM_ICDF(32768), 0, 0, 0, 0, 0,
+ 0 },
+ { AOM_ICDF(4352), AOM_ICDF(30659), AOM_ICDF(32768), 0, 0, 0, 0, 0,
+ 0 },
+ { AOM_ICDF(23552), AOM_ICDF(27800), AOM_ICDF(32768), 0, 0, 0, 0, 0,
+ 0 },
+ { AOM_ICDF(32256), AOM_ICDF(32504), AOM_ICDF(32768), 0, 0, 0, 0, 0,
+ 0 },
+ },
+ {
+ { AOM_ICDF(26112), AOM_ICDF(28374), AOM_ICDF(30039), AOM_ICDF(32768),
+ 0, 0, 0, 0, 0 },
+ { AOM_ICDF(9472), AOM_ICDF(22576), AOM_ICDF(27712), AOM_ICDF(32768),
+ 0, 0, 0, 0, 0 },
+ { AOM_ICDF(6656), AOM_ICDF(26138), AOM_ICDF(29608), AOM_ICDF(32768),
+ 0, 0, 0, 0, 0 },
+ { AOM_ICDF(19328), AOM_ICDF(23791), AOM_ICDF(28946), AOM_ICDF(32768),
+ 0, 0, 0, 0, 0 },
+ { AOM_ICDF(31744), AOM_ICDF(31984), AOM_ICDF(32336), AOM_ICDF(32768),
+ 0, 0, 0, 0, 0 },
+ },
+ {
+ { AOM_ICDF(27904), AOM_ICDF(29215), AOM_ICDF(30075), AOM_ICDF(31190),
+ AOM_ICDF(32768), 0, 0, 0, 0 },
+ { AOM_ICDF(9728), AOM_ICDF(22598), AOM_ICDF(26134), AOM_ICDF(29425),
+ AOM_ICDF(32768), 0, 0, 0, 0 },
+ { AOM_ICDF(2688), AOM_ICDF(30066), AOM_ICDF(31058), AOM_ICDF(31933),
+ AOM_ICDF(32768), 0, 0, 0, 0 },
+ { AOM_ICDF(22015), AOM_ICDF(25039), AOM_ICDF(27726), AOM_ICDF(29932),
+ AOM_ICDF(32768), 0, 0, 0, 0 },
+ { AOM_ICDF(32383), AOM_ICDF(32482), AOM_ICDF(32554), AOM_ICDF(32660),
+ AOM_ICDF(32768), 0, 0, 0, 0 },
+ },
+ {
+ { AOM_ICDF(24319), AOM_ICDF(26299), AOM_ICDF(27486), AOM_ICDF(28600),
+ AOM_ICDF(29804), AOM_ICDF(32768), 0, 0, 0 },
+ { AOM_ICDF(7935), AOM_ICDF(18217), AOM_ICDF(21116), AOM_ICDF(25440),
+ AOM_ICDF(28589), AOM_ICDF(32768), 0, 0, 0 },
+ { AOM_ICDF(6656), AOM_ICDF(25016), AOM_ICDF(27105), AOM_ICDF(28698),
+ AOM_ICDF(30399), AOM_ICDF(32768), 0, 0, 0 },
+ { AOM_ICDF(19967), AOM_ICDF(24117), AOM_ICDF(26550), AOM_ICDF(28566),
+ AOM_ICDF(30224), AOM_ICDF(32768), 0, 0, 0 },
+ { AOM_ICDF(31359), AOM_ICDF(31607), AOM_ICDF(31775), AOM_ICDF(31977),
+ AOM_ICDF(32258), AOM_ICDF(32768), 0, 0, 0 },
+ },
+ {
+ { AOM_ICDF(26368), AOM_ICDF(27768), AOM_ICDF(28588), AOM_ICDF(29274),
+ AOM_ICDF(29997), AOM_ICDF(30917), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(8960), AOM_ICDF(18260), AOM_ICDF(20810), AOM_ICDF(23986),
+ AOM_ICDF(26627), AOM_ICDF(28882), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(7295), AOM_ICDF(24111), AOM_ICDF(25836), AOM_ICDF(27515),
+ AOM_ICDF(29033), AOM_ICDF(30769), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(22016), AOM_ICDF(25208), AOM_ICDF(27305), AOM_ICDF(28159),
+ AOM_ICDF(29221), AOM_ICDF(30274), AOM_ICDF(32768), 0, 0 },
+ { AOM_ICDF(31744), AOM_ICDF(31932), AOM_ICDF(32050), AOM_ICDF(32199),
+ AOM_ICDF(32335), AOM_ICDF(32521), AOM_ICDF(32768), 0, 0 },
+ },
+ {
+ { AOM_ICDF(26624), AOM_ICDF(27872), AOM_ICDF(28599), AOM_ICDF(29153),
+ AOM_ICDF(29633), AOM_ICDF(30172), AOM_ICDF(30841), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(6655), AOM_ICDF(17569), AOM_ICDF(19587), AOM_ICDF(23345),
+ AOM_ICDF(25884), AOM_ICDF(28088), AOM_ICDF(29678), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(3584), AOM_ICDF(27296), AOM_ICDF(28429), AOM_ICDF(29158),
+ AOM_ICDF(30032), AOM_ICDF(30780), AOM_ICDF(31572), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(23551), AOM_ICDF(25855), AOM_ICDF(27070), AOM_ICDF(27893),
+ AOM_ICDF(28597), AOM_ICDF(29721), AOM_ICDF(30970), AOM_ICDF(32768),
+ 0 },
+ { AOM_ICDF(32128), AOM_ICDF(32173), AOM_ICDF(32245), AOM_ICDF(32337),
+ AOM_ICDF(32416), AOM_ICDF(32500), AOM_ICDF(32609), AOM_ICDF(32768),
+ 0 },
+ },
+ };
+#endif // CONFIG_MRC_TX
+
+#if CONFIG_INTRABC
+static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)] = {
+ AOM_ICDF(192 * 128), AOM_ICDF(32768), 0,
+};
+#endif // CONFIG_INTRABC
#define MAX_COLOR_CONTEXT_HASH 8
// Negative values are invalid
@@ -1686,59 +2482,13 @@ static const int palette_color_index_context_lookup[MAX_COLOR_CONTEXT_HASH +
1] = { -1, -1, 0, -1, -1,
4, 3, 2, 1 };
-#endif // CONFIG_PALETTE
-
-// The transform size is coded as an offset to the smallest transform
-// block size.
-const aom_tree_index av1_tx_size_tree[MAX_TX_DEPTH][TREE_SIZE(TX_SIZES)] = {
- {
- // Max tx_size is 8X8
- -0, -1,
- },
- {
- // Max tx_size is 16X16
- -0, 2, -1, -2,
- },
- {
- // Max tx_size is 32X32
- -0, 2, -1, 4, -2, -3,
- },
-#if CONFIG_TX64X64
- {
- // Max tx_size is 64X64
- -0, 2, -1, 4, -2, 6, -3, -4,
- },
-#endif // CONFIG_TX64X64
-};
-
-static const aom_prob default_tx_size_prob[MAX_TX_DEPTH][TX_SIZE_CONTEXTS]
- [MAX_TX_DEPTH] = {
- {
- // Max tx_size is 8X8
- { 100 },
- { 66 },
- },
- {
- // Max tx_size is 16X16
- { 20, 152 },
- { 15, 101 },
- },
- {
- // Max tx_size is 32X32
- { 3, 136, 37 },
- { 5, 52, 13 },
- },
-#if CONFIG_TX64X64
- {
- // Max tx_size is 64X64
- { 1, 64, 136, 127 },
- { 1, 32, 52, 67 },
- },
-#endif // CONFIG_TX64X64
- };
-
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
static const aom_prob default_quarter_tx_size_prob = 192;
+#if CONFIG_NEW_MULTISYMBOL
+static const aom_cdf_prob default_quarter_tx_size_cdf[CDF_SIZE(2)] = {
+ AOM_ICDF(192 * 128), AOM_ICDF(32768), 0
+};
+#endif // CONFIG_NEW_MULTISYMBOL
#endif
#if CONFIG_LOOP_RESTORATION
@@ -1753,7 +2503,6 @@ static const aom_prob
};
#endif // CONFIG_LOOP_RESTORATION
-#if CONFIG_PALETTE
#define NUM_PALETTE_NEIGHBORS 3 // left, top-left and top.
int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
int r, int c, int palette_size,
@@ -1838,15 +2587,42 @@ int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
#undef NUM_PALETTE_NEIGHBORS
#undef MAX_COLOR_CONTEXT_HASH
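/* The context derivation above, in brief: for each coded palette index,
 * the three causal neighbors (left, top-left, top -- hence
 * NUM_PALETTE_NEIGHBORS) are scored by how their colors repeat, the scores
 * are folded into a hash in [0, MAX_COLOR_CONTEXT_HASH], and
 * palette_color_index_context_lookup maps the hash to one of five
 * contexts. Per that table, only hashes 2, 5, 6, 7 and 8 are reachable
 * (the -1 entries mark invalid hashes), giving contexts 0, 4, 3, 2 and 1.
 * Below is a sketch of the neighbor-gathering step only; the scoring and
 * color reordering in av1_get_palette_color_index_context are more
 * involved. */
static void get_palette_neighbors_sketch(const uint8_t *color_map, int stride,
                                         int r, int c, int neighbors[3]) {
  /* -1 marks a neighbor that falls outside the causally coded region. */
  neighbors[0] = (c - 1 >= 0) ? color_map[r * stride + c - 1] : -1; /* left */
  neighbors[1] = (c - 1 >= 0 && r - 1 >= 0)
                     ? color_map[(r - 1) * stride + c - 1]
                     : -1;                                      /* top-left */
  neighbors[2] = (r - 1 >= 0) ? color_map[(r - 1) * stride + c] : -1; /* top */
}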
-#endif // CONFIG_PALETTE
-
#if CONFIG_VAR_TX
static const aom_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = {
- 250, 231, 212, 241, 166, 66, 241, 230, 135, 243, 154, 64, 248, 161, 63, 128,
+#if CONFIG_TX64X64
+ 249, 240, 223, 249, 229, 177, 250, 243, 208, 226, 187,
+ 145, 236, 204, 150, 183, 149, 125, 181, 146, 113, 128
+#else
+ 250, 231, 212, 241, 166, 66, 241, 230, 135, 243, 154, 64, 248, 161, 63, 128
+#endif // CONFIG_TX64X64
};
#if CONFIG_NEW_MULTISYMBOL
static const aom_cdf_prob
default_txfm_partition_cdf[TXFM_PARTITION_CONTEXTS][CDF_SIZE(2)] = {
+#if CONFIG_TX64X64
+ { AOM_ICDF(249 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(240 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(223 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(249 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(229 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(177 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(250 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(243 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(208 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(226 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(187 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(145 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(236 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(204 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(150 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(183 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(149 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(125 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(181 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(146 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(113 * 128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 }
+#else
{ AOM_ICDF(250 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(231 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(212 * 128), AOM_ICDF(32768), 0 },
@@ -1862,10 +2638,11 @@ static const aom_cdf_prob
{ AOM_ICDF(248 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(161 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(63 * 128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 }
+ { AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
+#endif // CONFIG_TX64X64
};
#endif // CONFIG_NEW_MULTISYMBOL
-#endif
+#endif // CONFIG_VAR_TX
static const aom_prob default_skip_probs[SKIP_CONTEXTS] = { 192, 128, 64 };
#if CONFIG_NEW_MULTISYMBOL
@@ -1876,583 +2653,22 @@ static const aom_cdf_prob default_skip_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)] = {
};
#endif
-#if CONFIG_DUAL_FILTER
-#if USE_EXTRA_FILTER
-static const aom_prob default_switchable_interp_prob
- [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = {
- { 235, 192, 128 }, { 36, 243, 48 }, { 34, 16, 128 },
- { 34, 16, 128 }, { 149, 160, 128 }, { 235, 192, 128 },
- { 36, 243, 48 }, { 34, 16, 128 }, { 34, 16, 128 },
- { 149, 160, 128 }, { 235, 192, 128 }, { 36, 243, 48 },
- { 34, 16, 128 }, { 34, 16, 128 }, { 149, 160, 128 },
- { 235, 192, 128 }, { 36, 243, 48 }, { 34, 16, 128 },
- { 34, 16, 128 }, { 149, 160, 128 },
- };
-#else // USE_EXTRA_FILTER
-static const aom_prob default_switchable_interp_prob
- [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = {
- { 252, 199 }, { 22, 255 }, { 4, 2 }, { 238, 146 },
- { 253, 66 }, { 24, 255 }, { 2, 1 }, { 198, 41 },
- { 250, 177 }, { 16, 255 }, { 3, 4 }, { 226, 162 },
- { 247, 38 }, { 33, 253 }, { 1, 1 }, { 136, 14 },
- };
-#endif // USE_EXTRA_FILTER
-#else // CONFIG_DUAL_FILTER
-static const aom_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
- [SWITCHABLE_FILTERS - 1] = {
- { 235, 162 },
- { 36, 255 },
- { 34, 3 },
- { 149, 144 },
- };
-#endif // CONFIG_DUAL_FILTER
-
-#if CONFIG_EXT_TX
-/* clang-format off */
-const aom_tree_index av1_ext_tx_inter_tree[EXT_TX_SETS_INTER]
- [TREE_SIZE(TX_TYPES)] = {
- { // ToDo(yaowu): remove unused entry 0.
- 0
- }, {
- -IDTX, 2,
- 4, 14,
- 6, 8,
- -V_DCT, -H_DCT,
- 10, 12,
- -V_ADST, -H_ADST,
- -V_FLIPADST, -H_FLIPADST,
- -DCT_DCT, 16,
- 18, 24,
- 20, 22,
- -ADST_DCT, -DCT_ADST,
- -FLIPADST_DCT, -DCT_FLIPADST,
- 26, 28,
- -ADST_ADST, -FLIPADST_FLIPADST,
- -ADST_FLIPADST, -FLIPADST_ADST
- }, {
- -IDTX, 2,
- 4, 6,
- -V_DCT, -H_DCT,
- -DCT_DCT, 8,
- 10, 16,
- 12, 14,
- -ADST_DCT, -DCT_ADST,
- -FLIPADST_DCT, -DCT_FLIPADST,
- 18, 20,
- -ADST_ADST, -FLIPADST_FLIPADST,
- -ADST_FLIPADST, -FLIPADST_ADST
- }, {
- -IDTX, -DCT_DCT,
- },
-#if CONFIG_MRC_TX
- {
- -IDTX, 2, -DCT_DCT, -MRC_DCT,
- }
-#endif // CONFIG_MRC_TX
-};
-
-const aom_tree_index av1_ext_tx_intra_tree[EXT_TX_SETS_INTRA]
- [TREE_SIZE(TX_TYPES)] = {
- { // ToDo(yaowu): remove unused entry 0.
- 0
- }, {
- -IDTX, 2,
- -DCT_DCT, 4,
- 6, 8,
- -V_DCT, -H_DCT,
- -ADST_ADST, 10,
- -ADST_DCT, -DCT_ADST,
- }, {
- -IDTX, 2,
- -DCT_DCT, 4,
- -ADST_ADST, 6,
- -ADST_DCT, -DCT_ADST,
- },
-#if CONFIG_MRC_TX
- {
- -DCT_DCT, -MRC_DCT,
- }
-#endif // CONFIG_MRC_TX
-};
-/* clang-format on */
-
-static const aom_prob
- default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = {
- {
-// ToDo(yaowu): remove unused entry 0.
-#if CONFIG_CHROMA_2X2
- { 0 },
-#endif
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- },
- {
-#if CONFIG_CHROMA_2X2
- { 0 },
-#endif
- { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
- 128 },
- { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
- 128 },
- { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
- 128 },
- { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
- 128 },
- },
- {
-#if CONFIG_CHROMA_2X2
- { 0 },
-#endif
- { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
- { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
- { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
- { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
- },
- {
-#if CONFIG_CHROMA_2X2
- { 0 },
-#endif
- { 12 },
- { 12 },
- { 12 },
- { 12 },
- },
-#if CONFIG_MRC_TX
- {
-#if CONFIG_CHROMA_2X2
- { 0 },
-#endif
- { 12, 128 },
- { 12, 128 },
- { 12, 128 },
- { 12, 128 },
- }
-#endif // CONFIG_MRC_TX
- };
-
-// TODO(urvang): 3rd context should be tx_type instead of intra mode just like
-// the baseline.
-static const aom_prob
- default_intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
- [TX_TYPES - 1] = {
- {
-// ToDo(yaowu): remove unused entry 0.
-#if CONFIG_CHROMA_2X2
- {
- { 0 },
- },
-#endif
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
-#if CONFIG_ALT_INTRA
- { 0 },
-#if CONFIG_SMOOTH_HV
- { 0 },
- { 0 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 0 },
- },
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
-#if CONFIG_ALT_INTRA
- { 0 },
-#if CONFIG_SMOOTH_HV
- { 0 },
- { 0 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 0 },
- },
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
-#if CONFIG_ALT_INTRA
- { 0 },
+#if CONFIG_LGT_FROM_PRED
+static const aom_prob default_intra_lgt_prob[LGT_SIZES][INTRA_MODES] = {
+ { 255, 208, 208, 180, 230, 208, 194, 214, 220, 255,
#if CONFIG_SMOOTH_HV
- { 0 },
- { 0 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 0 },
- },
- {
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
- { 0 },
-#if CONFIG_ALT_INTRA
- { 0 },
-#if CONFIG_SMOOTH_HV
- { 0 },
- { 0 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 0 },
- },
- },
- {
-#if CONFIG_CHROMA_2X2
- {
- { 0 },
- },
-#endif
- {
- { 8, 224, 32, 128, 64, 128 },
- { 10, 32, 32, 128, 16, 192 },
- { 10, 32, 32, 128, 16, 64 },
- { 9, 200, 32, 128, 64, 128 },
- { 8, 8, 32, 128, 224, 128 },
- { 10, 32, 32, 128, 16, 192 },
- { 10, 32, 32, 128, 16, 64 },
- { 10, 23, 32, 128, 80, 176 },
- { 10, 23, 32, 128, 80, 176 },
-#if CONFIG_ALT_INTRA
- { 10, 32, 32, 128, 16, 64 },
-#if CONFIG_SMOOTH_HV
- { 10, 32, 32, 128, 16, 64 },
- { 10, 32, 32, 128, 16, 64 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 10, 32, 32, 128, 16, 64 },
- },
- {
- { 8, 224, 32, 128, 64, 128 },
- { 10, 32, 32, 128, 16, 192 },
- { 10, 32, 32, 128, 16, 64 },
- { 9, 200, 32, 128, 64, 128 },
- { 8, 8, 32, 128, 224, 128 },
- { 10, 32, 32, 128, 16, 192 },
- { 10, 32, 32, 128, 16, 64 },
- { 10, 23, 32, 128, 80, 176 },
- { 10, 23, 32, 128, 80, 176 },
-#if CONFIG_ALT_INTRA
- { 10, 32, 32, 128, 16, 64 },
-#if CONFIG_SMOOTH_HV
- { 10, 32, 32, 128, 16, 64 },
- { 10, 32, 32, 128, 16, 64 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 10, 32, 32, 128, 16, 64 },
- },
- {
- { 8, 224, 32, 128, 64, 128 },
- { 10, 32, 32, 128, 16, 192 },
- { 10, 32, 32, 128, 16, 64 },
- { 9, 200, 32, 128, 64, 128 },
- { 8, 8, 32, 128, 224, 128 },
- { 10, 32, 32, 128, 16, 192 },
- { 10, 32, 32, 128, 16, 64 },
- { 10, 23, 32, 128, 80, 176 },
- { 10, 23, 32, 128, 80, 176 },
-#if CONFIG_ALT_INTRA
- { 10, 32, 32, 128, 16, 64 },
-#if CONFIG_SMOOTH_HV
- { 10, 32, 32, 128, 16, 64 },
- { 10, 32, 32, 128, 16, 64 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 10, 32, 32, 128, 16, 64 },
- },
- {
- { 8, 224, 32, 128, 64, 128 },
- { 10, 32, 32, 128, 16, 192 },
- { 10, 32, 32, 128, 16, 64 },
- { 9, 200, 32, 128, 64, 128 },
- { 8, 8, 32, 128, 224, 128 },
- { 10, 32, 32, 128, 16, 192 },
- { 10, 32, 32, 128, 16, 64 },
- { 10, 23, 32, 128, 80, 176 },
- { 10, 23, 32, 128, 80, 176 },
-#if CONFIG_ALT_INTRA
- { 10, 32, 32, 128, 16, 64 },
-#if CONFIG_SMOOTH_HV
- { 10, 32, 32, 128, 16, 64 },
- { 10, 32, 32, 128, 16, 64 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 10, 32, 32, 128, 16, 64 },
- },
- },
- {
-#if CONFIG_CHROMA_2X2
- {
- { 0 },
- },
-#endif
- {
- { 8, 224, 64, 128 },
- { 10, 32, 16, 192 },
- { 10, 32, 16, 64 },
- { 9, 200, 64, 128 },
- { 8, 8, 224, 128 },
- { 10, 32, 16, 192 },
- { 10, 32, 16, 64 },
- { 10, 23, 80, 176 },
- { 10, 23, 80, 176 },
-#if CONFIG_ALT_INTRA
- { 10, 32, 16, 64 },
-#if CONFIG_SMOOTH_HV
- { 10, 32, 16, 64 },
- { 10, 32, 16, 64 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 10, 32, 16, 64 },
- },
- {
- { 8, 224, 64, 128 },
- { 10, 32, 16, 192 },
- { 10, 32, 16, 64 },
- { 9, 200, 64, 128 },
- { 8, 8, 224, 128 },
- { 10, 32, 16, 192 },
- { 10, 32, 16, 64 },
- { 10, 23, 80, 176 },
- { 10, 23, 80, 176 },
-#if CONFIG_ALT_INTRA
- { 10, 32, 16, 64 },
-#if CONFIG_SMOOTH_HV
- { 10, 32, 16, 64 },
- { 10, 32, 16, 64 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 10, 32, 16, 64 },
- },
- {
- { 8, 224, 64, 128 },
- { 10, 32, 16, 192 },
- { 10, 32, 16, 64 },
- { 9, 200, 64, 128 },
- { 8, 8, 224, 128 },
- { 10, 32, 16, 192 },
- { 10, 32, 16, 64 },
- { 10, 23, 80, 176 },
- { 10, 23, 80, 176 },
-#if CONFIG_ALT_INTRA
- { 10, 32, 16, 64 },
-#if CONFIG_SMOOTH_HV
- { 10, 32, 16, 64 },
- { 10, 32, 16, 64 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 10, 32, 16, 64 },
- },
- {
- { 8, 224, 64, 128 },
- { 10, 32, 16, 192 },
- { 10, 32, 16, 64 },
- { 9, 200, 64, 128 },
- { 8, 8, 224, 128 },
- { 10, 32, 16, 192 },
- { 10, 32, 16, 64 },
- { 10, 23, 80, 176 },
- { 10, 23, 80, 176 },
-#if CONFIG_ALT_INTRA
- { 10, 32, 16, 64 },
-#if CONFIG_SMOOTH_HV
- { 10, 32, 16, 64 },
- { 10, 32, 16, 64 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 10, 32, 16, 64 },
- },
- },
-#if CONFIG_MRC_TX
- {
-// ToDo(yaowu): remove unused entry 0.
-#if CONFIG_CHROMA_2X2
- {
- { 0 },
- },
+ 220, 220,
#endif
- {
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
-#if CONFIG_ALT_INTRA
- { 128 },
-#if CONFIG_SMOOTH_HV
- { 128 },
- { 128 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 128 },
- },
- {
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
-#if CONFIG_ALT_INTRA
- { 128 },
-#if CONFIG_SMOOTH_HV
- { 128 },
- { 128 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 128 },
- },
- {
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
-#if CONFIG_ALT_INTRA
- { 128 },
+ 230 },
+ { 255, 192, 216, 180, 180, 180, 180, 200, 200, 255,
#if CONFIG_SMOOTH_HV
- { 128 },
- { 128 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 128 },
- },
- {
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
- { 128 },
-#if CONFIG_ALT_INTRA
- { 128 },
-#if CONFIG_SMOOTH_HV
- { 128 },
- { 128 },
-#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- { 128 },
- },
- },
-
-#endif // CONFIG_MRC_TX
- };
-#else // !CONFIG_EXT_TX
-
-/* clang-format off */
-#if CONFIG_MRC_TX
-const aom_tree_index av1_ext_tx_tree[TREE_SIZE(TX_TYPES)] = {
- -DCT_DCT, 2,
- -MRC_DCT, 4,
- -ADST_ADST, 6,
- -ADST_DCT, -DCT_ADST
-};
-#else
-const aom_tree_index av1_ext_tx_tree[TREE_SIZE(TX_TYPES)] = {
- -DCT_DCT, 2,
- -ADST_ADST, 4,
- -ADST_DCT, -DCT_ADST
-};
-#endif // CONFIG_MRC_TX
-/* clang-format on */
-
-int av1_ext_tx_ind[TX_TYPES];
-int av1_ext_tx_inv[TX_TYPES];
-
-#if CONFIG_MRC_TX
-static const aom_prob default_intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES]
- [TX_TYPES - 1] = {
-#if CONFIG_CHROMA_2X2
- { { 240, 1, 85, 128 },
- { 4, 1, 1, 248 },
- { 4, 1, 1, 8 },
- { 4, 1, 248, 128 },
- { 4, 1, 248, 128 } },
+ 220, 220,
#endif
- { { 240, 1, 85, 128 },
- { 4, 1, 1, 248 },
- { 4, 1, 1, 8 },
- { 4, 1, 248, 128 },
- { 4, 1, 248, 128 } },
- { { 244, 1, 85, 128 },
- { 8, 1, 2, 248 },
- { 8, 1, 2, 8 },
- { 8, 1, 248, 128 },
- { 4, 1, 248, 128 } },
- { { 248, 128, 85, 128 },
- { 16, 128, 4, 248 },
- { 16, 128, 4, 8 },
- { 16, 128, 248, 128 },
- { 4, 1, 248, 128 } },
- };
-
-static const aom_prob default_inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1] = {
-#if CONFIG_CHROMA_2X2
- { 160, 1, 85, 128 },
-#endif
- { 160, 1, 85, 128 },
- { 176, 1, 85, 128 },
- { 192, 128, 85, 128 },
+ 222 },
};
-#else
-static const aom_prob
- default_intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1] = {
-#if CONFIG_CHROMA_2X2
- { { 240, 85, 128 }, { 4, 1, 248 }, { 4, 1, 8 }, { 4, 248, 128 } },
-#endif
- { { 240, 85, 128 }, { 4, 1, 248 }, { 4, 1, 8 }, { 4, 248, 128 } },
- { { 244, 85, 128 }, { 8, 2, 248 }, { 8, 2, 8 }, { 8, 248, 128 } },
- { { 248, 85, 128 }, { 16, 4, 248 }, { 16, 4, 8 }, { 16, 248, 128 } },
- };
-static const aom_prob default_inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1] = {
-#if CONFIG_CHROMA_2X2
- { 160, 85, 128 },
-#endif
- { 160, 85, 128 },
- { 176, 85, 128 },
- { 192, 85, 128 },
-};
-#endif // CONFIG_MRC_TX
-#endif // CONFIG_EXT_TX
+static const aom_prob default_inter_lgt_prob[LGT_SIZES] = { 230, 230 };
+#endif // CONFIG_LGT_FROM_PRED
#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
static const aom_prob
@@ -2508,32 +2724,31 @@ static const aom_cdf_prob
};
#endif
// clang-format on
-
#if CONFIG_DUAL_FILTER
#if USE_EXTRA_FILTER
static const aom_cdf_prob
default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS][CDF_SIZE(
SWITCHABLE_FILTERS)] = {
- { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(32096), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(31338), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(6128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(6128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(27632), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(32096), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(31338), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(6128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(6128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(27632), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(32096), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(31338), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(6128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(6128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(27632), AOM_ICDF(32768), 0 },
- { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(32096), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(31338), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(6128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(6128), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(27632), AOM_ICDF(32768), 0 }
+ { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(31760), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(11050), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(28488), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(31760), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(11050), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(28488), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(31760), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(11050), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(28488), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(30080), AOM_ICDF(31088), AOM_ICDF(31760), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4608), AOM_ICDF(9620), AOM_ICDF(11050), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4352), AOM_ICDF(5240), AOM_ICDF(31880), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(19072), AOM_ICDF(23352), AOM_ICDF(28488), AOM_ICDF(32768), 0 },
};
#else // USE_EXTRA_FILTER
static const aom_cdf_prob
@@ -2591,188 +2806,258 @@ static const aom_cdf_prob
#endif
};
-#if CONFIG_ALT_INTRA
#if CONFIG_SMOOTH_HV
static const aom_cdf_prob
default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTRA_MODES)] = {
- { AOM_ICDF(7168), AOM_ICDF(8468), AOM_ICDF(11980), AOM_ICDF(15213),
- AOM_ICDF(18579), AOM_ICDF(21075), AOM_ICDF(24090), AOM_ICDF(25954),
- AOM_ICDF(27870), AOM_ICDF(29439), AOM_ICDF(31051), AOM_ICDF(31863),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(11776), AOM_ICDF(21616), AOM_ICDF(23663), AOM_ICDF(25147),
- AOM_ICDF(26060), AOM_ICDF(26828), AOM_ICDF(27246), AOM_ICDF(28066),
- AOM_ICDF(28654), AOM_ICDF(29474), AOM_ICDF(31353), AOM_ICDF(32038),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(14720), AOM_ICDF(21911), AOM_ICDF(23650), AOM_ICDF(25282),
- AOM_ICDF(25740), AOM_ICDF(26108), AOM_ICDF(26316), AOM_ICDF(26896),
- AOM_ICDF(27194), AOM_ICDF(27695), AOM_ICDF(30113), AOM_ICDF(31254),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(18944), AOM_ICDF(27422), AOM_ICDF(28403), AOM_ICDF(29386),
- AOM_ICDF(29405), AOM_ICDF(29460), AOM_ICDF(29550), AOM_ICDF(29588),
- AOM_ICDF(29600), AOM_ICDF(29637), AOM_ICDF(30542), AOM_ICDF(31298),
- AOM_ICDF(32768), 0 },
+ {
+ AOM_ICDF(7168), AOM_ICDF(10680), AOM_ICDF(13913), AOM_ICDF(16928),
+ AOM_ICDF(20294), AOM_ICDF(22790), AOM_ICDF(24706), AOM_ICDF(26275),
+ AOM_ICDF(28139), AOM_ICDF(29751), AOM_ICDF(30563), AOM_ICDF(31468),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11776), AOM_ICDF(13823), AOM_ICDF(15307), AOM_ICDF(15725),
+ AOM_ICDF(16638), AOM_ICDF(17406), AOM_ICDF(17994), AOM_ICDF(18814),
+ AOM_ICDF(19634), AOM_ICDF(21513), AOM_ICDF(22198), AOM_ICDF(22928),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(14720), AOM_ICDF(16459), AOM_ICDF(18091), AOM_ICDF(18299),
+ AOM_ICDF(18757), AOM_ICDF(19125), AOM_ICDF(19423), AOM_ICDF(19924),
+ AOM_ICDF(20504), AOM_ICDF(22922), AOM_ICDF(24063), AOM_ICDF(25577),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(18944), AOM_ICDF(19925), AOM_ICDF(20908), AOM_ICDF(20998),
+ AOM_ICDF(21017), AOM_ICDF(21072), AOM_ICDF(21084), AOM_ICDF(21121),
+ AOM_ICDF(21159), AOM_ICDF(22064), AOM_ICDF(22820), AOM_ICDF(24290),
+ AOM_ICDF(32768), 0,
+ },
};
+#if CONFIG_CFL
static const aom_cdf_prob
default_uv_mode_cdf[INTRA_MODES][CDF_SIZE(UV_INTRA_MODES)] = {
- { AOM_ICDF(23552), AOM_ICDF(23660), AOM_ICDF(26044), AOM_ICDF(28731),
- AOM_ICDF(29093), AOM_ICDF(29590), AOM_ICDF(30000), AOM_ICDF(30465),
- AOM_ICDF(30825), AOM_ICDF(31478), AOM_ICDF(32088), AOM_ICDF(32401),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2944), AOM_ICDF(3294), AOM_ICDF(26781), AOM_ICDF(27903),
- AOM_ICDF(28179), AOM_ICDF(29237), AOM_ICDF(29430), AOM_ICDF(30317),
- AOM_ICDF(30441), AOM_ICDF(30614), AOM_ICDF(31556), AOM_ICDF(31963),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(4685), AOM_ICDF(5453), AOM_ICDF(28285),
- AOM_ICDF(28641), AOM_ICDF(28927), AOM_ICDF(29092), AOM_ICDF(29279),
- AOM_ICDF(30083), AOM_ICDF(31384), AOM_ICDF(32027), AOM_ICDF(32406),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(17664), AOM_ICDF(17841), AOM_ICDF(20465), AOM_ICDF(22016),
- AOM_ICDF(22364), AOM_ICDF(22916), AOM_ICDF(27149), AOM_ICDF(29498),
- AOM_ICDF(29766), AOM_ICDF(31091), AOM_ICDF(31871), AOM_ICDF(32260),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(16640), AOM_ICDF(16766), AOM_ICDF(18516), AOM_ICDF(20359),
- AOM_ICDF(24964), AOM_ICDF(27591), AOM_ICDF(27915), AOM_ICDF(28389),
- AOM_ICDF(29997), AOM_ICDF(30495), AOM_ICDF(31623), AOM_ICDF(32151),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(13952), AOM_ICDF(14173), AOM_ICDF(18168), AOM_ICDF(19139),
- AOM_ICDF(21064), AOM_ICDF(30601), AOM_ICDF(30889), AOM_ICDF(31410),
- AOM_ICDF(31803), AOM_ICDF(32059), AOM_ICDF(32358), AOM_ICDF(32563),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(15872), AOM_ICDF(15938), AOM_ICDF(17056), AOM_ICDF(21545),
- AOM_ICDF(23947), AOM_ICDF(24667), AOM_ICDF(24920), AOM_ICDF(25196),
- AOM_ICDF(30638), AOM_ICDF(31229), AOM_ICDF(31968), AOM_ICDF(32284),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(16256), AOM_ICDF(16385), AOM_ICDF(17409), AOM_ICDF(23210),
- AOM_ICDF(23628), AOM_ICDF(24009), AOM_ICDF(24967), AOM_ICDF(25546),
- AOM_ICDF(26054), AOM_ICDF(31037), AOM_ICDF(31875), AOM_ICDF(32335),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(14720), AOM_ICDF(14932), AOM_ICDF(19461), AOM_ICDF(20713),
- AOM_ICDF(21073), AOM_ICDF(21852), AOM_ICDF(23430), AOM_ICDF(29631),
- AOM_ICDF(29876), AOM_ICDF(30520), AOM_ICDF(31591), AOM_ICDF(32078),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(16768), AOM_ICDF(17018), AOM_ICDF(20217), AOM_ICDF(22624),
- AOM_ICDF(23484), AOM_ICDF(23698), AOM_ICDF(24300), AOM_ICDF(25193),
- AOM_ICDF(25785), AOM_ICDF(26903), AOM_ICDF(29835), AOM_ICDF(31187),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(16768), AOM_ICDF(17081), AOM_ICDF(21064), AOM_ICDF(23339),
- AOM_ICDF(24047), AOM_ICDF(24264), AOM_ICDF(24829), AOM_ICDF(25759),
- AOM_ICDF(26224), AOM_ICDF(27119), AOM_ICDF(29833), AOM_ICDF(31599),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(17536), AOM_ICDF(17774), AOM_ICDF(20293), AOM_ICDF(23203),
- AOM_ICDF(23906), AOM_ICDF(24094), AOM_ICDF(24636), AOM_ICDF(25303),
- AOM_ICDF(26003), AOM_ICDF(27271), AOM_ICDF(29912), AOM_ICDF(30927),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(17536), AOM_ICDF(18250), AOM_ICDF(23467), AOM_ICDF(27840),
- AOM_ICDF(28058), AOM_ICDF(28626), AOM_ICDF(28853), AOM_ICDF(29541),
- AOM_ICDF(29907), AOM_ICDF(30600), AOM_ICDF(31515), AOM_ICDF(32049),
- AOM_ICDF(32768), 0 },
+ { AOM_ICDF(18377), AOM_ICDF(18815), AOM_ICDF(19743), AOM_ICDF(20178),
+ AOM_ICDF(20560), AOM_ICDF(20889), AOM_ICDF(21359), AOM_ICDF(22098),
+ AOM_ICDF(22481), AOM_ICDF(24563), AOM_ICDF(25781), AOM_ICDF(26662),
+ AOM_ICDF(28396), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(5350), AOM_ICDF(16837), AOM_ICDF(17066), AOM_ICDF(17360),
+ AOM_ICDF(17692), AOM_ICDF(18778), AOM_ICDF(18969), AOM_ICDF(19206),
+ AOM_ICDF(20291), AOM_ICDF(22367), AOM_ICDF(23212), AOM_ICDF(24670),
+ AOM_ICDF(27912), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(6671), AOM_ICDF(6759), AOM_ICDF(17812), AOM_ICDF(17998),
+ AOM_ICDF(18260), AOM_ICDF(18384), AOM_ICDF(19408), AOM_ICDF(20667),
+ AOM_ICDF(20806), AOM_ICDF(22760), AOM_ICDF(24142), AOM_ICDF(24875),
+ AOM_ICDF(28072), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(7461), AOM_ICDF(8082), AOM_ICDF(8515), AOM_ICDF(15013),
+ AOM_ICDF(15583), AOM_ICDF(16098), AOM_ICDF(16522), AOM_ICDF(18519),
+ AOM_ICDF(20348), AOM_ICDF(22954), AOM_ICDF(24130), AOM_ICDF(25342),
+ AOM_ICDF(26548), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(3694), AOM_ICDF(4403), AOM_ICDF(5370), AOM_ICDF(5854),
+ AOM_ICDF(17841), AOM_ICDF(19639), AOM_ICDF(21625), AOM_ICDF(22224),
+ AOM_ICDF(22651), AOM_ICDF(24613), AOM_ICDF(25399), AOM_ICDF(26143),
+ AOM_ICDF(26599), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(3700), AOM_ICDF(5651), AOM_ICDF(6112), AOM_ICDF(6541),
+ AOM_ICDF(8929), AOM_ICDF(20623), AOM_ICDF(21213), AOM_ICDF(21640),
+ AOM_ICDF(22214), AOM_ICDF(24306), AOM_ICDF(25412), AOM_ICDF(26406),
+ AOM_ICDF(27249), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4649), AOM_ICDF(4947), AOM_ICDF(7128), AOM_ICDF(7432),
+ AOM_ICDF(9439), AOM_ICDF(9903), AOM_ICDF(21163), AOM_ICDF(21774),
+ AOM_ICDF(22056), AOM_ICDF(24426), AOM_ICDF(25403), AOM_ICDF(26324),
+ AOM_ICDF(27128), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(7208), AOM_ICDF(7375), AOM_ICDF(8779), AOM_ICDF(9683),
+ AOM_ICDF(10072), AOM_ICDF(10284), AOM_ICDF(10796), AOM_ICDF(19786),
+ AOM_ICDF(20152), AOM_ICDF(22955), AOM_ICDF(24246), AOM_ICDF(25165),
+ AOM_ICDF(26589), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(5897), AOM_ICDF(7283), AOM_ICDF(7555), AOM_ICDF(8910),
+ AOM_ICDF(9391), AOM_ICDF(9937), AOM_ICDF(10276), AOM_ICDF(11044),
+ AOM_ICDF(19841), AOM_ICDF(22620), AOM_ICDF(23784), AOM_ICDF(25060),
+ AOM_ICDF(26418), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(12171), AOM_ICDF(12718), AOM_ICDF(13885), AOM_ICDF(14348),
+ AOM_ICDF(14925), AOM_ICDF(15394), AOM_ICDF(16108), AOM_ICDF(17075),
+ AOM_ICDF(17583), AOM_ICDF(21996), AOM_ICDF(23614), AOM_ICDF(25048),
+ AOM_ICDF(27011), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10192), AOM_ICDF(11222), AOM_ICDF(12318), AOM_ICDF(12877),
+ AOM_ICDF(13533), AOM_ICDF(14184), AOM_ICDF(14866), AOM_ICDF(15879),
+ AOM_ICDF(16650), AOM_ICDF(20419), AOM_ICDF(23265), AOM_ICDF(24295),
+ AOM_ICDF(26596), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(10776), AOM_ICDF(11387), AOM_ICDF(12899), AOM_ICDF(13471),
+ AOM_ICDF(14088), AOM_ICDF(14575), AOM_ICDF(15366), AOM_ICDF(16456),
+ AOM_ICDF(17040), AOM_ICDF(20815), AOM_ICDF(22009), AOM_ICDF(24448),
+ AOM_ICDF(26492), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4015), AOM_ICDF(6473), AOM_ICDF(9853), AOM_ICDF(10285),
+ AOM_ICDF(10655), AOM_ICDF(11032), AOM_ICDF(11431), AOM_ICDF(12199),
+ AOM_ICDF(12738), AOM_ICDF(14760), AOM_ICDF(16121), AOM_ICDF(17263),
+ AOM_ICDF(28612), AOM_ICDF(32768), 0 },
};
-#else // !CONFIG_SMOOTH_HV
-static const aom_cdf_prob
- default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTRA_MODES)] = {
- { AOM_ICDF(11264), AOM_ICDF(12608), AOM_ICDF(16309), AOM_ICDF(21086),
- AOM_ICDF(23297), AOM_ICDF(24860), AOM_ICDF(27022), AOM_ICDF(28099),
- AOM_ICDF(29631), AOM_ICDF(31126), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9600), AOM_ICDF(11953), AOM_ICDF(16100), AOM_ICDF(20922),
- AOM_ICDF(22756), AOM_ICDF(23913), AOM_ICDF(25435), AOM_ICDF(26724),
- AOM_ICDF(28046), AOM_ICDF(29927), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9344), AOM_ICDF(11540), AOM_ICDF(16515), AOM_ICDF(21763),
- AOM_ICDF(23078), AOM_ICDF(23816), AOM_ICDF(24725), AOM_ICDF(25856),
- AOM_ICDF(26720), AOM_ICDF(28208), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12288), AOM_ICDF(14448), AOM_ICDF(18026), AOM_ICDF(23346),
- AOM_ICDF(23833), AOM_ICDF(24188), AOM_ICDF(24724), AOM_ICDF(25415),
- AOM_ICDF(25817), AOM_ICDF(26876), AOM_ICDF(32768), 0 },
- };
-
+#else
static const aom_cdf_prob
default_uv_mode_cdf[INTRA_MODES][CDF_SIZE(UV_INTRA_MODES)] = {
- { AOM_ICDF(25472), AOM_ICDF(25558), AOM_ICDF(27783), AOM_ICDF(30779),
- AOM_ICDF(30988), AOM_ICDF(31269), AOM_ICDF(31492), AOM_ICDF(31741),
- AOM_ICDF(32014), AOM_ICDF(32420), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2176), AOM_ICDF(2415), AOM_ICDF(28381), AOM_ICDF(29574),
- AOM_ICDF(29832), AOM_ICDF(30712), AOM_ICDF(30881), AOM_ICDF(31662),
- AOM_ICDF(31761), AOM_ICDF(31922), AOM_ICDF(32768), 0 },
- { AOM_ICDF(3328), AOM_ICDF(3443), AOM_ICDF(4016), AOM_ICDF(31099),
- AOM_ICDF(31272), AOM_ICDF(31420), AOM_ICDF(31504), AOM_ICDF(31608),
- AOM_ICDF(31916), AOM_ICDF(32598), AOM_ICDF(32768), 0 },
- { AOM_ICDF(23424), AOM_ICDF(23534), AOM_ICDF(25915), AOM_ICDF(27831),
- AOM_ICDF(28058), AOM_ICDF(28431), AOM_ICDF(30142), AOM_ICDF(31209),
- AOM_ICDF(31459), AOM_ICDF(32369), AOM_ICDF(32768), 0 },
- { AOM_ICDF(22784), AOM_ICDF(22862), AOM_ICDF(24255), AOM_ICDF(26287),
- AOM_ICDF(28490), AOM_ICDF(29509), AOM_ICDF(29776), AOM_ICDF(30115),
- AOM_ICDF(31203), AOM_ICDF(31674), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19712), AOM_ICDF(19865), AOM_ICDF(23141), AOM_ICDF(24428),
- AOM_ICDF(25731), AOM_ICDF(31377), AOM_ICDF(31622), AOM_ICDF(32047),
- AOM_ICDF(32458), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(21376), AOM_ICDF(21421), AOM_ICDF(22130), AOM_ICDF(27688),
- AOM_ICDF(28485), AOM_ICDF(28779), AOM_ICDF(28935), AOM_ICDF(29085),
- AOM_ICDF(31962), AOM_ICDF(32450), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19712), AOM_ICDF(19814), AOM_ICDF(20725), AOM_ICDF(28510),
- AOM_ICDF(28814), AOM_ICDF(29099), AOM_ICDF(29457), AOM_ICDF(29729),
- AOM_ICDF(30133), AOM_ICDF(32408), AOM_ICDF(32768), 0 },
- { AOM_ICDF(19584), AOM_ICDF(19790), AOM_ICDF(23643), AOM_ICDF(25501),
- AOM_ICDF(25913), AOM_ICDF(26673), AOM_ICDF(27578), AOM_ICDF(30923),
- AOM_ICDF(31255), AOM_ICDF(31870), AOM_ICDF(32768), 0 },
- { AOM_ICDF(20864), AOM_ICDF(21004), AOM_ICDF(24129), AOM_ICDF(26308),
- AOM_ICDF(27062), AOM_ICDF(27065), AOM_ICDF(27488), AOM_ICDF(28045),
- AOM_ICDF(28506), AOM_ICDF(29272), AOM_ICDF(32768), 0 },
- { AOM_ICDF(23680), AOM_ICDF(23929), AOM_ICDF(27831), AOM_ICDF(30446),
- AOM_ICDF(30598), AOM_ICDF(31129), AOM_ICDF(31244), AOM_ICDF(31655),
- AOM_ICDF(31868), AOM_ICDF(32234), AOM_ICDF(32768), 0 },
+ {
+ AOM_ICDF(23552), AOM_ICDF(25936), AOM_ICDF(28623), AOM_ICDF(29033),
+ AOM_ICDF(29395), AOM_ICDF(29892), AOM_ICDF(30252), AOM_ICDF(30905),
+ AOM_ICDF(31370), AOM_ICDF(31980), AOM_ICDF(32293), AOM_ICDF(32660),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2944), AOM_ICDF(26431), AOM_ICDF(27553), AOM_ICDF(27746),
+ AOM_ICDF(28022), AOM_ICDF(29080), AOM_ICDF(29204), AOM_ICDF(29377),
+ AOM_ICDF(30264), AOM_ICDF(31206), AOM_ICDF(31613), AOM_ICDF(32418),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4352), AOM_ICDF(5120), AOM_ICDF(27952), AOM_ICDF(28117),
+ AOM_ICDF(28473), AOM_ICDF(28759), AOM_ICDF(29563), AOM_ICDF(30864),
+ AOM_ICDF(31051), AOM_ICDF(31694), AOM_ICDF(32073), AOM_ICDF(32435),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(17664), AOM_ICDF(20288), AOM_ICDF(21839), AOM_ICDF(26072),
+ AOM_ICDF(26420), AOM_ICDF(26972), AOM_ICDF(27240), AOM_ICDF(28565),
+ AOM_ICDF(30914), AOM_ICDF(31694), AOM_ICDF(32083), AOM_ICDF(32591),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(16640), AOM_ICDF(18390), AOM_ICDF(20233), AOM_ICDF(20557),
+ AOM_ICDF(25162), AOM_ICDF(27789), AOM_ICDF(29397), AOM_ICDF(29895),
+ AOM_ICDF(30369), AOM_ICDF(31497), AOM_ICDF(32025), AOM_ICDF(32642),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(13952), AOM_ICDF(17947), AOM_ICDF(18918), AOM_ICDF(19206),
+ AOM_ICDF(21131), AOM_ICDF(30668), AOM_ICDF(31061), AOM_ICDF(31317),
+ AOM_ICDF(31838), AOM_ICDF(32137), AOM_ICDF(32342), AOM_ICDF(32547),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(15872), AOM_ICDF(16990), AOM_ICDF(21479), AOM_ICDF(21732),
+ AOM_ICDF(24134), AOM_ICDF(24854), AOM_ICDF(30296), AOM_ICDF(30887),
+ AOM_ICDF(31163), AOM_ICDF(31902), AOM_ICDF(32218), AOM_ICDF(32702),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(16256), AOM_ICDF(17280), AOM_ICDF(23081), AOM_ICDF(24039),
+ AOM_ICDF(24457), AOM_ICDF(24838), AOM_ICDF(25346), AOM_ICDF(30329),
+ AOM_ICDF(30908), AOM_ICDF(31746), AOM_ICDF(32206), AOM_ICDF(32639),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(14720), AOM_ICDF(19249), AOM_ICDF(20501), AOM_ICDF(22079),
+ AOM_ICDF(22439), AOM_ICDF(23218), AOM_ICDF(23463), AOM_ICDF(24107),
+ AOM_ICDF(30308), AOM_ICDF(31379), AOM_ICDF(31866), AOM_ICDF(32556),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(16768), AOM_ICDF(19967), AOM_ICDF(22374), AOM_ICDF(22976),
+ AOM_ICDF(23836), AOM_ICDF(24050), AOM_ICDF(24642), AOM_ICDF(25760),
+ AOM_ICDF(26653), AOM_ICDF(29585), AOM_ICDF(30937), AOM_ICDF(32518),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(16768), AOM_ICDF(20751), AOM_ICDF(23026), AOM_ICDF(23591),
+ AOM_ICDF(24299), AOM_ICDF(24516), AOM_ICDF(24981), AOM_ICDF(25876),
+ AOM_ICDF(26806), AOM_ICDF(29520), AOM_ICDF(31286), AOM_ICDF(32455),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(17536), AOM_ICDF(20055), AOM_ICDF(22965), AOM_ICDF(23507),
+ AOM_ICDF(24210), AOM_ICDF(24398), AOM_ICDF(25098), AOM_ICDF(26366),
+ AOM_ICDF(27033), AOM_ICDF(29674), AOM_ICDF(30689), AOM_ICDF(32530),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(17536), AOM_ICDF(22753), AOM_ICDF(27126), AOM_ICDF(27353),
+ AOM_ICDF(27571), AOM_ICDF(28139), AOM_ICDF(28505), AOM_ICDF(29198),
+ AOM_ICDF(29886), AOM_ICDF(30801), AOM_ICDF(31335), AOM_ICDF(32054),
+ AOM_ICDF(32768), 0,
+ },
};
-#endif // CONFIG_SMOOTH_HV
-#else // !CONFIG_ALT_INTRA
+#endif // CONFIG_CFL
+#else // !CONFIG_SMOOTH_HV
static const aom_cdf_prob
default_if_y_mode_cdf[BLOCK_SIZE_GROUPS][CDF_SIZE(INTRA_MODES)] = {
- { AOM_ICDF(8320), AOM_ICDF(11376), AOM_ICDF(12880), AOM_ICDF(19959),
- AOM_ICDF(23072), AOM_ICDF(24067), AOM_ICDF(25461), AOM_ICDF(26917),
- AOM_ICDF(29157), AOM_ICDF(32768), 0 },
- { AOM_ICDF(16896), AOM_ICDF(21112), AOM_ICDF(21932), AOM_ICDF(27852),
- AOM_ICDF(28667), AOM_ICDF(28916), AOM_ICDF(29593), AOM_ICDF(30089),
- AOM_ICDF(30905), AOM_ICDF(32768), 0 },
- { AOM_ICDF(22144), AOM_ICDF(25464), AOM_ICDF(26006), AOM_ICDF(30364),
- AOM_ICDF(30583), AOM_ICDF(30655), AOM_ICDF(31183), AOM_ICDF(31400),
- AOM_ICDF(31646), AOM_ICDF(32768), 0 },
- { AOM_ICDF(28288), AOM_ICDF(30650), AOM_ICDF(30964), AOM_ICDF(32288),
- AOM_ICDF(32308), AOM_ICDF(32331), AOM_ICDF(32495), AOM_ICDF(32586),
- AOM_ICDF(32607), AOM_ICDF(32768), 0 },
+ {
+ AOM_ICDF(11264), AOM_ICDF(14965), AOM_ICDF(19742), AOM_ICDF(21904),
+ AOM_ICDF(24115), AOM_ICDF(25678), AOM_ICDF(27210), AOM_ICDF(28705),
+ AOM_ICDF(29782), AOM_ICDF(31424), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9600), AOM_ICDF(13747), AOM_ICDF(18569), AOM_ICDF(20091),
+ AOM_ICDF(21925), AOM_ICDF(23082), AOM_ICDF(24404), AOM_ICDF(26285),
+ AOM_ICDF(27574), AOM_ICDF(30415), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9344), AOM_ICDF(14319), AOM_ICDF(19567), AOM_ICDF(20476),
+ AOM_ICDF(21791), AOM_ICDF(22529), AOM_ICDF(23393), AOM_ICDF(24881),
+ AOM_ICDF(26012), AOM_ICDF(30572), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(12288), AOM_ICDF(15866), AOM_ICDF(21186), AOM_ICDF(21722),
+ AOM_ICDF(22209), AOM_ICDF(22564), AOM_ICDF(22966), AOM_ICDF(24025),
+ AOM_ICDF(24716), AOM_ICDF(30608), AOM_ICDF(32768), 0,
+ },
};
static const aom_cdf_prob
default_uv_mode_cdf[INTRA_MODES][CDF_SIZE(UV_INTRA_MODES)] = {
- { AOM_ICDF(15360), AOM_ICDF(15836), AOM_ICDF(20863), AOM_ICDF(27513),
- AOM_ICDF(28269), AOM_ICDF(29048), AOM_ICDF(29455), AOM_ICDF(30154),
- AOM_ICDF(31206), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6144), AOM_ICDF(7392), AOM_ICDF(22657), AOM_ICDF(25981),
- AOM_ICDF(26965), AOM_ICDF(28779), AOM_ICDF(29309), AOM_ICDF(30890),
- AOM_ICDF(31763), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8576), AOM_ICDF(9143), AOM_ICDF(11450), AOM_ICDF(27575),
- AOM_ICDF(28108), AOM_ICDF(28438), AOM_ICDF(28658), AOM_ICDF(28995),
- AOM_ICDF(30410), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12416), AOM_ICDF(12814), AOM_ICDF(16244), AOM_ICDF(22057),
- AOM_ICDF(23492), AOM_ICDF(24700), AOM_ICDF(26213), AOM_ICDF(27954),
- AOM_ICDF(29778), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10624), AOM_ICDF(11057), AOM_ICDF(14619), AOM_ICDF(19415),
- AOM_ICDF(23134), AOM_ICDF(25679), AOM_ICDF(26399), AOM_ICDF(27618),
- AOM_ICDF(30676), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10240), AOM_ICDF(10680), AOM_ICDF(15684), AOM_ICDF(19118),
- AOM_ICDF(21856), AOM_ICDF(27563), AOM_ICDF(28234), AOM_ICDF(29332),
- AOM_ICDF(31278), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11008), AOM_ICDF(11433), AOM_ICDF(14100), AOM_ICDF(22522),
- AOM_ICDF(24365), AOM_ICDF(25330), AOM_ICDF(25737), AOM_ICDF(26341),
- AOM_ICDF(30433), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10880), AOM_ICDF(11308), AOM_ICDF(13991), AOM_ICDF(23645),
- AOM_ICDF(24679), AOM_ICDF(25433), AOM_ICDF(25977), AOM_ICDF(26746),
- AOM_ICDF(28463), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9856), AOM_ICDF(10483), AOM_ICDF(16054), AOM_ICDF(19959),
- AOM_ICDF(21708), AOM_ICDF(23628), AOM_ICDF(24949), AOM_ICDF(28797),
- AOM_ICDF(30658), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12928), AOM_ICDF(14556), AOM_ICDF(22168), AOM_ICDF(27789),
- AOM_ICDF(28543), AOM_ICDF(29663), AOM_ICDF(29893), AOM_ICDF(30645),
- AOM_ICDF(31682), AOM_ICDF(32768), 0 },
+ {
+ AOM_ICDF(25472), AOM_ICDF(27697), AOM_ICDF(30693), AOM_ICDF(30916),
+ AOM_ICDF(31125), AOM_ICDF(31406), AOM_ICDF(31679), AOM_ICDF(32085),
+ AOM_ICDF(32334), AOM_ICDF(32682), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2176), AOM_ICDF(28142), AOM_ICDF(29335), AOM_ICDF(29504),
+ AOM_ICDF(29762), AOM_ICDF(30642), AOM_ICDF(30741), AOM_ICDF(30902),
+ AOM_ICDF(31683), AOM_ICDF(32529), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3328), AOM_ICDF(3901), AOM_ICDF(30984), AOM_ICDF(31068),
+ AOM_ICDF(31241), AOM_ICDF(31389), AOM_ICDF(31697), AOM_ICDF(32379),
+ AOM_ICDF(32483), AOM_ICDF(32653), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(23424), AOM_ICDF(25805), AOM_ICDF(27721), AOM_ICDF(29432),
+ AOM_ICDF(29659), AOM_ICDF(30032), AOM_ICDF(30282), AOM_ICDF(31192),
+ AOM_ICDF(32259), AOM_ICDF(32658), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(22784), AOM_ICDF(24177), AOM_ICDF(26209), AOM_ICDF(26476),
+ AOM_ICDF(28679), AOM_ICDF(29698), AOM_ICDF(30786), AOM_ICDF(31257),
+ AOM_ICDF(31596), AOM_ICDF(32690), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(19712), AOM_ICDF(22988), AOM_ICDF(24275), AOM_ICDF(24520),
+ AOM_ICDF(25823), AOM_ICDF(31469), AOM_ICDF(31880), AOM_ICDF(32189),
+ AOM_ICDF(32614), AOM_ICDF(32615), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(21376), AOM_ICDF(22085), AOM_ICDF(27643), AOM_ICDF(27799),
+ AOM_ICDF(28596), AOM_ICDF(28890), AOM_ICDF(31767), AOM_ICDF(32255),
+ AOM_ICDF(32405), AOM_ICDF(32723), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(19712), AOM_ICDF(20623), AOM_ICDF(28408), AOM_ICDF(28766),
+ AOM_ICDF(29070), AOM_ICDF(29355), AOM_ICDF(29759), AOM_ICDF(32034),
+ AOM_ICDF(32306), AOM_ICDF(32666), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(19584), AOM_ICDF(23437), AOM_ICDF(25295), AOM_ICDF(26200),
+ AOM_ICDF(26612), AOM_ICDF(27372), AOM_ICDF(27704), AOM_ICDF(28319),
+ AOM_ICDF(31664), AOM_ICDF(32562), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(20864), AOM_ICDF(23989), AOM_ICDF(26168), AOM_ICDF(26591),
+ AOM_ICDF(27345), AOM_ICDF(27348), AOM_ICDF(27809), AOM_ICDF(28575),
+ AOM_ICDF(29132), AOM_ICDF(32628), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(23680), AOM_ICDF(27582), AOM_ICDF(30197), AOM_ICDF(30312),
+ AOM_ICDF(30464), AOM_ICDF(30995), AOM_ICDF(31208), AOM_ICDF(31574),
+ AOM_ICDF(31985), AOM_ICDF(32519), AOM_ICDF(32768), 0,
+ },
};
-#endif // CONFIG_ALT_INTRA
+#endif // CONFIG_SMOOTH_HV
#if CONFIG_EXT_PARTITION_TYPES
static const aom_cdf_prob
@@ -2788,17 +3073,17 @@ static const aom_cdf_prob
0, 0, 0, 0, 0, 0 },
// 16x16 -> 8x8
{ AOM_ICDF(22272), AOM_ICDF(23768), AOM_ICDF(25043), AOM_ICDF(29996),
- AOM_ICDF(30744), AOM_ICDF(31493), AOM_ICDF(32130), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(30495), AOM_ICDF(30994), AOM_ICDF(31419), AOM_ICDF(31844),
+ AOM_ICDF(32343), AOM_ICDF(32768), 0 },
{ AOM_ICDF(11776), AOM_ICDF(13457), AOM_ICDF(16315), AOM_ICDF(28229),
- AOM_ICDF(29069), AOM_ICDF(29910), AOM_ICDF(31339), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(28789), AOM_ICDF(29349), AOM_ICDF(30302), AOM_ICDF(31255),
+ AOM_ICDF(31816), AOM_ICDF(32768), 0 },
{ AOM_ICDF(10496), AOM_ICDF(14802), AOM_ICDF(16136), AOM_ICDF(27127),
- AOM_ICDF(29280), AOM_ICDF(31434), AOM_ICDF(32101), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(28563), AOM_ICDF(29999), AOM_ICDF(30444), AOM_ICDF(30889),
+ AOM_ICDF(32324), AOM_ICDF(32768), 0 },
{ AOM_ICDF(6784), AOM_ICDF(8763), AOM_ICDF(10440), AOM_ICDF(29110),
- AOM_ICDF(30100), AOM_ICDF(31090), AOM_ICDF(31929), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(29770), AOM_ICDF(30430), AOM_ICDF(30989), AOM_ICDF(31548),
+ AOM_ICDF(32208), AOM_ICDF(32768), 0 },
// 32x32 -> 16x16
{ AOM_ICDF(22656), AOM_ICDF(23801), AOM_ICDF(24702), AOM_ICDF(30721),
AOM_ICDF(31103), AOM_ICDF(31485), AOM_ICDF(31785), AOM_ICDF(32085),
@@ -2814,31 +3099,31 @@ static const aom_cdf_prob
AOM_ICDF(32542), AOM_ICDF(32768), 0 },
// 64x64 -> 32x32
{ AOM_ICDF(28416), AOM_ICDF(28705), AOM_ICDF(28926), AOM_ICDF(32258),
- AOM_ICDF(32402), AOM_ICDF(32547), AOM_ICDF(32657), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(32354), AOM_ICDF(32450), AOM_ICDF(32523), AOM_ICDF(32596),
+ AOM_ICDF(32693), AOM_ICDF(32768), 0 },
{ AOM_ICDF(9216), AOM_ICDF(9952), AOM_ICDF(11849), AOM_ICDF(30134),
- AOM_ICDF(30502), AOM_ICDF(30870), AOM_ICDF(31819), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(30379), AOM_ICDF(30624), AOM_ICDF(31256), AOM_ICDF(31888),
+ AOM_ICDF(32134), AOM_ICDF(32768), 0 },
{ AOM_ICDF(7424), AOM_ICDF(9008), AOM_ICDF(9528), AOM_ICDF(30664),
- AOM_ICDF(31456), AOM_ICDF(32248), AOM_ICDF(32508), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(31192), AOM_ICDF(31720), AOM_ICDF(31893), AOM_ICDF(32066),
+ AOM_ICDF(32594), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(1710), AOM_ICDF(2069), AOM_ICDF(31978),
- AOM_ICDF(32193), AOM_ICDF(32409), AOM_ICDF(32588), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(32121), AOM_ICDF(32264), AOM_ICDF(32383), AOM_ICDF(32502),
+ AOM_ICDF(32647), AOM_ICDF(32768), 0 },
#if CONFIG_EXT_PARTITION
// 128x128 -> 64x64
{ AOM_ICDF(28416), AOM_ICDF(28705), AOM_ICDF(28926), AOM_ICDF(32258),
- AOM_ICDF(32402), AOM_ICDF(32547), AOM_ICDF(32548), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(32354), AOM_ICDF(32450), AOM_ICDF(32523), AOM_ICDF(32596),
+ AOM_ICDF(32693), AOM_ICDF(32768), 0 },
{ AOM_ICDF(9216), AOM_ICDF(9952), AOM_ICDF(11849), AOM_ICDF(30134),
- AOM_ICDF(30502), AOM_ICDF(30870), AOM_ICDF(30871), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(30379), AOM_ICDF(30624), AOM_ICDF(31256), AOM_ICDF(31888),
+ AOM_ICDF(32134), AOM_ICDF(32768), 0 },
{ AOM_ICDF(7424), AOM_ICDF(9008), AOM_ICDF(9528), AOM_ICDF(30664),
- AOM_ICDF(31456), AOM_ICDF(32248), AOM_ICDF(32249), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(31192), AOM_ICDF(31720), AOM_ICDF(31893), AOM_ICDF(32066),
+ AOM_ICDF(32594), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(1710), AOM_ICDF(2069), AOM_ICDF(31978),
- AOM_ICDF(32193), AOM_ICDF(32409), AOM_ICDF(32410), AOM_ICDF(32768), 0,
- 0, 0 },
+ AOM_ICDF(32121), AOM_ICDF(32264), AOM_ICDF(32383), AOM_ICDF(32502),
+ AOM_ICDF(32647), AOM_ICDF(32768), 0 },
#endif
};
#else
@@ -2889,13 +3174,11 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ 0 },
{ 0 },
{ 0 },
-#if CONFIG_ALT_INTRA
{ 0 },
#if CONFIG_SMOOTH_HV
{ 0 },
{ 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ 0 },
},
{
@@ -2908,13 +3191,11 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ 0 },
{ 0 },
{ 0 },
-#if CONFIG_ALT_INTRA
{ 0 },
#if CONFIG_SMOOTH_HV
{ 0 },
{ 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ 0 },
},
{
@@ -2927,13 +3208,11 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ 0 },
{ 0 },
{ 0 },
-#if CONFIG_ALT_INTRA
{ 0 },
#if CONFIG_SMOOTH_HV
{ 0 },
{ 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ 0 },
},
{
@@ -2946,13 +3225,11 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ 0 },
{ 0 },
{ 0 },
-#if CONFIG_ALT_INTRA
{ 0 },
#if CONFIG_SMOOTH_HV
{ 0 },
{ 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ 0 },
},
},
@@ -2978,7 +3255,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
@@ -2987,7 +3263,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
},
@@ -3012,7 +3287,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
@@ -3021,7 +3295,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
},
@@ -3046,7 +3319,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
@@ -3055,7 +3327,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
},
@@ -3080,7 +3351,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(5900), AOM_ICDF(7691),
AOM_ICDF(15528), AOM_ICDF(27380), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
@@ -3089,7 +3359,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(8660),
AOM_ICDF(10167), AOM_ICDF(15817), AOM_ICDF(32768), 0 },
},
@@ -3114,7 +3383,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
AOM_ICDF(26611), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
AOM_ICDF(26611), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
@@ -3123,7 +3391,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
},
@@ -3146,7 +3413,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
AOM_ICDF(26611), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
AOM_ICDF(26611), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
@@ -3155,7 +3421,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
},
@@ -3178,7 +3443,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
AOM_ICDF(26611), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
AOM_ICDF(26611), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
@@ -3187,7 +3451,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
},
@@ -3210,7 +3473,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
AOM_ICDF(26611), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(4109), AOM_ICDF(13065),
AOM_ICDF(26611), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
@@ -3219,7 +3481,6 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(5216), AOM_ICDF(6938), AOM_ICDF(13396),
AOM_ICDF(32768), 0 },
},
@@ -3236,13 +3497,11 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
},
{
@@ -3255,13 +3514,11 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
},
{
@@ -3274,13 +3531,11 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
},
{
@@ -3293,13 +3548,11 @@ static const aom_cdf_prob default_intra_ext_tx_cdf
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
-#if CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
#if CONFIG_SMOOTH_HV
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
{ AOM_ICDF(1280), AOM_ICDF(32768), 0 },
},
}
@@ -3417,7 +3670,7 @@ static const aom_cdf_prob
{ AOM_ICDF(1024), AOM_ICDF(1792), AOM_ICDF(31776), AOM_ICDF(32272),
AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(31744), AOM_ICDF(29440), AOM_ICDF(32084), AOM_ICDF(32426),
+ { { AOM_ICDF(31744), AOM_ICDF(31940), AOM_ICDF(32084), AOM_ICDF(32426),
AOM_ICDF(32768), 0 },
{ AOM_ICDF(2048), AOM_ICDF(2176), AOM_ICDF(2528), AOM_ICDF(31823),
AOM_ICDF(32768), 0 },
@@ -3498,1423 +3751,1941 @@ static const aom_cdf_prob
#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
#if CONFIG_CFL
-static const aom_cdf_prob default_cfl_alpha_cdf[CDF_SIZE(CFL_ALPHABET_SIZE)] = {
- AOM_ICDF(20492), AOM_ICDF(24094), AOM_ICDF(25679), AOM_ICDF(27242),
- AOM_ICDF(28286), AOM_ICDF(29153), AOM_ICDF(29807), AOM_ICDF(30352),
- AOM_ICDF(30866), AOM_ICDF(31295), AOM_ICDF(31703), AOM_ICDF(32046),
- AOM_ICDF(32317), AOM_ICDF(32534), AOM_ICDF(32663), AOM_ICDF(32768)
+static const aom_cdf_prob default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)] = {
+ AOM_ICDF(1892), AOM_ICDF(2229), AOM_ICDF(11464),
+ AOM_ICDF(14116), AOM_ICDF(25661), AOM_ICDF(26409),
+ AOM_ICDF(32508), AOM_ICDF(32768), 0
};
+
+static const aom_cdf_prob
+ default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = {
+ { AOM_ICDF(16215), AOM_ICDF(27740), AOM_ICDF(31726), AOM_ICDF(32606),
+ AOM_ICDF(32736), AOM_ICDF(32751), AOM_ICDF(32757), AOM_ICDF(32759),
+ AOM_ICDF(32761), AOM_ICDF(32762), AOM_ICDF(32763), AOM_ICDF(32764),
+ AOM_ICDF(32765), AOM_ICDF(32766), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(15213), AOM_ICDF(24615), AOM_ICDF(29704), AOM_ICDF(31974),
+ AOM_ICDF(32545), AOM_ICDF(32673), AOM_ICDF(32713), AOM_ICDF(32746),
+ AOM_ICDF(32753), AOM_ICDF(32756), AOM_ICDF(32758), AOM_ICDF(32761),
+ AOM_ICDF(32763), AOM_ICDF(32764), AOM_ICDF(32766), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(13250), AOM_ICDF(24677), AOM_ICDF(29113), AOM_ICDF(31666),
+ AOM_ICDF(32408), AOM_ICDF(32578), AOM_ICDF(32628), AOM_ICDF(32711),
+ AOM_ICDF(32730), AOM_ICDF(32738), AOM_ICDF(32744), AOM_ICDF(32749),
+ AOM_ICDF(32752), AOM_ICDF(32756), AOM_ICDF(32759), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(24593), AOM_ICDF(30787), AOM_ICDF(32062), AOM_ICDF(32495),
+ AOM_ICDF(32656), AOM_ICDF(32707), AOM_ICDF(32735), AOM_ICDF(32747),
+ AOM_ICDF(32752), AOM_ICDF(32757), AOM_ICDF(32760), AOM_ICDF(32763),
+ AOM_ICDF(32764), AOM_ICDF(32765), AOM_ICDF(32767), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(19883), AOM_ICDF(27419), AOM_ICDF(30100), AOM_ICDF(31392),
+ AOM_ICDF(31896), AOM_ICDF(32184), AOM_ICDF(32299), AOM_ICDF(32511),
+ AOM_ICDF(32568), AOM_ICDF(32602), AOM_ICDF(32628), AOM_ICDF(32664),
+ AOM_ICDF(32680), AOM_ICDF(32691), AOM_ICDF(32708), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(15939), AOM_ICDF(24151), AOM_ICDF(27754), AOM_ICDF(29680),
+ AOM_ICDF(30651), AOM_ICDF(31267), AOM_ICDF(31527), AOM_ICDF(31868),
+ AOM_ICDF(32001), AOM_ICDF(32090), AOM_ICDF(32181), AOM_ICDF(32284),
+ AOM_ICDF(32314), AOM_ICDF(32366), AOM_ICDF(32486), AOM_ICDF(32768), 0 }
+ };
#endif
-// CDF version of 'av1_kf_y_mode_prob'.
+#if CONFIG_KF_CTX
+// TODO(jingning): These initial models are copied directly from the entries
+// of the original table. The copied indices are (0, 0), (0, 1), ..., (4, 4).
+// It is possible to re-train this model and recover the 0.14% loss in CIF-set
+// key frame coding. This reduction in context models does not change the
+// key frame coding stats for the mid- and high-resolution sets.
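+//
+// As an illustrative sketch only (not part of this change): with the full
+// [INTRA_MODES][INTRA_MODES] table named `full` and this reduced table named
+// `reduced` (both hypothetical names), the copy described above amounts to
+// taking the top-left KF_MODE_CONTEXTS x KF_MODE_CONTEXTS block of rows:
+//
+//   for (int i = 0; i < KF_MODE_CONTEXTS; ++i)
+//     for (int j = 0; j < KF_MODE_CONTEXTS; ++j)
+//       memcpy(reduced[i][j], full[i][j],
+//              CDF_SIZE(INTRA_MODES) * sizeof(aom_cdf_prob));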
const aom_cdf_prob
- av1_kf_y_mode_cdf[INTRA_MODES][INTRA_MODES][CDF_SIZE(INTRA_MODES)] = {
-#if CONFIG_ALT_INTRA
+ default_kf_y_mode_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS][CDF_SIZE(
+ INTRA_MODES)] = {
+ {
+ {
+ AOM_ICDF(14208), AOM_ICDF(17049), AOM_ICDF(20482),
+ AOM_ICDF(21400), AOM_ICDF(22520), AOM_ICDF(23261),
+ AOM_ICDF(23963), AOM_ICDF(25010), AOM_ICDF(25828),
+ AOM_ICDF(28398), AOM_ICDF(29394), AOM_ICDF(30738),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10496), AOM_ICDF(18295), AOM_ICDF(19872),
+ AOM_ICDF(20945), AOM_ICDF(21933), AOM_ICDF(22818),
+ AOM_ICDF(23334), AOM_ICDF(24033), AOM_ICDF(24996),
+ AOM_ICDF(27652), AOM_ICDF(29060), AOM_ICDF(30071),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5120), AOM_ICDF(6461), AOM_ICDF(19840), AOM_ICDF(20310),
+ AOM_ICDF(21151), AOM_ICDF(21506), AOM_ICDF(22535),
+ AOM_ICDF(23900), AOM_ICDF(24281), AOM_ICDF(26958),
+ AOM_ICDF(27680), AOM_ICDF(29636), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(12544), AOM_ICDF(15177), AOM_ICDF(17666),
+ AOM_ICDF(19855), AOM_ICDF(21147), AOM_ICDF(22017),
+ AOM_ICDF(22797), AOM_ICDF(24514), AOM_ICDF(25779),
+ AOM_ICDF(28716), AOM_ICDF(29772), AOM_ICDF(31267),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7552), AOM_ICDF(9909), AOM_ICDF(11908), AOM_ICDF(13141),
+ AOM_ICDF(18765), AOM_ICDF(22029), AOM_ICDF(23872),
+ AOM_ICDF(24920), AOM_ICDF(25674), AOM_ICDF(29031),
+ AOM_ICDF(30244), AOM_ICDF(31684), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(3968), AOM_ICDF(17613), AOM_ICDF(19125), AOM_ICDF(19550),
+ AOM_ICDF(20305), AOM_ICDF(21908), AOM_ICDF(22274),
+ AOM_ICDF(22719), AOM_ICDF(23959), AOM_ICDF(26970),
+ AOM_ICDF(29013), AOM_ICDF(29843), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3072), AOM_ICDF(21231), AOM_ICDF(21863), AOM_ICDF(22306),
+ AOM_ICDF(22674), AOM_ICDF(23414), AOM_ICDF(23517),
+ AOM_ICDF(23798), AOM_ICDF(24770), AOM_ICDF(27032),
+ AOM_ICDF(29016), AOM_ICDF(29636), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2560), AOM_ICDF(9825), AOM_ICDF(15681), AOM_ICDF(16370),
+ AOM_ICDF(17054), AOM_ICDF(17687), AOM_ICDF(18236),
+ AOM_ICDF(19273), AOM_ICDF(20311), AOM_ICDF(24863),
+ AOM_ICDF(26825), AOM_ICDF(28756), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6912), AOM_ICDF(15140), AOM_ICDF(16485), AOM_ICDF(18364),
+ AOM_ICDF(19181), AOM_ICDF(20394), AOM_ICDF(20663),
+ AOM_ICDF(22098), AOM_ICDF(23936), AOM_ICDF(27555),
+ AOM_ICDF(29704), AOM_ICDF(30849), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2944), AOM_ICDF(13101), AOM_ICDF(14006), AOM_ICDF(14974),
+ AOM_ICDF(17818), AOM_ICDF(21093), AOM_ICDF(21930),
+ AOM_ICDF(22566), AOM_ICDF(24137), AOM_ICDF(27732),
+ AOM_ICDF(29814), AOM_ICDF(30904), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(11392), AOM_ICDF(12961), AOM_ICDF(20901),
+ AOM_ICDF(21544), AOM_ICDF(22490), AOM_ICDF(22928),
+ AOM_ICDF(23888), AOM_ICDF(25214), AOM_ICDF(25777),
+ AOM_ICDF(28256), AOM_ICDF(29102), AOM_ICDF(30513),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8064), AOM_ICDF(13595), AOM_ICDF(18888), AOM_ICDF(19616),
+ AOM_ICDF(20765), AOM_ICDF(21454), AOM_ICDF(21990),
+ AOM_ICDF(23103), AOM_ICDF(23980), AOM_ICDF(26772),
+ AOM_ICDF(28070), AOM_ICDF(29197), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4352), AOM_ICDF(5059), AOM_ICDF(21705), AOM_ICDF(22099),
+ AOM_ICDF(22703), AOM_ICDF(22846), AOM_ICDF(23679),
+ AOM_ICDF(25469), AOM_ICDF(25728), AOM_ICDF(27919),
+ AOM_ICDF(28484), AOM_ICDF(30215), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10752), AOM_ICDF(12277), AOM_ICDF(16471),
+ AOM_ICDF(18276), AOM_ICDF(19443), AOM_ICDF(19917),
+ AOM_ICDF(21158), AOM_ICDF(23881), AOM_ICDF(24892),
+ AOM_ICDF(27709), AOM_ICDF(28771), AOM_ICDF(30274),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8320), AOM_ICDF(10000), AOM_ICDF(14147), AOM_ICDF(15330),
+ AOM_ICDF(19197), AOM_ICDF(20923), AOM_ICDF(22954),
+ AOM_ICDF(24541), AOM_ICDF(25285), AOM_ICDF(28407),
+ AOM_ICDF(29431), AOM_ICDF(30953), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(10240), AOM_ICDF(12819), AOM_ICDF(15545),
+ AOM_ICDF(18248), AOM_ICDF(19779), AOM_ICDF(20932),
+ AOM_ICDF(21899), AOM_ICDF(23377), AOM_ICDF(25448),
+ AOM_ICDF(28730), AOM_ICDF(29936), AOM_ICDF(31536),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7552), AOM_ICDF(15309), AOM_ICDF(16645), AOM_ICDF(19760),
+ AOM_ICDF(20653), AOM_ICDF(21650), AOM_ICDF(22221),
+ AOM_ICDF(23273), AOM_ICDF(25509), AOM_ICDF(28683),
+ AOM_ICDF(30153), AOM_ICDF(31192), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5248), AOM_ICDF(6840), AOM_ICDF(16129), AOM_ICDF(17940),
+ AOM_ICDF(19069), AOM_ICDF(19660), AOM_ICDF(20588),
+ AOM_ICDF(22760), AOM_ICDF(23927), AOM_ICDF(27538),
+ AOM_ICDF(28397), AOM_ICDF(30725), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11008), AOM_ICDF(11903), AOM_ICDF(13794),
+ AOM_ICDF(21320), AOM_ICDF(21931), AOM_ICDF(22310),
+ AOM_ICDF(22546), AOM_ICDF(25375), AOM_ICDF(27347),
+ AOM_ICDF(29800), AOM_ICDF(30761), AOM_ICDF(31833),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6272), AOM_ICDF(8678), AOM_ICDF(10313), AOM_ICDF(13073),
+ AOM_ICDF(16823), AOM_ICDF(19980), AOM_ICDF(21520),
+ AOM_ICDF(23242), AOM_ICDF(25344), AOM_ICDF(28797),
+ AOM_ICDF(30405), AOM_ICDF(31940), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(7296), AOM_ICDF(9304), AOM_ICDF(11772), AOM_ICDF(12529),
+ AOM_ICDF(18014), AOM_ICDF(20418), AOM_ICDF(23076),
+ AOM_ICDF(24662), AOM_ICDF(25549), AOM_ICDF(29074),
+ AOM_ICDF(30392), AOM_ICDF(31773), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7168), AOM_ICDF(11687), AOM_ICDF(13541), AOM_ICDF(14431),
+ AOM_ICDF(18214), AOM_ICDF(20761), AOM_ICDF(22269),
+ AOM_ICDF(23320), AOM_ICDF(24633), AOM_ICDF(28339),
+ AOM_ICDF(30193), AOM_ICDF(31268), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3584), AOM_ICDF(4428), AOM_ICDF(13496), AOM_ICDF(14189),
+ AOM_ICDF(17372), AOM_ICDF(18617), AOM_ICDF(20609),
+ AOM_ICDF(22615), AOM_ICDF(23270), AOM_ICDF(27280),
+ AOM_ICDF(28305), AOM_ICDF(30602), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7424), AOM_ICDF(8834), AOM_ICDF(10499), AOM_ICDF(14357),
+ AOM_ICDF(17671), AOM_ICDF(19150), AOM_ICDF(20460),
+ AOM_ICDF(23235), AOM_ICDF(24391), AOM_ICDF(28351),
+ AOM_ICDF(29843), AOM_ICDF(31481), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4480), AOM_ICDF(5888), AOM_ICDF(7093), AOM_ICDF(7902),
+ AOM_ICDF(18290), AOM_ICDF(22123), AOM_ICDF(24511),
+ AOM_ICDF(25532), AOM_ICDF(26360), AOM_ICDF(29653),
+ AOM_ICDF(30954), AOM_ICDF(32215), AOM_ICDF(32768), 0,
+ },
+ },
+ };
+#else
+const aom_cdf_prob default_kf_y_mode_cdf[INTRA_MODES][INTRA_MODES][CDF_SIZE(
+ INTRA_MODES)] = {
#if CONFIG_SMOOTH_HV
+ {
+ {
+ AOM_ICDF(14208), AOM_ICDF(17049), AOM_ICDF(20482), AOM_ICDF(21400),
+ AOM_ICDF(22520), AOM_ICDF(23261), AOM_ICDF(23963), AOM_ICDF(25010),
+ AOM_ICDF(25828), AOM_ICDF(28398), AOM_ICDF(29394), AOM_ICDF(30738),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10496), AOM_ICDF(18295), AOM_ICDF(19872), AOM_ICDF(20945),
+ AOM_ICDF(21933), AOM_ICDF(22818), AOM_ICDF(23334), AOM_ICDF(24033),
+ AOM_ICDF(24996), AOM_ICDF(27652), AOM_ICDF(29060), AOM_ICDF(30071),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5120), AOM_ICDF(6461), AOM_ICDF(19840), AOM_ICDF(20310),
+ AOM_ICDF(21151), AOM_ICDF(21506), AOM_ICDF(22535), AOM_ICDF(23900),
+ AOM_ICDF(24281), AOM_ICDF(26958), AOM_ICDF(27680), AOM_ICDF(29636),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(12544), AOM_ICDF(15177), AOM_ICDF(17666), AOM_ICDF(19855),
+ AOM_ICDF(21147), AOM_ICDF(22017), AOM_ICDF(22797), AOM_ICDF(24514),
+ AOM_ICDF(25779), AOM_ICDF(28716), AOM_ICDF(29772), AOM_ICDF(31267),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7552), AOM_ICDF(9909), AOM_ICDF(11908), AOM_ICDF(13141),
+ AOM_ICDF(18765), AOM_ICDF(22029), AOM_ICDF(23872), AOM_ICDF(24920),
+ AOM_ICDF(25674), AOM_ICDF(29031), AOM_ICDF(30244), AOM_ICDF(31684),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11008), AOM_ICDF(15004), AOM_ICDF(16534), AOM_ICDF(18158),
+ AOM_ICDF(21515), AOM_ICDF(26668), AOM_ICDF(27834), AOM_ICDF(28735),
+ AOM_ICDF(30471), AOM_ICDF(30839), AOM_ICDF(30969), AOM_ICDF(31068),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6272), AOM_ICDF(7963), AOM_ICDF(11944), AOM_ICDF(12780),
+ AOM_ICDF(17944), AOM_ICDF(19198), AOM_ICDF(24071), AOM_ICDF(25295),
+ AOM_ICDF(25834), AOM_ICDF(29014), AOM_ICDF(29949), AOM_ICDF(31733),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8192), AOM_ICDF(10189), AOM_ICDF(14596), AOM_ICDF(15680),
+ AOM_ICDF(17143), AOM_ICDF(17909), AOM_ICDF(19201), AOM_ICDF(23711),
+ AOM_ICDF(24503), AOM_ICDF(28207), AOM_ICDF(29338), AOM_ICDF(31424),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10752), AOM_ICDF(13199), AOM_ICDF(15048), AOM_ICDF(17151),
+ AOM_ICDF(18445), AOM_ICDF(19604), AOM_ICDF(20363), AOM_ICDF(21782),
+ AOM_ICDF(24311), AOM_ICDF(28026), AOM_ICDF(29517), AOM_ICDF(30962),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7424), AOM_ICDF(10301), AOM_ICDF(13245), AOM_ICDF(14307),
+ AOM_ICDF(16021), AOM_ICDF(16257), AOM_ICDF(17265), AOM_ICDF(18739),
+ AOM_ICDF(20080), AOM_ICDF(26066), AOM_ICDF(28325), AOM_ICDF(31184),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6528), AOM_ICDF(10893), AOM_ICDF(13773), AOM_ICDF(14824),
+ AOM_ICDF(16540), AOM_ICDF(16926), AOM_ICDF(17748), AOM_ICDF(18876),
+ AOM_ICDF(20396), AOM_ICDF(25974), AOM_ICDF(28795), AOM_ICDF(30820),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8704), AOM_ICDF(11005), AOM_ICDF(14320), AOM_ICDF(15349),
+ AOM_ICDF(16746), AOM_ICDF(16884), AOM_ICDF(17887), AOM_ICDF(19304),
+ AOM_ICDF(20265), AOM_ICDF(26115), AOM_ICDF(27672), AOM_ICDF(31358),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6272), AOM_ICDF(9504), AOM_ICDF(15437), AOM_ICDF(16399),
+ AOM_ICDF(17355), AOM_ICDF(17948), AOM_ICDF(18814), AOM_ICDF(20270),
+ AOM_ICDF(21134), AOM_ICDF(23690), AOM_ICDF(24759), AOM_ICDF(26454),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(3968), AOM_ICDF(17613), AOM_ICDF(19125), AOM_ICDF(19550),
+ AOM_ICDF(20305), AOM_ICDF(21908), AOM_ICDF(22274), AOM_ICDF(22719),
+ AOM_ICDF(23959), AOM_ICDF(26970), AOM_ICDF(29013), AOM_ICDF(29843),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3072), AOM_ICDF(21231), AOM_ICDF(21863), AOM_ICDF(22306),
+ AOM_ICDF(22674), AOM_ICDF(23414), AOM_ICDF(23517), AOM_ICDF(23798),
+ AOM_ICDF(24770), AOM_ICDF(27032), AOM_ICDF(29016), AOM_ICDF(29636),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2560), AOM_ICDF(9825), AOM_ICDF(15681), AOM_ICDF(16370),
+ AOM_ICDF(17054), AOM_ICDF(17687), AOM_ICDF(18236), AOM_ICDF(19273),
+ AOM_ICDF(20311), AOM_ICDF(24863), AOM_ICDF(26825), AOM_ICDF(28756),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6912), AOM_ICDF(15140), AOM_ICDF(16485), AOM_ICDF(18364),
+ AOM_ICDF(19181), AOM_ICDF(20394), AOM_ICDF(20663), AOM_ICDF(22098),
+ AOM_ICDF(23936), AOM_ICDF(27555), AOM_ICDF(29704), AOM_ICDF(30849),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2944), AOM_ICDF(13101), AOM_ICDF(14006), AOM_ICDF(14974),
+ AOM_ICDF(17818), AOM_ICDF(21093), AOM_ICDF(21930), AOM_ICDF(22566),
+ AOM_ICDF(24137), AOM_ICDF(27732), AOM_ICDF(29814), AOM_ICDF(30904),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4352), AOM_ICDF(17824), AOM_ICDF(18715), AOM_ICDF(19632),
+ AOM_ICDF(21519), AOM_ICDF(26341), AOM_ICDF(26922), AOM_ICDF(27575),
+ AOM_ICDF(29863), AOM_ICDF(30432), AOM_ICDF(30769), AOM_ICDF(30881),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2944), AOM_ICDF(11971), AOM_ICDF(13509), AOM_ICDF(14295),
+ AOM_ICDF(17202), AOM_ICDF(19005), AOM_ICDF(21605), AOM_ICDF(22458),
+ AOM_ICDF(23839), AOM_ICDF(27774), AOM_ICDF(29492), AOM_ICDF(30787),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4224), AOM_ICDF(13072), AOM_ICDF(15288), AOM_ICDF(16406),
+ AOM_ICDF(17285), AOM_ICDF(18362), AOM_ICDF(19003), AOM_ICDF(21378),
+ AOM_ICDF(22942), AOM_ICDF(27093), AOM_ICDF(29381), AOM_ICDF(30872),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5120), AOM_ICDF(15375), AOM_ICDF(16188), AOM_ICDF(17415),
+ AOM_ICDF(18183), AOM_ICDF(19756), AOM_ICDF(20030), AOM_ICDF(20883),
+ AOM_ICDF(23935), AOM_ICDF(27428), AOM_ICDF(29627), AOM_ICDF(30608),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2816), AOM_ICDF(14999), AOM_ICDF(16352), AOM_ICDF(16969),
+ AOM_ICDF(17836), AOM_ICDF(18125), AOM_ICDF(18514), AOM_ICDF(19181),
+ AOM_ICDF(20650), AOM_ICDF(25773), AOM_ICDF(29172), AOM_ICDF(30662),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2560), AOM_ICDF(16158), AOM_ICDF(17320), AOM_ICDF(17839),
+ AOM_ICDF(18545), AOM_ICDF(18848), AOM_ICDF(19130), AOM_ICDF(19599),
+ AOM_ICDF(20863), AOM_ICDF(25449), AOM_ICDF(29304), AOM_ICDF(30408),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3328), AOM_ICDF(15146), AOM_ICDF(16880), AOM_ICDF(17523),
+ AOM_ICDF(18340), AOM_ICDF(18563), AOM_ICDF(18896), AOM_ICDF(19582),
+ AOM_ICDF(20944), AOM_ICDF(25914), AOM_ICDF(28759), AOM_ICDF(30583),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2560), AOM_ICDF(16618), AOM_ICDF(18460), AOM_ICDF(19207),
+ AOM_ICDF(19654), AOM_ICDF(20276), AOM_ICDF(20529), AOM_ICDF(21179),
+ AOM_ICDF(22355), AOM_ICDF(25423), AOM_ICDF(27696), AOM_ICDF(28638),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(11392), AOM_ICDF(12961), AOM_ICDF(20901), AOM_ICDF(21544),
+ AOM_ICDF(22490), AOM_ICDF(22928), AOM_ICDF(23888), AOM_ICDF(25214),
+ AOM_ICDF(25777), AOM_ICDF(28256), AOM_ICDF(29102), AOM_ICDF(30513),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8064), AOM_ICDF(13595), AOM_ICDF(18888), AOM_ICDF(19616),
+ AOM_ICDF(20765), AOM_ICDF(21454), AOM_ICDF(21990), AOM_ICDF(23103),
+ AOM_ICDF(23980), AOM_ICDF(26772), AOM_ICDF(28070), AOM_ICDF(29197),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4352), AOM_ICDF(5059), AOM_ICDF(21705), AOM_ICDF(22099),
+ AOM_ICDF(22703), AOM_ICDF(22846), AOM_ICDF(23679), AOM_ICDF(25469),
+ AOM_ICDF(25728), AOM_ICDF(27919), AOM_ICDF(28484), AOM_ICDF(30215),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10752), AOM_ICDF(12277), AOM_ICDF(16471), AOM_ICDF(18276),
+ AOM_ICDF(19443), AOM_ICDF(19917), AOM_ICDF(21158), AOM_ICDF(23881),
+ AOM_ICDF(24892), AOM_ICDF(27709), AOM_ICDF(28771), AOM_ICDF(30274),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8320), AOM_ICDF(10000), AOM_ICDF(14147), AOM_ICDF(15330),
+ AOM_ICDF(19197), AOM_ICDF(20923), AOM_ICDF(22954), AOM_ICDF(24541),
+ AOM_ICDF(25285), AOM_ICDF(28407), AOM_ICDF(29431), AOM_ICDF(30953),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11264), AOM_ICDF(14751), AOM_ICDF(18517), AOM_ICDF(20285),
+ AOM_ICDF(23172), AOM_ICDF(25970), AOM_ICDF(27312), AOM_ICDF(28684),
+ AOM_ICDF(29803), AOM_ICDF(30242), AOM_ICDF(30412), AOM_ICDF(30668),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6528), AOM_ICDF(7509), AOM_ICDF(14190), AOM_ICDF(14953),
+ AOM_ICDF(17905), AOM_ICDF(18452), AOM_ICDF(23074), AOM_ICDF(24910),
+ AOM_ICDF(25374), AOM_ICDF(28605), AOM_ICDF(29542), AOM_ICDF(31640),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6784), AOM_ICDF(7644), AOM_ICDF(15953), AOM_ICDF(17055),
+ AOM_ICDF(17945), AOM_ICDF(18242), AOM_ICDF(19351), AOM_ICDF(24705),
+ AOM_ICDF(25365), AOM_ICDF(28466), AOM_ICDF(29334), AOM_ICDF(31245),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8192), AOM_ICDF(9802), AOM_ICDF(14519), AOM_ICDF(15740),
+ AOM_ICDF(17351), AOM_ICDF(18084), AOM_ICDF(18962), AOM_ICDF(20908),
+ AOM_ICDF(22937), AOM_ICDF(26847), AOM_ICDF(28284), AOM_ICDF(29888),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5888), AOM_ICDF(7534), AOM_ICDF(14635), AOM_ICDF(15436),
+ AOM_ICDF(16710), AOM_ICDF(16830), AOM_ICDF(18000), AOM_ICDF(19760),
+ AOM_ICDF(20571), AOM_ICDF(25777), AOM_ICDF(27649), AOM_ICDF(30668),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5248), AOM_ICDF(7364), AOM_ICDF(14858), AOM_ICDF(15545),
+ AOM_ICDF(16861), AOM_ICDF(17016), AOM_ICDF(17859), AOM_ICDF(19384),
+ AOM_ICDF(20237), AOM_ICDF(25239), AOM_ICDF(27715), AOM_ICDF(29865),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6656), AOM_ICDF(7989), AOM_ICDF(15472), AOM_ICDF(16265),
+ AOM_ICDF(17271), AOM_ICDF(17334), AOM_ICDF(18563), AOM_ICDF(20327),
+ AOM_ICDF(20916), AOM_ICDF(26173), AOM_ICDF(27350), AOM_ICDF(31034),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4480), AOM_ICDF(6411), AOM_ICDF(17828), AOM_ICDF(18611),
+ AOM_ICDF(19399), AOM_ICDF(19684), AOM_ICDF(20504), AOM_ICDF(21782),
+ AOM_ICDF(22335), AOM_ICDF(25286), AOM_ICDF(26352), AOM_ICDF(28016),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(10240), AOM_ICDF(12819), AOM_ICDF(15545), AOM_ICDF(18248),
+ AOM_ICDF(19779), AOM_ICDF(20932), AOM_ICDF(21899), AOM_ICDF(23377),
+ AOM_ICDF(25448), AOM_ICDF(28730), AOM_ICDF(29936), AOM_ICDF(31536),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7552), AOM_ICDF(15309), AOM_ICDF(16645), AOM_ICDF(19760),
+ AOM_ICDF(20653), AOM_ICDF(21650), AOM_ICDF(22221), AOM_ICDF(23273),
+ AOM_ICDF(25509), AOM_ICDF(28683), AOM_ICDF(30153), AOM_ICDF(31192),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5248), AOM_ICDF(6840), AOM_ICDF(16129), AOM_ICDF(17940),
+ AOM_ICDF(19069), AOM_ICDF(19660), AOM_ICDF(20588), AOM_ICDF(22760),
+ AOM_ICDF(23927), AOM_ICDF(27538), AOM_ICDF(28397), AOM_ICDF(30725),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11008), AOM_ICDF(11903), AOM_ICDF(13794), AOM_ICDF(21320),
+ AOM_ICDF(21931), AOM_ICDF(22310), AOM_ICDF(22546), AOM_ICDF(25375),
+ AOM_ICDF(27347), AOM_ICDF(29800), AOM_ICDF(30761), AOM_ICDF(31833),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6272), AOM_ICDF(8678), AOM_ICDF(10313), AOM_ICDF(13073),
+ AOM_ICDF(16823), AOM_ICDF(19980), AOM_ICDF(21520), AOM_ICDF(23242),
+ AOM_ICDF(25344), AOM_ICDF(28797), AOM_ICDF(30405), AOM_ICDF(31940),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7424), AOM_ICDF(10835), AOM_ICDF(12653), AOM_ICDF(16345),
+ AOM_ICDF(19574), AOM_ICDF(24868), AOM_ICDF(25937), AOM_ICDF(27299),
+ AOM_ICDF(31104), AOM_ICDF(31448), AOM_ICDF(31580), AOM_ICDF(31679),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4992), AOM_ICDF(6458), AOM_ICDF(9945), AOM_ICDF(11961),
+ AOM_ICDF(16136), AOM_ICDF(17677), AOM_ICDF(20946), AOM_ICDF(23042),
+ AOM_ICDF(24475), AOM_ICDF(28304), AOM_ICDF(29748), AOM_ICDF(31791),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9600), AOM_ICDF(11879), AOM_ICDF(14703), AOM_ICDF(17653),
+ AOM_ICDF(19176), AOM_ICDF(20185), AOM_ICDF(20880), AOM_ICDF(25194),
+ AOM_ICDF(26584), AOM_ICDF(29655), AOM_ICDF(30430), AOM_ICDF(32044),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9856), AOM_ICDF(11385), AOM_ICDF(13457), AOM_ICDF(18705),
+ AOM_ICDF(19577), AOM_ICDF(20266), AOM_ICDF(20746), AOM_ICDF(22207),
+ AOM_ICDF(26724), AOM_ICDF(29431), AOM_ICDF(30645), AOM_ICDF(31604),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6272), AOM_ICDF(9318), AOM_ICDF(11569), AOM_ICDF(14812),
+ AOM_ICDF(16351), AOM_ICDF(16619), AOM_ICDF(17537), AOM_ICDF(19596),
+ AOM_ICDF(22025), AOM_ICDF(27384), AOM_ICDF(29277), AOM_ICDF(31422),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5888), AOM_ICDF(9348), AOM_ICDF(11416), AOM_ICDF(14690),
+ AOM_ICDF(16254), AOM_ICDF(16633), AOM_ICDF(17457), AOM_ICDF(19031),
+ AOM_ICDF(21875), AOM_ICDF(27080), AOM_ICDF(29442), AOM_ICDF(31193),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6912), AOM_ICDF(9329), AOM_ICDF(12218), AOM_ICDF(15177),
+ AOM_ICDF(16806), AOM_ICDF(16998), AOM_ICDF(17991), AOM_ICDF(20005),
+ AOM_ICDF(21952), AOM_ICDF(27108), AOM_ICDF(28867), AOM_ICDF(31657),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5120), AOM_ICDF(9098), AOM_ICDF(13132), AOM_ICDF(17701),
+ AOM_ICDF(18739), AOM_ICDF(19534), AOM_ICDF(20415), AOM_ICDF(22136),
+ AOM_ICDF(24213), AOM_ICDF(27199), AOM_ICDF(28504), AOM_ICDF(29960),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(7296), AOM_ICDF(9304), AOM_ICDF(11772), AOM_ICDF(12529),
+ AOM_ICDF(18014), AOM_ICDF(20418), AOM_ICDF(23076), AOM_ICDF(24662),
+ AOM_ICDF(25549), AOM_ICDF(29074), AOM_ICDF(30392), AOM_ICDF(31773),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7168), AOM_ICDF(11687), AOM_ICDF(13541), AOM_ICDF(14431),
+ AOM_ICDF(18214), AOM_ICDF(20761), AOM_ICDF(22269), AOM_ICDF(23320),
+ AOM_ICDF(24633), AOM_ICDF(28339), AOM_ICDF(30193), AOM_ICDF(31268),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3584), AOM_ICDF(4428), AOM_ICDF(13496), AOM_ICDF(14189),
+ AOM_ICDF(17372), AOM_ICDF(18617), AOM_ICDF(20609), AOM_ICDF(22615),
+ AOM_ICDF(23270), AOM_ICDF(27280), AOM_ICDF(28305), AOM_ICDF(30602),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7424), AOM_ICDF(8834), AOM_ICDF(10499), AOM_ICDF(14357),
+ AOM_ICDF(17671), AOM_ICDF(19150), AOM_ICDF(20460), AOM_ICDF(23235),
+ AOM_ICDF(24391), AOM_ICDF(28351), AOM_ICDF(29843), AOM_ICDF(31481),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4480), AOM_ICDF(5888), AOM_ICDF(7093), AOM_ICDF(7902),
+ AOM_ICDF(18290), AOM_ICDF(22123), AOM_ICDF(24511), AOM_ICDF(25532),
+ AOM_ICDF(26360), AOM_ICDF(29653), AOM_ICDF(30954), AOM_ICDF(32215),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7296), AOM_ICDF(10176), AOM_ICDF(11780), AOM_ICDF(12824),
+ AOM_ICDF(19608), AOM_ICDF(25882), AOM_ICDF(28520), AOM_ICDF(29445),
+ AOM_ICDF(31106), AOM_ICDF(31573), AOM_ICDF(31775), AOM_ICDF(31872),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3840), AOM_ICDF(4833), AOM_ICDF(7551), AOM_ICDF(8449),
+ AOM_ICDF(16668), AOM_ICDF(18614), AOM_ICDF(23952), AOM_ICDF(25668),
+ AOM_ICDF(26721), AOM_ICDF(29888), AOM_ICDF(30697), AOM_ICDF(32090),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6528), AOM_ICDF(8011), AOM_ICDF(11083), AOM_ICDF(12427),
+ AOM_ICDF(16188), AOM_ICDF(17548), AOM_ICDF(19625), AOM_ICDF(23787),
+ AOM_ICDF(24792), AOM_ICDF(28649), AOM_ICDF(29872), AOM_ICDF(31845),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7168), AOM_ICDF(9170), AOM_ICDF(10655), AOM_ICDF(12439),
+ AOM_ICDF(15550), AOM_ICDF(18128), AOM_ICDF(19565), AOM_ICDF(21412),
+ AOM_ICDF(23355), AOM_ICDF(28007), AOM_ICDF(30080), AOM_ICDF(31568),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5504), AOM_ICDF(7636), AOM_ICDF(10174), AOM_ICDF(11056),
+ AOM_ICDF(15562), AOM_ICDF(16252), AOM_ICDF(17931), AOM_ICDF(19598),
+ AOM_ICDF(20967), AOM_ICDF(26845), AOM_ICDF(29149), AOM_ICDF(31490),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5248), AOM_ICDF(7810), AOM_ICDF(10004), AOM_ICDF(11015),
+ AOM_ICDF(15359), AOM_ICDF(16310), AOM_ICDF(17834), AOM_ICDF(19185),
+ AOM_ICDF(20903), AOM_ICDF(26728), AOM_ICDF(29585), AOM_ICDF(31478),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5376), AOM_ICDF(7322), AOM_ICDF(10592), AOM_ICDF(11694),
+ AOM_ICDF(15586), AOM_ICDF(16103), AOM_ICDF(17999), AOM_ICDF(19740),
+ AOM_ICDF(20950), AOM_ICDF(26894), AOM_ICDF(28912), AOM_ICDF(31591),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4608), AOM_ICDF(7276), AOM_ICDF(12153), AOM_ICDF(13388),
+ AOM_ICDF(16091), AOM_ICDF(17970), AOM_ICDF(19548), AOM_ICDF(21175),
+ AOM_ICDF(22481), AOM_ICDF(26543), AOM_ICDF(28212), AOM_ICDF(29908),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(6656), AOM_ICDF(12225), AOM_ICDF(14441), AOM_ICDF(15158),
+ AOM_ICDF(19600), AOM_ICDF(27127), AOM_ICDF(28221), AOM_ICDF(29186),
+ AOM_ICDF(30439), AOM_ICDF(30913), AOM_ICDF(31135), AOM_ICDF(31238),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6400), AOM_ICDF(14608), AOM_ICDF(15920), AOM_ICDF(16643),
+ AOM_ICDF(20149), AOM_ICDF(27328), AOM_ICDF(27896), AOM_ICDF(28672),
+ AOM_ICDF(30227), AOM_ICDF(30778), AOM_ICDF(31053), AOM_ICDF(31120),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3840), AOM_ICDF(6925), AOM_ICDF(14671), AOM_ICDF(15709),
+ AOM_ICDF(19830), AOM_ICDF(24216), AOM_ICDF(25507), AOM_ICDF(27459),
+ AOM_ICDF(28552), AOM_ICDF(29569), AOM_ICDF(29808), AOM_ICDF(30169),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9600), AOM_ICDF(13604), AOM_ICDF(15202), AOM_ICDF(17530),
+ AOM_ICDF(20878), AOM_ICDF(24279), AOM_ICDF(25278), AOM_ICDF(28255),
+ AOM_ICDF(30651), AOM_ICDF(31170), AOM_ICDF(31343), AOM_ICDF(31410),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4608), AOM_ICDF(8535), AOM_ICDF(9588), AOM_ICDF(10740),
+ AOM_ICDF(18673), AOM_ICDF(27664), AOM_ICDF(28826), AOM_ICDF(29828),
+ AOM_ICDF(31081), AOM_ICDF(31503), AOM_ICDF(31680), AOM_ICDF(31778),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4864), AOM_ICDF(10543), AOM_ICDF(11313), AOM_ICDF(12197),
+ AOM_ICDF(16785), AOM_ICDF(27858), AOM_ICDF(28556), AOM_ICDF(29480),
+ AOM_ICDF(30892), AOM_ICDF(31486), AOM_ICDF(31722), AOM_ICDF(31787),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3968), AOM_ICDF(7492), AOM_ICDF(10283), AOM_ICDF(11318),
+ AOM_ICDF(18486), AOM_ICDF(24061), AOM_ICDF(26761), AOM_ICDF(28456),
+ AOM_ICDF(30126), AOM_ICDF(30872), AOM_ICDF(31088), AOM_ICDF(31305),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6016), AOM_ICDF(10246), AOM_ICDF(12999), AOM_ICDF(15083),
+ AOM_ICDF(18769), AOM_ICDF(22398), AOM_ICDF(23584), AOM_ICDF(27098),
+ AOM_ICDF(29574), AOM_ICDF(30609), AOM_ICDF(30898), AOM_ICDF(31200),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7808), AOM_ICDF(13404), AOM_ICDF(14723), AOM_ICDF(16413),
+ AOM_ICDF(20186), AOM_ICDF(24739), AOM_ICDF(25407), AOM_ICDF(27106),
+ AOM_ICDF(29929), AOM_ICDF(30507), AOM_ICDF(30827), AOM_ICDF(30915),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2816), AOM_ICDF(6530), AOM_ICDF(8123), AOM_ICDF(9240),
+ AOM_ICDF(12536), AOM_ICDF(17593), AOM_ICDF(18754), AOM_ICDF(20319),
+ AOM_ICDF(22070), AOM_ICDF(27037), AOM_ICDF(29332), AOM_ICDF(30779),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2432), AOM_ICDF(6577), AOM_ICDF(8010), AOM_ICDF(9215),
+ AOM_ICDF(12657), AOM_ICDF(18898), AOM_ICDF(19588), AOM_ICDF(20953),
+ AOM_ICDF(22766), AOM_ICDF(27231), AOM_ICDF(29927), AOM_ICDF(31109),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3200), AOM_ICDF(6974), AOM_ICDF(9162), AOM_ICDF(10450),
+ AOM_ICDF(13818), AOM_ICDF(17757), AOM_ICDF(19119), AOM_ICDF(20842),
+ AOM_ICDF(22269), AOM_ICDF(27170), AOM_ICDF(29271), AOM_ICDF(30804),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4480), AOM_ICDF(10689), AOM_ICDF(15307), AOM_ICDF(16589),
+ AOM_ICDF(19738), AOM_ICDF(24416), AOM_ICDF(25332), AOM_ICDF(26541),
+ AOM_ICDF(28634), AOM_ICDF(29711), AOM_ICDF(29913), AOM_ICDF(30116),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(9600), AOM_ICDF(11066), AOM_ICDF(15832), AOM_ICDF(16515),
+ AOM_ICDF(18844), AOM_ICDF(19883), AOM_ICDF(24302), AOM_ICDF(25759),
+ AOM_ICDF(26358), AOM_ICDF(29290), AOM_ICDF(30262), AOM_ICDF(31682),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8832), AOM_ICDF(12814), AOM_ICDF(16171), AOM_ICDF(17041),
+ AOM_ICDF(19066), AOM_ICDF(20145), AOM_ICDF(22933), AOM_ICDF(24074),
+ AOM_ICDF(25006), AOM_ICDF(28115), AOM_ICDF(29722), AOM_ICDF(30991),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3840), AOM_ICDF(4486), AOM_ICDF(15821), AOM_ICDF(16330),
+ AOM_ICDF(18461), AOM_ICDF(18879), AOM_ICDF(22436), AOM_ICDF(25051),
+ AOM_ICDF(25443), AOM_ICDF(28637), AOM_ICDF(29396), AOM_ICDF(31412),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9856), AOM_ICDF(10863), AOM_ICDF(14050), AOM_ICDF(15920),
+ AOM_ICDF(18783), AOM_ICDF(19531), AOM_ICDF(22502), AOM_ICDF(24577),
+ AOM_ICDF(25361), AOM_ICDF(28559), AOM_ICDF(29600), AOM_ICDF(31336),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6528), AOM_ICDF(7620), AOM_ICDF(10182), AOM_ICDF(11199),
+ AOM_ICDF(17281), AOM_ICDF(19946), AOM_ICDF(23885), AOM_ICDF(25333),
+ AOM_ICDF(26130), AOM_ICDF(29425), AOM_ICDF(30332), AOM_ICDF(31948),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9728), AOM_ICDF(11821), AOM_ICDF(13954), AOM_ICDF(15233),
+ AOM_ICDF(19855), AOM_ICDF(24478), AOM_ICDF(28675), AOM_ICDF(29878),
+ AOM_ICDF(31238), AOM_ICDF(31741), AOM_ICDF(31874), AOM_ICDF(32048),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5120), AOM_ICDF(5753), AOM_ICDF(9673), AOM_ICDF(10149),
+ AOM_ICDF(14343), AOM_ICDF(15190), AOM_ICDF(24967), AOM_ICDF(26378),
+ AOM_ICDF(26841), AOM_ICDF(29749), AOM_ICDF(30527), AOM_ICDF(32120),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5888), AOM_ICDF(6606), AOM_ICDF(11498), AOM_ICDF(12538),
+ AOM_ICDF(14737), AOM_ICDF(15425), AOM_ICDF(19549), AOM_ICDF(24047),
+ AOM_ICDF(24765), AOM_ICDF(28711), AOM_ICDF(29822), AOM_ICDF(32138),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10368), AOM_ICDF(11757), AOM_ICDF(14126), AOM_ICDF(15474),
+ AOM_ICDF(18311), AOM_ICDF(19358), AOM_ICDF(21539), AOM_ICDF(23451),
+ AOM_ICDF(25034), AOM_ICDF(28791), AOM_ICDF(30035), AOM_ICDF(31280),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6016), AOM_ICDF(7623), AOM_ICDF(11378), AOM_ICDF(12248),
+ AOM_ICDF(15171), AOM_ICDF(15459), AOM_ICDF(18958), AOM_ICDF(20875),
+ AOM_ICDF(21955), AOM_ICDF(27411), AOM_ICDF(29196), AOM_ICDF(31723),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5760), AOM_ICDF(7469), AOM_ICDF(11399), AOM_ICDF(12323),
+ AOM_ICDF(15165), AOM_ICDF(15528), AOM_ICDF(18804), AOM_ICDF(20769),
+ AOM_ICDF(21767), AOM_ICDF(27129), AOM_ICDF(29435), AOM_ICDF(31502),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7040), AOM_ICDF(8295), AOM_ICDF(12298), AOM_ICDF(13035),
+ AOM_ICDF(15194), AOM_ICDF(15357), AOM_ICDF(18976), AOM_ICDF(21100),
+ AOM_ICDF(21805), AOM_ICDF(26978), AOM_ICDF(28342), AOM_ICDF(31763),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5632), AOM_ICDF(7465), AOM_ICDF(14220), AOM_ICDF(15035),
+ AOM_ICDF(17014), AOM_ICDF(18105), AOM_ICDF(21111), AOM_ICDF(23027),
+ AOM_ICDF(23934), AOM_ICDF(27207), AOM_ICDF(28293), AOM_ICDF(30330),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(11008), AOM_ICDF(13089), AOM_ICDF(17144), AOM_ICDF(18425),
+ AOM_ICDF(19954), AOM_ICDF(20624), AOM_ICDF(21658), AOM_ICDF(24229),
+ AOM_ICDF(25290), AOM_ICDF(28803), AOM_ICDF(29938), AOM_ICDF(31493),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9088), AOM_ICDF(14218), AOM_ICDF(16378), AOM_ICDF(17699),
+ AOM_ICDF(18935), AOM_ICDF(19928), AOM_ICDF(20524), AOM_ICDF(22781),
+ AOM_ICDF(24155), AOM_ICDF(27523), AOM_ICDF(29068), AOM_ICDF(30270),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6144), AOM_ICDF(7194), AOM_ICDF(17912), AOM_ICDF(18991),
+ AOM_ICDF(19879), AOM_ICDF(20151), AOM_ICDF(21170), AOM_ICDF(23938),
+ AOM_ICDF(24712), AOM_ICDF(27763), AOM_ICDF(28556), AOM_ICDF(30584),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10496), AOM_ICDF(11614), AOM_ICDF(13652), AOM_ICDF(16928),
+ AOM_ICDF(18425), AOM_ICDF(18967), AOM_ICDF(19724), AOM_ICDF(23817),
+ AOM_ICDF(25594), AOM_ICDF(28685), AOM_ICDF(29734), AOM_ICDF(30941),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7296), AOM_ICDF(8915), AOM_ICDF(11163), AOM_ICDF(13821),
+ AOM_ICDF(16951), AOM_ICDF(18507), AOM_ICDF(20180), AOM_ICDF(22423),
+ AOM_ICDF(24017), AOM_ICDF(28294), AOM_ICDF(29614), AOM_ICDF(31673),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9728), AOM_ICDF(13441), AOM_ICDF(15858), AOM_ICDF(18860),
+ AOM_ICDF(21713), AOM_ICDF(24478), AOM_ICDF(25995), AOM_ICDF(28233),
+ AOM_ICDF(30347), AOM_ICDF(30853), AOM_ICDF(31081), AOM_ICDF(31328),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6400), AOM_ICDF(7480), AOM_ICDF(11482), AOM_ICDF(13206),
+ AOM_ICDF(16199), AOM_ICDF(16908), AOM_ICDF(20436), AOM_ICDF(23507),
+ AOM_ICDF(24650), AOM_ICDF(28360), AOM_ICDF(29438), AOM_ICDF(31532),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9856), AOM_ICDF(10979), AOM_ICDF(13430), AOM_ICDF(15195),
+ AOM_ICDF(15957), AOM_ICDF(16350), AOM_ICDF(16871), AOM_ICDF(26198),
+ AOM_ICDF(26991), AOM_ICDF(29612), AOM_ICDF(30438), AOM_ICDF(31962),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8960), AOM_ICDF(10529), AOM_ICDF(12640), AOM_ICDF(15350),
+ AOM_ICDF(16987), AOM_ICDF(17859), AOM_ICDF(18590), AOM_ICDF(21400),
+ AOM_ICDF(23812), AOM_ICDF(28188), AOM_ICDF(29589), AOM_ICDF(31280),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7168), AOM_ICDF(8877), AOM_ICDF(12393), AOM_ICDF(14015),
+ AOM_ICDF(15655), AOM_ICDF(15794), AOM_ICDF(16814), AOM_ICDF(19923),
+ AOM_ICDF(21086), AOM_ICDF(26723), AOM_ICDF(28669), AOM_ICDF(31468),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6400), AOM_ICDF(8900), AOM_ICDF(12241), AOM_ICDF(13828),
+ AOM_ICDF(15513), AOM_ICDF(15671), AOM_ICDF(16500), AOM_ICDF(19257),
+ AOM_ICDF(20456), AOM_ICDF(25984), AOM_ICDF(28658), AOM_ICDF(31017),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7296), AOM_ICDF(8820), AOM_ICDF(12885), AOM_ICDF(14441),
+ AOM_ICDF(15813), AOM_ICDF(15911), AOM_ICDF(16954), AOM_ICDF(20026),
+ AOM_ICDF(20950), AOM_ICDF(26563), AOM_ICDF(28140), AOM_ICDF(31673),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6272), AOM_ICDF(8455), AOM_ICDF(13328), AOM_ICDF(15907),
+ AOM_ICDF(17026), AOM_ICDF(17464), AOM_ICDF(18267), AOM_ICDF(21436),
+ AOM_ICDF(22712), AOM_ICDF(26403), AOM_ICDF(27660), AOM_ICDF(29559),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(6784), AOM_ICDF(11216), AOM_ICDF(13269), AOM_ICDF(15677),
+ AOM_ICDF(16931), AOM_ICDF(18445), AOM_ICDF(19097), AOM_ICDF(20082),
+ AOM_ICDF(24298), AOM_ICDF(28236), AOM_ICDF(30118), AOM_ICDF(31448),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5760), AOM_ICDF(13240), AOM_ICDF(14110), AOM_ICDF(16966),
+ AOM_ICDF(17743), AOM_ICDF(18916), AOM_ICDF(19281), AOM_ICDF(19848),
+ AOM_ICDF(25552), AOM_ICDF(28646), AOM_ICDF(30444), AOM_ICDF(31291),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4352), AOM_ICDF(6870), AOM_ICDF(14660), AOM_ICDF(16597),
+ AOM_ICDF(17361), AOM_ICDF(18126), AOM_ICDF(18852), AOM_ICDF(20765),
+ AOM_ICDF(23526), AOM_ICDF(27670), AOM_ICDF(29096), AOM_ICDF(31214),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9472), AOM_ICDF(11736), AOM_ICDF(13172), AOM_ICDF(18192),
+ AOM_ICDF(19070), AOM_ICDF(19651), AOM_ICDF(19991), AOM_ICDF(21793),
+ AOM_ICDF(26005), AOM_ICDF(29291), AOM_ICDF(30500), AOM_ICDF(31767),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4480), AOM_ICDF(7252), AOM_ICDF(8651), AOM_ICDF(12379),
+ AOM_ICDF(14936), AOM_ICDF(17493), AOM_ICDF(18326), AOM_ICDF(19527),
+ AOM_ICDF(23655), AOM_ICDF(28031), AOM_ICDF(29960), AOM_ICDF(31773),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6016), AOM_ICDF(11561), AOM_ICDF(12864), AOM_ICDF(15793),
+ AOM_ICDF(18765), AOM_ICDF(23040), AOM_ICDF(23640), AOM_ICDF(24415),
+ AOM_ICDF(31040), AOM_ICDF(31473), AOM_ICDF(31740), AOM_ICDF(31827),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4480), AOM_ICDF(6825), AOM_ICDF(8810), AOM_ICDF(11269),
+ AOM_ICDF(14257), AOM_ICDF(15716), AOM_ICDF(18397), AOM_ICDF(20006),
+ AOM_ICDF(24020), AOM_ICDF(28230), AOM_ICDF(29780), AOM_ICDF(31773),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6912), AOM_ICDF(9466), AOM_ICDF(11717), AOM_ICDF(15159),
+ AOM_ICDF(16237), AOM_ICDF(17145), AOM_ICDF(17814), AOM_ICDF(21258),
+ AOM_ICDF(24754), AOM_ICDF(28864), AOM_ICDF(30313), AOM_ICDF(32061),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7680), AOM_ICDF(10517), AOM_ICDF(11381), AOM_ICDF(16202),
+ AOM_ICDF(16809), AOM_ICDF(17425), AOM_ICDF(17774), AOM_ICDF(18764),
+ AOM_ICDF(26842), AOM_ICDF(29600), AOM_ICDF(31073), AOM_ICDF(31886),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4992), AOM_ICDF(8626), AOM_ICDF(10531), AOM_ICDF(13103),
+ AOM_ICDF(14495), AOM_ICDF(14784), AOM_ICDF(15365), AOM_ICDF(16657),
+ AOM_ICDF(21051), AOM_ICDF(27011), AOM_ICDF(29685), AOM_ICDF(31574),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4736), AOM_ICDF(9433), AOM_ICDF(10981), AOM_ICDF(13494),
+ AOM_ICDF(14644), AOM_ICDF(15043), AOM_ICDF(15396), AOM_ICDF(16378),
+ AOM_ICDF(21506), AOM_ICDF(26869), AOM_ICDF(29824), AOM_ICDF(31454),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5760), AOM_ICDF(9526), AOM_ICDF(11905), AOM_ICDF(14476),
+ AOM_ICDF(15722), AOM_ICDF(16103), AOM_ICDF(16768), AOM_ICDF(18070),
+ AOM_ICDF(21630), AOM_ICDF(27401), AOM_ICDF(29592), AOM_ICDF(31818),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4480), AOM_ICDF(9947), AOM_ICDF(12386), AOM_ICDF(15909),
+ AOM_ICDF(16496), AOM_ICDF(17397), AOM_ICDF(17866), AOM_ICDF(18927),
+ AOM_ICDF(24408), AOM_ICDF(27750), AOM_ICDF(29614), AOM_ICDF(30889),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(7424), AOM_ICDF(10538), AOM_ICDF(14098), AOM_ICDF(14891),
+ AOM_ICDF(16486), AOM_ICDF(16756), AOM_ICDF(17607), AOM_ICDF(18952),
+ AOM_ICDF(20168), AOM_ICDF(26275), AOM_ICDF(28617), AOM_ICDF(31580),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5376), AOM_ICDF(13070), AOM_ICDF(14969), AOM_ICDF(15848),
+ AOM_ICDF(17197), AOM_ICDF(17447), AOM_ICDF(17954), AOM_ICDF(18747),
+ AOM_ICDF(20137), AOM_ICDF(25628), AOM_ICDF(28753), AOM_ICDF(30628),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3584), AOM_ICDF(5287), AOM_ICDF(16141), AOM_ICDF(16840),
+ AOM_ICDF(17670), AOM_ICDF(17760), AOM_ICDF(18532), AOM_ICDF(20387),
+ AOM_ICDF(21102), AOM_ICDF(26118), AOM_ICDF(27535), AOM_ICDF(30830),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7424), AOM_ICDF(9594), AOM_ICDF(11770), AOM_ICDF(14505),
+ AOM_ICDF(16234), AOM_ICDF(16365), AOM_ICDF(17201), AOM_ICDF(20286),
+ AOM_ICDF(22128), AOM_ICDF(27371), AOM_ICDF(29426), AOM_ICDF(31580),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5632), AOM_ICDF(8393), AOM_ICDF(10566), AOM_ICDF(11917),
+ AOM_ICDF(16025), AOM_ICDF(16697), AOM_ICDF(18123), AOM_ICDF(19541),
+ AOM_ICDF(21135), AOM_ICDF(27059), AOM_ICDF(29325), AOM_ICDF(31814),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3840), AOM_ICDF(7916), AOM_ICDF(9526), AOM_ICDF(11010),
+ AOM_ICDF(14114), AOM_ICDF(18169), AOM_ICDF(19510), AOM_ICDF(21031),
+ AOM_ICDF(23083), AOM_ICDF(27769), AOM_ICDF(29782), AOM_ICDF(31299),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5376), AOM_ICDF(7338), AOM_ICDF(10657), AOM_ICDF(11699),
+ AOM_ICDF(14780), AOM_ICDF(15070), AOM_ICDF(18291), AOM_ICDF(20170),
+ AOM_ICDF(21347), AOM_ICDF(26985), AOM_ICDF(28811), AOM_ICDF(31805),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5632), AOM_ICDF(7669), AOM_ICDF(11558), AOM_ICDF(12653),
+ AOM_ICDF(13962), AOM_ICDF(14116), AOM_ICDF(15074), AOM_ICDF(19886),
+ AOM_ICDF(21123), AOM_ICDF(26953), AOM_ICDF(28755), AOM_ICDF(31708),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6528), AOM_ICDF(9739), AOM_ICDF(11612), AOM_ICDF(13211),
+ AOM_ICDF(14992), AOM_ICDF(15237), AOM_ICDF(16016), AOM_ICDF(17677),
+ AOM_ICDF(20588), AOM_ICDF(26647), AOM_ICDF(29116), AOM_ICDF(31435),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5376), AOM_ICDF(8346), AOM_ICDF(11022), AOM_ICDF(11976),
+ AOM_ICDF(13541), AOM_ICDF(13749), AOM_ICDF(14520), AOM_ICDF(16173),
+ AOM_ICDF(17567), AOM_ICDF(25182), AOM_ICDF(28111), AOM_ICDF(31591),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4736), AOM_ICDF(8894), AOM_ICDF(11294), AOM_ICDF(12220),
+ AOM_ICDF(13753), AOM_ICDF(14029), AOM_ICDF(14645), AOM_ICDF(16065),
+ AOM_ICDF(17621), AOM_ICDF(24911), AOM_ICDF(28655), AOM_ICDF(31344),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5760), AOM_ICDF(8193), AOM_ICDF(11667), AOM_ICDF(12461),
+ AOM_ICDF(13880), AOM_ICDF(14040), AOM_ICDF(14946), AOM_ICDF(16537),
+ AOM_ICDF(17642), AOM_ICDF(25117), AOM_ICDF(27333), AOM_ICDF(31713),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4096), AOM_ICDF(8479), AOM_ICDF(13751), AOM_ICDF(14813),
+ AOM_ICDF(15994), AOM_ICDF(16157), AOM_ICDF(16905), AOM_ICDF(18314),
+ AOM_ICDF(19575), AOM_ICDF(25132), AOM_ICDF(27445), AOM_ICDF(30192),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(7936), AOM_ICDF(12263), AOM_ICDF(15558), AOM_ICDF(16331),
+ AOM_ICDF(17779), AOM_ICDF(18148), AOM_ICDF(18810), AOM_ICDF(19794),
+ AOM_ICDF(21046), AOM_ICDF(26644), AOM_ICDF(29417), AOM_ICDF(31507),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5376), AOM_ICDF(15025), AOM_ICDF(16457), AOM_ICDF(17074),
+ AOM_ICDF(18079), AOM_ICDF(18299), AOM_ICDF(18648), AOM_ICDF(19240),
+ AOM_ICDF(20612), AOM_ICDF(25687), AOM_ICDF(29392), AOM_ICDF(30842),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3840), AOM_ICDF(6037), AOM_ICDF(17465), AOM_ICDF(18089),
+ AOM_ICDF(18869), AOM_ICDF(18953), AOM_ICDF(19688), AOM_ICDF(21223),
+ AOM_ICDF(21816), AOM_ICDF(26562), AOM_ICDF(28195), AOM_ICDF(30621),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8448), AOM_ICDF(11255), AOM_ICDF(13307), AOM_ICDF(15676),
+ AOM_ICDF(17392), AOM_ICDF(17603), AOM_ICDF(18268), AOM_ICDF(20783),
+ AOM_ICDF(22646), AOM_ICDF(27628), AOM_ICDF(29737), AOM_ICDF(31628),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5760), AOM_ICDF(9119), AOM_ICDF(11015), AOM_ICDF(12269),
+ AOM_ICDF(16280), AOM_ICDF(17023), AOM_ICDF(18282), AOM_ICDF(19418),
+ AOM_ICDF(21325), AOM_ICDF(27309), AOM_ICDF(30004), AOM_ICDF(31818),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3968), AOM_ICDF(9094), AOM_ICDF(10606), AOM_ICDF(12007),
+ AOM_ICDF(14218), AOM_ICDF(18911), AOM_ICDF(20089), AOM_ICDF(20924),
+ AOM_ICDF(23587), AOM_ICDF(27808), AOM_ICDF(30253), AOM_ICDF(31305),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6016), AOM_ICDF(8627), AOM_ICDF(11201), AOM_ICDF(12200),
+ AOM_ICDF(15305), AOM_ICDF(15671), AOM_ICDF(18639), AOM_ICDF(20185),
+ AOM_ICDF(21627), AOM_ICDF(26990), AOM_ICDF(29449), AOM_ICDF(31723),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6272), AOM_ICDF(8768), AOM_ICDF(12320), AOM_ICDF(13296),
+ AOM_ICDF(14643), AOM_ICDF(14970), AOM_ICDF(15760), AOM_ICDF(20545),
+ AOM_ICDF(21863), AOM_ICDF(27473), AOM_ICDF(29535), AOM_ICDF(31836),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6912), AOM_ICDF(10905), AOM_ICDF(12656), AOM_ICDF(14084),
+ AOM_ICDF(15705), AOM_ICDF(16069), AOM_ICDF(16674), AOM_ICDF(17779),
+ AOM_ICDF(21041), AOM_ICDF(26586), AOM_ICDF(29539), AOM_ICDF(31253),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5248), AOM_ICDF(9672), AOM_ICDF(12113), AOM_ICDF(12871),
+ AOM_ICDF(14423), AOM_ICDF(14710), AOM_ICDF(15376), AOM_ICDF(16708),
+ AOM_ICDF(18092), AOM_ICDF(25260), AOM_ICDF(28991), AOM_ICDF(31585),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4736), AOM_ICDF(10789), AOM_ICDF(13029), AOM_ICDF(13750),
+ AOM_ICDF(15040), AOM_ICDF(15385), AOM_ICDF(15840), AOM_ICDF(16887),
+ AOM_ICDF(18393), AOM_ICDF(25230), AOM_ICDF(29558), AOM_ICDF(31454),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6016), AOM_ICDF(9916), AOM_ICDF(12938), AOM_ICDF(13741),
+ AOM_ICDF(15030), AOM_ICDF(15297), AOM_ICDF(16116), AOM_ICDF(17333),
+ AOM_ICDF(18672), AOM_ICDF(25954), AOM_ICDF(28498), AOM_ICDF(31618),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4608), AOM_ICDF(10266), AOM_ICDF(15450), AOM_ICDF(16299),
+ AOM_ICDF(17114), AOM_ICDF(17288), AOM_ICDF(17775), AOM_ICDF(18835),
+ AOM_ICDF(20227), AOM_ICDF(25199), AOM_ICDF(28098), AOM_ICDF(30018),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(7296), AOM_ICDF(9951), AOM_ICDF(14124), AOM_ICDF(14806),
+ AOM_ICDF(16181), AOM_ICDF(16377), AOM_ICDF(17485), AOM_ICDF(19069),
+ AOM_ICDF(20078), AOM_ICDF(26051), AOM_ICDF(27777), AOM_ICDF(31574),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5376), AOM_ICDF(13823), AOM_ICDF(15889), AOM_ICDF(16620),
+ AOM_ICDF(17709), AOM_ICDF(17881), AOM_ICDF(18327), AOM_ICDF(19140),
+ AOM_ICDF(20374), AOM_ICDF(25685), AOM_ICDF(28160), AOM_ICDF(30521),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3200), AOM_ICDF(4602), AOM_ICDF(16404), AOM_ICDF(17042),
+ AOM_ICDF(17780), AOM_ICDF(17829), AOM_ICDF(18706), AOM_ICDF(20608),
+ AOM_ICDF(21115), AOM_ICDF(25884), AOM_ICDF(26960), AOM_ICDF(30804),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7040), AOM_ICDF(9444), AOM_ICDF(11770), AOM_ICDF(14321),
+ AOM_ICDF(15951), AOM_ICDF(16074), AOM_ICDF(17033), AOM_ICDF(20352),
+ AOM_ICDF(22301), AOM_ICDF(27567), AOM_ICDF(29151), AOM_ICDF(31662),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6016), AOM_ICDF(8316), AOM_ICDF(10849), AOM_ICDF(12136),
+ AOM_ICDF(15860), AOM_ICDF(16430), AOM_ICDF(17935), AOM_ICDF(19659),
+ AOM_ICDF(21083), AOM_ICDF(26968), AOM_ICDF(28839), AOM_ICDF(31618),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3840), AOM_ICDF(7472), AOM_ICDF(9436), AOM_ICDF(11038),
+ AOM_ICDF(13625), AOM_ICDF(17596), AOM_ICDF(18959), AOM_ICDF(20543),
+ AOM_ICDF(22879), AOM_ICDF(27487), AOM_ICDF(29351), AOM_ICDF(31186),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5376), AOM_ICDF(7117), AOM_ICDF(11424), AOM_ICDF(12381),
+ AOM_ICDF(14823), AOM_ICDF(15053), AOM_ICDF(18656), AOM_ICDF(20818),
+ AOM_ICDF(21722), AOM_ICDF(27042), AOM_ICDF(28233), AOM_ICDF(31591),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5760), AOM_ICDF(7281), AOM_ICDF(11910), AOM_ICDF(12912),
+ AOM_ICDF(14229), AOM_ICDF(14391), AOM_ICDF(15474), AOM_ICDF(20113),
+ AOM_ICDF(21128), AOM_ICDF(26627), AOM_ICDF(28077), AOM_ICDF(31713),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6656), AOM_ICDF(9452), AOM_ICDF(11526), AOM_ICDF(13288),
+ AOM_ICDF(14861), AOM_ICDF(15062), AOM_ICDF(15909), AOM_ICDF(17695),
+ AOM_ICDF(20429), AOM_ICDF(26225), AOM_ICDF(28603), AOM_ICDF(31340),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5376), AOM_ICDF(7722), AOM_ICDF(10921), AOM_ICDF(11813),
+ AOM_ICDF(13222), AOM_ICDF(13348), AOM_ICDF(14211), AOM_ICDF(15976),
+ AOM_ICDF(17110), AOM_ICDF(24634), AOM_ICDF(27176), AOM_ICDF(31484),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4736), AOM_ICDF(8226), AOM_ICDF(11137), AOM_ICDF(11988),
+ AOM_ICDF(13518), AOM_ICDF(13706), AOM_ICDF(14332), AOM_ICDF(16016),
+ AOM_ICDF(17301), AOM_ICDF(24641), AOM_ICDF(27704), AOM_ICDF(31016),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5760), AOM_ICDF(7592), AOM_ICDF(11880), AOM_ICDF(12612),
+ AOM_ICDF(13738), AOM_ICDF(13813), AOM_ICDF(14681), AOM_ICDF(16392),
+ AOM_ICDF(17306), AOM_ICDF(24619), AOM_ICDF(26334), AOM_ICDF(31818),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4096), AOM_ICDF(8524), AOM_ICDF(14316), AOM_ICDF(15392),
+ AOM_ICDF(16295), AOM_ICDF(16433), AOM_ICDF(17197), AOM_ICDF(18718),
+ AOM_ICDF(19924), AOM_ICDF(25123), AOM_ICDF(26953), AOM_ICDF(29856),
+ AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(6528), AOM_ICDF(13383), AOM_ICDF(17642), AOM_ICDF(18342),
+ AOM_ICDF(19224), AOM_ICDF(20209), AOM_ICDF(20899), AOM_ICDF(21944),
+ AOM_ICDF(23137), AOM_ICDF(25966), AOM_ICDF(27429), AOM_ICDF(28463),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4480), AOM_ICDF(16901), AOM_ICDF(18876), AOM_ICDF(19560),
+ AOM_ICDF(20257), AOM_ICDF(20912), AOM_ICDF(21169), AOM_ICDF(21959),
+ AOM_ICDF(23036), AOM_ICDF(25781), AOM_ICDF(27676), AOM_ICDF(28569),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2688), AOM_ICDF(5337), AOM_ICDF(18178), AOM_ICDF(18829),
+ AOM_ICDF(19344), AOM_ICDF(19628), AOM_ICDF(20267), AOM_ICDF(22135),
+ AOM_ICDF(22671), AOM_ICDF(25817), AOM_ICDF(26914), AOM_ICDF(28773),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8192), AOM_ICDF(11378), AOM_ICDF(14742), AOM_ICDF(17269),
+ AOM_ICDF(18230), AOM_ICDF(19001), AOM_ICDF(19655), AOM_ICDF(22949),
+ AOM_ICDF(24337), AOM_ICDF(28025), AOM_ICDF(29503), AOM_ICDF(30848),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5120), AOM_ICDF(10133), AOM_ICDF(13144), AOM_ICDF(14374),
+ AOM_ICDF(17020), AOM_ICDF(18920), AOM_ICDF(20235), AOM_ICDF(21677),
+ AOM_ICDF(23142), AOM_ICDF(27131), AOM_ICDF(28671), AOM_ICDF(30284),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7424), AOM_ICDF(15588), AOM_ICDF(18431), AOM_ICDF(19723),
+ AOM_ICDF(21455), AOM_ICDF(24705), AOM_ICDF(25461), AOM_ICDF(26753),
+ AOM_ICDF(28923), AOM_ICDF(29475), AOM_ICDF(29729), AOM_ICDF(29897),
+ AOM_ICDF(32768), 0,
+ },
{
- { AOM_ICDF(14208), AOM_ICDF(16238), AOM_ICDF(19079), AOM_ICDF(22512),
- AOM_ICDF(23632), AOM_ICDF(24373), AOM_ICDF(25291), AOM_ICDF(26109),
- AOM_ICDF(26811), AOM_ICDF(27858), AOM_ICDF(30428), AOM_ICDF(31424),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(10496), AOM_ICDF(13193), AOM_ICDF(20992), AOM_ICDF(22569),
- AOM_ICDF(23557), AOM_ICDF(24442), AOM_ICDF(25515), AOM_ICDF(26478),
- AOM_ICDF(26994), AOM_ICDF(27693), AOM_ICDF(30349), AOM_ICDF(31757),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5120), AOM_ICDF(8252), AOM_ICDF(9593), AOM_ICDF(22972),
- AOM_ICDF(23813), AOM_ICDF(24168), AOM_ICDF(24638), AOM_ICDF(25019),
- AOM_ICDF(26048), AOM_ICDF(27413), AOM_ICDF(30090), AOM_ICDF(30812),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(12544), AOM_ICDF(14045), AOM_ICDF(16678), AOM_ICDF(19167),
- AOM_ICDF(20459), AOM_ICDF(21329), AOM_ICDF(23518), AOM_ICDF(24783),
- AOM_ICDF(25563), AOM_ICDF(27280), AOM_ICDF(30217), AOM_ICDF(31273),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7552), AOM_ICDF(8636), AOM_ICDF(10993), AOM_ICDF(12992),
- AOM_ICDF(18616), AOM_ICDF(21880), AOM_ICDF(23113), AOM_ICDF(23867),
- AOM_ICDF(25710), AOM_ICDF(26758), AOM_ICDF(30115), AOM_ICDF(31328),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(11008), AOM_ICDF(12708), AOM_ICDF(16704), AOM_ICDF(18234),
- AOM_ICDF(21591), AOM_ICDF(26744), AOM_ICDF(28368), AOM_ICDF(30104),
- AOM_ICDF(31270), AOM_ICDF(32171), AOM_ICDF(32539), AOM_ICDF(32669),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6272), AOM_ICDF(7307), AOM_ICDF(8998), AOM_ICDF(12979),
- AOM_ICDF(18143), AOM_ICDF(19397), AOM_ICDF(20233), AOM_ICDF(20772),
- AOM_ICDF(25645), AOM_ICDF(26869), AOM_ICDF(30049), AOM_ICDF(30984),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(9536), AOM_ICDF(11533), AOM_ICDF(15940),
- AOM_ICDF(17403), AOM_ICDF(18169), AOM_ICDF(19253), AOM_ICDF(20045),
- AOM_ICDF(21337), AOM_ICDF(25847), AOM_ICDF(29551), AOM_ICDF(30682),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(10752), AOM_ICDF(12558), AOM_ICDF(15005), AOM_ICDF(16854),
- AOM_ICDF(18148), AOM_ICDF(19307), AOM_ICDF(21410), AOM_ICDF(23939),
- AOM_ICDF(24698), AOM_ICDF(26117), AOM_ICDF(29832), AOM_ICDF(31323),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(9008), AOM_ICDF(11885), AOM_ICDF(14829),
- AOM_ICDF(16543), AOM_ICDF(16779), AOM_ICDF(17841), AOM_ICDF(19182),
- AOM_ICDF(20190), AOM_ICDF(21664), AOM_ICDF(27650), AOM_ICDF(29909),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6528), AOM_ICDF(8476), AOM_ICDF(12841), AOM_ICDF(15721),
- AOM_ICDF(17437), AOM_ICDF(17823), AOM_ICDF(18874), AOM_ICDF(20394),
- AOM_ICDF(21216), AOM_ICDF(22344), AOM_ICDF(27922), AOM_ICDF(30743),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(10114), AOM_ICDF(12415), AOM_ICDF(15730),
- AOM_ICDF(17127), AOM_ICDF(17265), AOM_ICDF(18294), AOM_ICDF(19255),
- AOM_ICDF(20258), AOM_ICDF(21675), AOM_ICDF(27525), AOM_ICDF(29082),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6272), AOM_ICDF(12586), AOM_ICDF(15818), AOM_ICDF(21751),
- AOM_ICDF(22707), AOM_ICDF(23300), AOM_ICDF(24262), AOM_ICDF(25126),
- AOM_ICDF(25992), AOM_ICDF(27448), AOM_ICDF(30004), AOM_ICDF(31073),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(3968), AOM_ICDF(6893), AOM_ICDF(20538), AOM_ICDF(22050),
- AOM_ICDF(22805), AOM_ICDF(24408), AOM_ICDF(24833), AOM_ICDF(26073),
- AOM_ICDF(26439), AOM_ICDF(26884), AOM_ICDF(29895), AOM_ICDF(31938),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3072), AOM_ICDF(6204), AOM_ICDF(24363), AOM_ICDF(24995),
- AOM_ICDF(25363), AOM_ICDF(26103), AOM_ICDF(26546), AOM_ICDF(27518),
- AOM_ICDF(27621), AOM_ICDF(27902), AOM_ICDF(30164), AOM_ICDF(32148),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2560), AOM_ICDF(6572), AOM_ICDF(13837), AOM_ICDF(19693),
- AOM_ICDF(20377), AOM_ICDF(21010), AOM_ICDF(21699), AOM_ICDF(22737),
- AOM_ICDF(23286), AOM_ICDF(24323), AOM_ICDF(28875), AOM_ICDF(30837),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6912), AOM_ICDF(8831), AOM_ICDF(17059), AOM_ICDF(18404),
- AOM_ICDF(19221), AOM_ICDF(20434), AOM_ICDF(22313), AOM_ICDF(24151),
- AOM_ICDF(24420), AOM_ICDF(25855), AOM_ICDF(29474), AOM_ICDF(31623),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2944), AOM_ICDF(4808), AOM_ICDF(14965), AOM_ICDF(15870),
- AOM_ICDF(18714), AOM_ICDF(21989), AOM_ICDF(22957), AOM_ICDF(24528),
- AOM_ICDF(25365), AOM_ICDF(26001), AOM_ICDF(29596), AOM_ICDF(31678),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(6239), AOM_ICDF(19711), AOM_ICDF(20602),
- AOM_ICDF(22489), AOM_ICDF(27311), AOM_ICDF(28228), AOM_ICDF(30516),
- AOM_ICDF(31097), AOM_ICDF(31750), AOM_ICDF(32319), AOM_ICDF(32656),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2944), AOM_ICDF(4925), AOM_ICDF(13952), AOM_ICDF(15490),
- AOM_ICDF(18397), AOM_ICDF(20200), AOM_ICDF(20986), AOM_ICDF(22367),
- AOM_ICDF(24967), AOM_ICDF(25820), AOM_ICDF(29755), AOM_ICDF(31473),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4224), AOM_ICDF(6120), AOM_ICDF(14968), AOM_ICDF(17184),
- AOM_ICDF(18063), AOM_ICDF(19140), AOM_ICDF(20258), AOM_ICDF(21822),
- AOM_ICDF(22463), AOM_ICDF(24838), AOM_ICDF(28989), AOM_ICDF(31277),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5120), AOM_ICDF(7280), AOM_ICDF(17535), AOM_ICDF(18348),
- AOM_ICDF(19116), AOM_ICDF(20689), AOM_ICDF(21916), AOM_ICDF(24968),
- AOM_ICDF(25242), AOM_ICDF(26095), AOM_ICDF(29588), AOM_ICDF(31787),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2816), AOM_ICDF(4922), AOM_ICDF(17105), AOM_ICDF(18458),
- AOM_ICDF(19325), AOM_ICDF(19614), AOM_ICDF(20231), AOM_ICDF(21700),
- AOM_ICDF(22089), AOM_ICDF(22756), AOM_ICDF(27879), AOM_ICDF(31278),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2560), AOM_ICDF(4920), AOM_ICDF(18518), AOM_ICDF(19680),
- AOM_ICDF(20386), AOM_ICDF(20689), AOM_ICDF(21208), AOM_ICDF(22472),
- AOM_ICDF(22754), AOM_ICDF(23223), AOM_ICDF(27809), AOM_ICDF(31664),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3328), AOM_ICDF(5513), AOM_ICDF(17331), AOM_ICDF(19065),
- AOM_ICDF(19882), AOM_ICDF(20105), AOM_ICDF(20748), AOM_ICDF(22110),
- AOM_ICDF(22443), AOM_ICDF(23129), AOM_ICDF(28099), AOM_ICDF(30944),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2560), AOM_ICDF(6690), AOM_ICDF(20748), AOM_ICDF(22590),
- AOM_ICDF(23037), AOM_ICDF(23659), AOM_ICDF(24406), AOM_ICDF(25582),
- AOM_ICDF(25835), AOM_ICDF(26485), AOM_ICDF(29553), AOM_ICDF(31826),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(11392), AOM_ICDF(13647), AOM_ICDF(15216), AOM_ICDF(23156),
- AOM_ICDF(24102), AOM_ICDF(24540), AOM_ICDF(25183), AOM_ICDF(25746),
- AOM_ICDF(26706), AOM_ICDF(28032), AOM_ICDF(30511), AOM_ICDF(31357),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(8064), AOM_ICDF(11635), AOM_ICDF(17166), AOM_ICDF(22459),
- AOM_ICDF(23608), AOM_ICDF(24297), AOM_ICDF(25025), AOM_ICDF(25902),
- AOM_ICDF(26438), AOM_ICDF(27551), AOM_ICDF(30343), AOM_ICDF(31641),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(6905), AOM_ICDF(7612), AOM_ICDF(24258),
- AOM_ICDF(24862), AOM_ICDF(25005), AOM_ICDF(25399), AOM_ICDF(25658),
- AOM_ICDF(26491), AOM_ICDF(28281), AOM_ICDF(30472), AOM_ICDF(31037),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(10752), AOM_ICDF(13246), AOM_ICDF(14771), AOM_ICDF(18965),
- AOM_ICDF(20132), AOM_ICDF(20606), AOM_ICDF(22411), AOM_ICDF(23422),
- AOM_ICDF(24663), AOM_ICDF(27386), AOM_ICDF(30203), AOM_ICDF(31265),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(8320), AOM_ICDF(10135), AOM_ICDF(11815), AOM_ICDF(15962),
- AOM_ICDF(19829), AOM_ICDF(21555), AOM_ICDF(22738), AOM_ICDF(23482),
- AOM_ICDF(25513), AOM_ICDF(27100), AOM_ICDF(30222), AOM_ICDF(31246),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(11264), AOM_ICDF(13364), AOM_ICDF(16851), AOM_ICDF(20617),
- AOM_ICDF(23504), AOM_ICDF(26302), AOM_ICDF(28070), AOM_ICDF(29189),
- AOM_ICDF(30531), AOM_ICDF(31903), AOM_ICDF(32342), AOM_ICDF(32512),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6528), AOM_ICDF(7656), AOM_ICDF(8637), AOM_ICDF(15318),
- AOM_ICDF(18270), AOM_ICDF(18817), AOM_ICDF(19580), AOM_ICDF(20044),
- AOM_ICDF(24666), AOM_ICDF(26502), AOM_ICDF(29733), AOM_ICDF(30670),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(8307), AOM_ICDF(9167), AOM_ICDF(17476),
- AOM_ICDF(18366), AOM_ICDF(18663), AOM_ICDF(19765), AOM_ICDF(20425),
- AOM_ICDF(21534), AOM_ICDF(26888), AOM_ICDF(29989), AOM_ICDF(30857),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(11072), AOM_ICDF(12682), AOM_ICDF(17399),
- AOM_ICDF(19010), AOM_ICDF(19743), AOM_ICDF(20964), AOM_ICDF(22993),
- AOM_ICDF(23871), AOM_ICDF(25817), AOM_ICDF(29727), AOM_ICDF(31164),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5888), AOM_ICDF(7988), AOM_ICDF(9634), AOM_ICDF(16735),
- AOM_ICDF(18009), AOM_ICDF(18129), AOM_ICDF(18930), AOM_ICDF(19741),
- AOM_ICDF(20911), AOM_ICDF(22671), AOM_ICDF(27877), AOM_ICDF(29749),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5248), AOM_ICDF(8151), AOM_ICDF(10267), AOM_ICDF(17761),
- AOM_ICDF(19077), AOM_ICDF(19232), AOM_ICDF(19919), AOM_ICDF(20772),
- AOM_ICDF(21615), AOM_ICDF(23140), AOM_ICDF(28142), AOM_ICDF(30618),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(8390), AOM_ICDF(9723), AOM_ICDF(17206),
- AOM_ICDF(18212), AOM_ICDF(18275), AOM_ICDF(19068), AOM_ICDF(19657),
- AOM_ICDF(20886), AOM_ICDF(22650), AOM_ICDF(27907), AOM_ICDF(29084),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4480), AOM_ICDF(9232), AOM_ICDF(11163), AOM_ICDF(22580),
- AOM_ICDF(23368), AOM_ICDF(23653), AOM_ICDF(24436), AOM_ICDF(24989),
- AOM_ICDF(25809), AOM_ICDF(27087), AOM_ICDF(30038), AOM_ICDF(31104),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(10240), AOM_ICDF(11472), AOM_ICDF(14051), AOM_ICDF(16777),
- AOM_ICDF(18308), AOM_ICDF(19461), AOM_ICDF(22164), AOM_ICDF(24235),
- AOM_ICDF(25202), AOM_ICDF(26680), AOM_ICDF(29962), AOM_ICDF(31168),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7552), AOM_ICDF(9128), AOM_ICDF(16885), AOM_ICDF(18221),
- AOM_ICDF(19114), AOM_ICDF(20111), AOM_ICDF(23226), AOM_ICDF(25462),
- AOM_ICDF(26033), AOM_ICDF(27085), AOM_ICDF(30259), AOM_ICDF(31729),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5248), AOM_ICDF(7291), AOM_ICDF(8883), AOM_ICDF(18172),
- AOM_ICDF(19301), AOM_ICDF(19892), AOM_ICDF(21703), AOM_ICDF(22870),
- AOM_ICDF(23798), AOM_ICDF(25970), AOM_ICDF(29581), AOM_ICDF(30440),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(11008), AOM_ICDF(11943), AOM_ICDF(12838), AOM_ICDF(14729),
- AOM_ICDF(15340), AOM_ICDF(15719), AOM_ICDF(23245), AOM_ICDF(25217),
- AOM_ICDF(25453), AOM_ICDF(28282), AOM_ICDF(30735), AOM_ICDF(31696),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6272), AOM_ICDF(7100), AOM_ICDF(9506), AOM_ICDF(11141),
- AOM_ICDF(14891), AOM_ICDF(18048), AOM_ICDF(20808), AOM_ICDF(22910),
- AOM_ICDF(24450), AOM_ICDF(26172), AOM_ICDF(29625), AOM_ICDF(31233),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(8513), AOM_ICDF(11924), AOM_ICDF(13742),
- AOM_ICDF(16971), AOM_ICDF(22265), AOM_ICDF(25957), AOM_ICDF(29762),
- AOM_ICDF(30831), AOM_ICDF(32193), AOM_ICDF(32537), AOM_ICDF(32669),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4992), AOM_ICDF(5969), AOM_ICDF(7435), AOM_ICDF(10922),
- AOM_ICDF(15097), AOM_ICDF(16638), AOM_ICDF(18654), AOM_ICDF(20087),
- AOM_ICDF(23356), AOM_ICDF(25452), AOM_ICDF(29281), AOM_ICDF(30725),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(9600), AOM_ICDF(10324), AOM_ICDF(12603), AOM_ICDF(15427),
- AOM_ICDF(16950), AOM_ICDF(17959), AOM_ICDF(20909), AOM_ICDF(22299),
- AOM_ICDF(22994), AOM_ICDF(27308), AOM_ICDF(30379), AOM_ICDF(31154),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(9856), AOM_ICDF(11020), AOM_ICDF(12549), AOM_ICDF(14621),
- AOM_ICDF(15493), AOM_ICDF(16182), AOM_ICDF(21430), AOM_ICDF(25947),
- AOM_ICDF(26427), AOM_ICDF(27888), AOM_ICDF(30595), AOM_ICDF(31809),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6272), AOM_ICDF(7618), AOM_ICDF(10664), AOM_ICDF(12915),
- AOM_ICDF(14454), AOM_ICDF(14722), AOM_ICDF(17965), AOM_ICDF(20394),
- AOM_ICDF(21312), AOM_ICDF(23371), AOM_ICDF(28730), AOM_ICDF(30623),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5888), AOM_ICDF(7463), AOM_ICDF(10923), AOM_ICDF(12991),
- AOM_ICDF(14555), AOM_ICDF(14934), AOM_ICDF(18208), AOM_ICDF(21052),
- AOM_ICDF(21876), AOM_ICDF(23450), AOM_ICDF(28655), AOM_ICDF(31017),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6912), AOM_ICDF(8023), AOM_ICDF(10440), AOM_ICDF(13329),
- AOM_ICDF(14958), AOM_ICDF(15150), AOM_ICDF(18109), AOM_ICDF(20056),
- AOM_ICDF(21049), AOM_ICDF(23063), AOM_ICDF(28219), AOM_ICDF(29978),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5120), AOM_ICDF(7928), AOM_ICDF(11906), AOM_ICDF(15940),
- AOM_ICDF(16978), AOM_ICDF(17773), AOM_ICDF(22342), AOM_ICDF(24419),
- AOM_ICDF(25300), AOM_ICDF(27021), AOM_ICDF(30007), AOM_ICDF(31312),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(7296), AOM_ICDF(8291), AOM_ICDF(10299), AOM_ICDF(12767),
- AOM_ICDF(18252), AOM_ICDF(20656), AOM_ICDF(21413), AOM_ICDF(22300),
- AOM_ICDF(24958), AOM_ICDF(26544), AOM_ICDF(30069), AOM_ICDF(31387),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7168), AOM_ICDF(8668), AOM_ICDF(13187), AOM_ICDF(15041),
- AOM_ICDF(18824), AOM_ICDF(21371), AOM_ICDF(22261), AOM_ICDF(23574),
- AOM_ICDF(25082), AOM_ICDF(26133), AOM_ICDF(29839), AOM_ICDF(31693),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3584), AOM_ICDF(5750), AOM_ICDF(6594), AOM_ICDF(15662),
- AOM_ICDF(18845), AOM_ICDF(20090), AOM_ICDF(20783), AOM_ICDF(21438),
- AOM_ICDF(23430), AOM_ICDF(25436), AOM_ICDF(29446), AOM_ICDF(30471),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(8711), AOM_ICDF(10121), AOM_ICDF(11786),
- AOM_ICDF(15100), AOM_ICDF(16579), AOM_ICDF(20437), AOM_ICDF(21593),
- AOM_ICDF(22903), AOM_ICDF(25678), AOM_ICDF(29638), AOM_ICDF(31130),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4480), AOM_ICDF(5033), AOM_ICDF(6441), AOM_ICDF(7646),
- AOM_ICDF(18034), AOM_ICDF(21867), AOM_ICDF(22676), AOM_ICDF(23504),
- AOM_ICDF(25892), AOM_ICDF(26913), AOM_ICDF(30206), AOM_ICDF(31507),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7296), AOM_ICDF(8192), AOM_ICDF(11072), AOM_ICDF(12676),
- AOM_ICDF(19460), AOM_ICDF(25734), AOM_ICDF(26778), AOM_ICDF(28439),
- AOM_ICDF(31077), AOM_ICDF(32002), AOM_ICDF(32469), AOM_ICDF(32671),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3840), AOM_ICDF(4518), AOM_ICDF(5511), AOM_ICDF(8229),
- AOM_ICDF(16448), AOM_ICDF(18394), AOM_ICDF(19292), AOM_ICDF(20345),
- AOM_ICDF(25683), AOM_ICDF(27399), AOM_ICDF(30566), AOM_ICDF(31375),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6528), AOM_ICDF(7451), AOM_ICDF(8934), AOM_ICDF(12006),
- AOM_ICDF(15767), AOM_ICDF(17127), AOM_ICDF(18471), AOM_ICDF(19476),
- AOM_ICDF(21553), AOM_ICDF(25715), AOM_ICDF(29572), AOM_ICDF(30795),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7168), AOM_ICDF(8368), AOM_ICDF(10370), AOM_ICDF(11855),
- AOM_ICDF(14966), AOM_ICDF(17544), AOM_ICDF(19328), AOM_ICDF(21271),
- AOM_ICDF(22708), AOM_ICDF(24555), AOM_ICDF(29207), AOM_ICDF(31280),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5504), AOM_ICDF(6782), AOM_ICDF(8914), AOM_ICDF(11452),
- AOM_ICDF(15958), AOM_ICDF(16648), AOM_ICDF(17530), AOM_ICDF(18899),
- AOM_ICDF(20578), AOM_ICDF(22245), AOM_ICDF(28123), AOM_ICDF(30427),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5248), AOM_ICDF(6538), AOM_ICDF(9100), AOM_ICDF(11294),
- AOM_ICDF(15638), AOM_ICDF(16589), AOM_ICDF(17600), AOM_ICDF(19318),
- AOM_ICDF(20842), AOM_ICDF(22193), AOM_ICDF(28018), AOM_ICDF(30875),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(6553), AOM_ICDF(8499), AOM_ICDF(11769),
- AOM_ICDF(15661), AOM_ICDF(16178), AOM_ICDF(17280), AOM_ICDF(18490),
- AOM_ICDF(20386), AOM_ICDF(22127), AOM_ICDF(28071), AOM_ICDF(30089),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(7468), AOM_ICDF(10136), AOM_ICDF(15013),
- AOM_ICDF(17716), AOM_ICDF(19595), AOM_ICDF(20830), AOM_ICDF(22136),
- AOM_ICDF(23714), AOM_ICDF(25341), AOM_ICDF(29403), AOM_ICDF(31072),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(6656), AOM_ICDF(8186), AOM_ICDF(13755), AOM_ICDF(15971),
- AOM_ICDF(20413), AOM_ICDF(27940), AOM_ICDF(28657), AOM_ICDF(29910),
- AOM_ICDF(31004), AOM_ICDF(31969), AOM_ICDF(32443), AOM_ICDF(32665),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6400), AOM_ICDF(8048), AOM_ICDF(16256), AOM_ICDF(17568),
- AOM_ICDF(21074), AOM_ICDF(28253), AOM_ICDF(28976), AOM_ICDF(30531),
- AOM_ICDF(31099), AOM_ICDF(31875), AOM_ICDF(32426), AOM_ICDF(32701),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3840), AOM_ICDF(6439), AOM_ICDF(9524), AOM_ICDF(17270),
- AOM_ICDF(21391), AOM_ICDF(25777), AOM_ICDF(26815), AOM_ICDF(27908),
- AOM_ICDF(29199), AOM_ICDF(31151), AOM_ICDF(32168), AOM_ICDF(32407),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(9600), AOM_ICDF(10958), AOM_ICDF(14962), AOM_ICDF(16560),
- AOM_ICDF(19908), AOM_ICDF(23309), AOM_ICDF(25637), AOM_ICDF(28033),
- AOM_ICDF(29032), AOM_ICDF(32009), AOM_ICDF(32528), AOM_ICDF(32701),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(5598), AOM_ICDF(9525), AOM_ICDF(10578),
- AOM_ICDF(18511), AOM_ICDF(27502), AOM_ICDF(28654), AOM_ICDF(29907),
- AOM_ICDF(31069), AOM_ICDF(32071), AOM_ICDF(32493), AOM_ICDF(32670),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4864), AOM_ICDF(5845), AOM_ICDF(11524), AOM_ICDF(12294),
- AOM_ICDF(16882), AOM_ICDF(27955), AOM_ICDF(28839), AOM_ICDF(30251),
- AOM_ICDF(30949), AOM_ICDF(31873), AOM_ICDF(32467), AOM_ICDF(32703),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3968), AOM_ICDF(5431), AOM_ICDF(8955), AOM_ICDF(11746),
- AOM_ICDF(18914), AOM_ICDF(24489), AOM_ICDF(25524), AOM_ICDF(27194),
- AOM_ICDF(29894), AOM_ICDF(31589), AOM_ICDF(32335), AOM_ICDF(32551),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(7584), AOM_ICDF(11814), AOM_ICDF(14567),
- AOM_ICDF(18253), AOM_ICDF(21882), AOM_ICDF(23966), AOM_ICDF(26442),
- AOM_ICDF(27628), AOM_ICDF(31142), AOM_ICDF(32177), AOM_ICDF(32466),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7808), AOM_ICDF(9661), AOM_ICDF(15257), AOM_ICDF(16576),
- AOM_ICDF(20349), AOM_ICDF(24902), AOM_ICDF(26592), AOM_ICDF(29415),
- AOM_ICDF(30083), AOM_ICDF(31782), AOM_ICDF(32360), AOM_ICDF(32680),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2816), AOM_ICDF(4805), AOM_ICDF(8519), AOM_ICDF(10112),
- AOM_ICDF(13408), AOM_ICDF(18465), AOM_ICDF(19582), AOM_ICDF(21333),
- AOM_ICDF(22494), AOM_ICDF(24059), AOM_ICDF(29026), AOM_ICDF(31321),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2432), AOM_ICDF(4091), AOM_ICDF(8236), AOM_ICDF(9669),
- AOM_ICDF(13111), AOM_ICDF(19352), AOM_ICDF(20557), AOM_ICDF(22370),
- AOM_ICDF(23060), AOM_ICDF(24425), AOM_ICDF(28890), AOM_ICDF(31586),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3200), AOM_ICDF(5164), AOM_ICDF(8938), AOM_ICDF(11126),
- AOM_ICDF(14494), AOM_ICDF(18433), AOM_ICDF(19721), AOM_ICDF(21148),
- AOM_ICDF(22510), AOM_ICDF(24233), AOM_ICDF(29134), AOM_ICDF(31235),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4480), AOM_ICDF(7132), AOM_ICDF(13341), AOM_ICDF(17959),
- AOM_ICDF(21108), AOM_ICDF(25786), AOM_ICDF(27068), AOM_ICDF(29161),
- AOM_ICDF(30077), AOM_ICDF(31286), AOM_ICDF(32363), AOM_ICDF(32565),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(9600), AOM_ICDF(10686), AOM_ICDF(12152), AOM_ICDF(16918),
- AOM_ICDF(19247), AOM_ICDF(20286), AOM_ICDF(20969), AOM_ICDF(21568),
- AOM_ICDF(25987), AOM_ICDF(27444), AOM_ICDF(30376), AOM_ICDF(31348),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(8832), AOM_ICDF(10609), AOM_ICDF(14591), AOM_ICDF(17948),
- AOM_ICDF(19973), AOM_ICDF(21052), AOM_ICDF(21922), AOM_ICDF(22854),
- AOM_ICDF(25642), AOM_ICDF(26783), AOM_ICDF(29892), AOM_ICDF(31499),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3840), AOM_ICDF(5196), AOM_ICDF(5842), AOM_ICDF(17177),
- AOM_ICDF(19308), AOM_ICDF(19726), AOM_ICDF(20235), AOM_ICDF(20627),
- AOM_ICDF(24184), AOM_ICDF(26799), AOM_ICDF(29993), AOM_ICDF(30752),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(9856), AOM_ICDF(11288), AOM_ICDF(12295), AOM_ICDF(15482),
- AOM_ICDF(18345), AOM_ICDF(19093), AOM_ICDF(20963), AOM_ICDF(21747),
- AOM_ICDF(24718), AOM_ICDF(26793), AOM_ICDF(29991), AOM_ICDF(31032),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6528), AOM_ICDF(7348), AOM_ICDF(8440), AOM_ICDF(11002),
- AOM_ICDF(17084), AOM_ICDF(19749), AOM_ICDF(20766), AOM_ICDF(21563),
- AOM_ICDF(25502), AOM_ICDF(26950), AOM_ICDF(30245), AOM_ICDF(31152),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(9728), AOM_ICDF(10448), AOM_ICDF(12541), AOM_ICDF(14674),
- AOM_ICDF(19296), AOM_ICDF(23919), AOM_ICDF(25198), AOM_ICDF(26558),
- AOM_ICDF(30755), AOM_ICDF(31958), AOM_ICDF(32461), AOM_ICDF(32594),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5120), AOM_ICDF(5768), AOM_ICDF(6401), AOM_ICDF(10321),
- AOM_ICDF(14515), AOM_ICDF(15362), AOM_ICDF(15838), AOM_ICDF(16301),
- AOM_ICDF(26078), AOM_ICDF(27489), AOM_ICDF(30397), AOM_ICDF(31175),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5888), AOM_ICDF(6518), AOM_ICDF(7236), AOM_ICDF(12128),
- AOM_ICDF(14327), AOM_ICDF(15015), AOM_ICDF(16055), AOM_ICDF(16773),
- AOM_ICDF(20897), AOM_ICDF(25395), AOM_ICDF(29341), AOM_ICDF(30452),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(10368), AOM_ICDF(11856), AOM_ICDF(13245), AOM_ICDF(15614),
- AOM_ICDF(18451), AOM_ICDF(19498), AOM_ICDF(20846), AOM_ICDF(22429),
- AOM_ICDF(24610), AOM_ICDF(26522), AOM_ICDF(30279), AOM_ICDF(31523),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(7061), AOM_ICDF(8668), AOM_ICDF(12423),
- AOM_ICDF(15346), AOM_ICDF(15634), AOM_ICDF(16504), AOM_ICDF(17584),
- AOM_ICDF(21083), AOM_ICDF(23000), AOM_ICDF(28456), AOM_ICDF(30241),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5760), AOM_ICDF(7026), AOM_ICDF(8735), AOM_ICDF(12665),
- AOM_ICDF(15507), AOM_ICDF(15870), AOM_ICDF(16794), AOM_ICDF(17792),
- AOM_ICDF(21068), AOM_ICDF(23033), AOM_ICDF(28395), AOM_ICDF(30701),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7040), AOM_ICDF(8045), AOM_ICDF(9300), AOM_ICDF(13303),
- AOM_ICDF(15462), AOM_ICDF(15625), AOM_ICDF(16362), AOM_ICDF(17067),
- AOM_ICDF(20686), AOM_ICDF(22810), AOM_ICDF(27983), AOM_ICDF(29347),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5632), AOM_ICDF(8070), AOM_ICDF(9903), AOM_ICDF(16658),
- AOM_ICDF(18637), AOM_ICDF(19728), AOM_ICDF(20543), AOM_ICDF(21450),
- AOM_ICDF(24456), AOM_ICDF(26372), AOM_ICDF(29645), AOM_ICDF(30731),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(11008), AOM_ICDF(12283), AOM_ICDF(14364), AOM_ICDF(18419),
- AOM_ICDF(19948), AOM_ICDF(20618), AOM_ICDF(21899), AOM_ICDF(22960),
- AOM_ICDF(23994), AOM_ICDF(26565), AOM_ICDF(30078), AOM_ICDF(31213),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(9088), AOM_ICDF(11586), AOM_ICDF(16716), AOM_ICDF(18876),
- AOM_ICDF(20112), AOM_ICDF(21105), AOM_ICDF(22426), AOM_ICDF(23800),
- AOM_ICDF(24396), AOM_ICDF(26653), AOM_ICDF(30021), AOM_ICDF(31566),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6144), AOM_ICDF(8328), AOM_ICDF(9378), AOM_ICDF(20096),
- AOM_ICDF(20984), AOM_ICDF(21256), AOM_ICDF(22335), AOM_ICDF(23109),
- AOM_ICDF(24128), AOM_ICDF(26896), AOM_ICDF(29947), AOM_ICDF(30740),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(10496), AOM_ICDF(12323), AOM_ICDF(13441), AOM_ICDF(15479),
- AOM_ICDF(16976), AOM_ICDF(17518), AOM_ICDF(20794), AOM_ICDF(22571),
- AOM_ICDF(23328), AOM_ICDF(27421), AOM_ICDF(30512), AOM_ICDF(31561),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7296), AOM_ICDF(8391), AOM_ICDF(10010), AOM_ICDF(12258),
- AOM_ICDF(15388), AOM_ICDF(16944), AOM_ICDF(19602), AOM_ICDF(21196),
- AOM_ICDF(22869), AOM_ICDF(25112), AOM_ICDF(29389), AOM_ICDF(30709),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(9728), AOM_ICDF(11168), AOM_ICDF(14881), AOM_ICDF(17298),
- AOM_ICDF(20151), AOM_ICDF(22916), AOM_ICDF(25918), AOM_ICDF(28032),
- AOM_ICDF(29549), AOM_ICDF(31787), AOM_ICDF(32293), AOM_ICDF(32521),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6400), AOM_ICDF(7636), AOM_ICDF(8716), AOM_ICDF(12718),
- AOM_ICDF(15711), AOM_ICDF(16420), AOM_ICDF(18144), AOM_ICDF(19287),
- AOM_ICDF(22815), AOM_ICDF(25886), AOM_ICDF(29596), AOM_ICDF(30674),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(9856), AOM_ICDF(10662), AOM_ICDF(11785), AOM_ICDF(14236),
- AOM_ICDF(14998), AOM_ICDF(15391), AOM_ICDF(17156), AOM_ICDF(17949),
- AOM_ICDF(18470), AOM_ICDF(27797), AOM_ICDF(30418), AOM_ICDF(31244),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(8960), AOM_ICDF(10448), AOM_ICDF(12017), AOM_ICDF(14128),
- AOM_ICDF(15765), AOM_ICDF(16637), AOM_ICDF(19347), AOM_ICDF(21759),
- AOM_ICDF(22490), AOM_ICDF(25300), AOM_ICDF(29676), AOM_ICDF(31077),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7168), AOM_ICDF(8468), AOM_ICDF(10177), AOM_ICDF(13693),
- AOM_ICDF(15333), AOM_ICDF(15472), AOM_ICDF(17094), AOM_ICDF(18257),
- AOM_ICDF(19277), AOM_ICDF(22386), AOM_ICDF(28023), AOM_ICDF(29969),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6400), AOM_ICDF(8151), AOM_ICDF(10651), AOM_ICDF(13992),
- AOM_ICDF(15677), AOM_ICDF(15835), AOM_ICDF(17422), AOM_ICDF(18621),
- AOM_ICDF(19450), AOM_ICDF(22207), AOM_ICDF(27735), AOM_ICDF(30409),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7296), AOM_ICDF(8391), AOM_ICDF(9915), AOM_ICDF(13980),
- AOM_ICDF(15352), AOM_ICDF(15450), AOM_ICDF(17006), AOM_ICDF(17930),
- AOM_ICDF(18973), AOM_ICDF(22045), AOM_ICDF(27658), AOM_ICDF(29235),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6272), AOM_ICDF(9481), AOM_ICDF(11664), AOM_ICDF(16537),
- AOM_ICDF(17656), AOM_ICDF(18094), AOM_ICDF(20673), AOM_ICDF(21949),
- AOM_ICDF(22752), AOM_ICDF(25921), AOM_ICDF(29612), AOM_ICDF(30869),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(6784), AOM_ICDF(8104), AOM_ICDF(12536), AOM_ICDF(14589),
- AOM_ICDF(15843), AOM_ICDF(17357), AOM_ICDF(19765), AOM_ICDF(23981),
- AOM_ICDF(24633), AOM_ICDF(25618), AOM_ICDF(29556), AOM_ICDF(31438),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5760), AOM_ICDF(7237), AOM_ICDF(14717), AOM_ICDF(15587),
- AOM_ICDF(16364), AOM_ICDF(17537), AOM_ICDF(20393), AOM_ICDF(26097),
- AOM_ICDF(26462), AOM_ICDF(27029), AOM_ICDF(30123), AOM_ICDF(31921),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(5906), AOM_ICDF(8424), AOM_ICDF(16214),
- AOM_ICDF(16978), AOM_ICDF(17743), AOM_ICDF(19680), AOM_ICDF(22441),
- AOM_ICDF(23167), AOM_ICDF(25080), AOM_ICDF(29224), AOM_ICDF(30650),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(9472), AOM_ICDF(10473), AOM_ICDF(12737), AOM_ICDF(14173),
- AOM_ICDF(15051), AOM_ICDF(15632), AOM_ICDF(20652), AOM_ICDF(24864),
- AOM_ICDF(25204), AOM_ICDF(27006), AOM_ICDF(30292), AOM_ICDF(31501),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4480), AOM_ICDF(5475), AOM_ICDF(8247), AOM_ICDF(9646),
- AOM_ICDF(12203), AOM_ICDF(14760), AOM_ICDF(18488), AOM_ICDF(22616),
- AOM_ICDF(23449), AOM_ICDF(24650), AOM_ICDF(29026), AOM_ICDF(30955),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(6957), AOM_ICDF(12502), AOM_ICDF(13805),
- AOM_ICDF(16777), AOM_ICDF(21052), AOM_ICDF(23981), AOM_ICDF(30606),
- AOM_ICDF(31206), AOM_ICDF(31981), AOM_ICDF(32414), AOM_ICDF(32681),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4480), AOM_ICDF(5475), AOM_ICDF(7820), AOM_ICDF(9805),
- AOM_ICDF(12793), AOM_ICDF(14252), AOM_ICDF(16711), AOM_ICDF(20725),
- AOM_ICDF(23406), AOM_ICDF(25015), AOM_ICDF(29225), AOM_ICDF(30775),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6912), AOM_ICDF(7619), AOM_ICDF(10173), AOM_ICDF(12424),
- AOM_ICDF(13502), AOM_ICDF(14410), AOM_ICDF(17852), AOM_ICDF(21348),
- AOM_ICDF(22017), AOM_ICDF(25461), AOM_ICDF(29571), AOM_ICDF(31020),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7680), AOM_ICDF(8562), AOM_ICDF(11399), AOM_ICDF(12263),
- AOM_ICDF(12870), AOM_ICDF(13486), AOM_ICDF(18307), AOM_ICDF(26385),
- AOM_ICDF(26734), AOM_ICDF(27724), AOM_ICDF(30482), AOM_ICDF(31955),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4992), AOM_ICDF(6186), AOM_ICDF(9820), AOM_ICDF(11725),
- AOM_ICDF(13117), AOM_ICDF(13406), AOM_ICDF(15978), AOM_ICDF(20372),
- AOM_ICDF(20953), AOM_ICDF(22245), AOM_ICDF(28205), AOM_ICDF(30879),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4736), AOM_ICDF(6050), AOM_ICDF(10747), AOM_ICDF(12295),
- AOM_ICDF(13445), AOM_ICDF(13844), AOM_ICDF(16357), AOM_ICDF(21485),
- AOM_ICDF(21838), AOM_ICDF(22820), AOM_ICDF(28183), AOM_ICDF(31138),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5760), AOM_ICDF(6710), AOM_ICDF(10476), AOM_ICDF(12855),
- AOM_ICDF(14101), AOM_ICDF(14482), AOM_ICDF(17053), AOM_ICDF(20613),
- AOM_ICDF(21278), AOM_ICDF(22580), AOM_ICDF(28351), AOM_ICDF(30542),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4480), AOM_ICDF(6359), AOM_ICDF(11826), AOM_ICDF(14265),
- AOM_ICDF(14852), AOM_ICDF(15753), AOM_ICDF(19276), AOM_ICDF(24757),
- AOM_ICDF(25226), AOM_ICDF(26287), AOM_ICDF(29629), AOM_ICDF(31493),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(7424), AOM_ICDF(8612), AOM_ICDF(11726), AOM_ICDF(15286),
- AOM_ICDF(16881), AOM_ICDF(17151), AOM_ICDF(17944), AOM_ICDF(19160),
- AOM_ICDF(20011), AOM_ICDF(21356), AOM_ICDF(27463), AOM_ICDF(29805),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(7516), AOM_ICDF(15210), AOM_ICDF(17109),
- AOM_ICDF(18458), AOM_ICDF(18708), AOM_ICDF(19587), AOM_ICDF(20977),
- AOM_ICDF(21484), AOM_ICDF(22277), AOM_ICDF(27768), AOM_ICDF(30893),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3584), AOM_ICDF(5522), AOM_ICDF(7225), AOM_ICDF(18079),
- AOM_ICDF(18909), AOM_ICDF(18999), AOM_ICDF(19698), AOM_ICDF(20413),
- AOM_ICDF(21185), AOM_ICDF(23040), AOM_ICDF(28056), AOM_ICDF(29473),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(8612), AOM_ICDF(10782), AOM_ICDF(12958),
- AOM_ICDF(14687), AOM_ICDF(14818), AOM_ICDF(17553), AOM_ICDF(19395),
- AOM_ICDF(20231), AOM_ICDF(23316), AOM_ICDF(28559), AOM_ICDF(30614),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5632), AOM_ICDF(6586), AOM_ICDF(9347), AOM_ICDF(11520),
- AOM_ICDF(15628), AOM_ICDF(16300), AOM_ICDF(17651), AOM_ICDF(19245),
- AOM_ICDF(20671), AOM_ICDF(22089), AOM_ICDF(28013), AOM_ICDF(30279),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3840), AOM_ICDF(5309), AOM_ICDF(9385), AOM_ICDF(10995),
- AOM_ICDF(14099), AOM_ICDF(18154), AOM_ICDF(19638), AOM_ICDF(21690),
- AOM_ICDF(23031), AOM_ICDF(24552), AOM_ICDF(29238), AOM_ICDF(31251),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(6339), AOM_ICDF(8301), AOM_ICDF(11620),
- AOM_ICDF(14701), AOM_ICDF(14991), AOM_ICDF(16033), AOM_ICDF(17210),
- AOM_ICDF(20431), AOM_ICDF(22310), AOM_ICDF(27948), AOM_ICDF(29774),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5632), AOM_ICDF(6692), AOM_ICDF(8729), AOM_ICDF(12618),
- AOM_ICDF(13927), AOM_ICDF(14081), AOM_ICDF(15176), AOM_ICDF(16413),
- AOM_ICDF(17371), AOM_ICDF(22183), AOM_ICDF(28013), AOM_ICDF(29815),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6528), AOM_ICDF(7861), AOM_ICDF(11072), AOM_ICDF(12945),
- AOM_ICDF(14726), AOM_ICDF(14971), AOM_ICDF(16570), AOM_ICDF(19481),
- AOM_ICDF(20260), AOM_ICDF(21921), AOM_ICDF(27980), AOM_ICDF(30449),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(6553), AOM_ICDF(9523), AOM_ICDF(12199),
- AOM_ICDF(13764), AOM_ICDF(13972), AOM_ICDF(14926), AOM_ICDF(16320),
- AOM_ICDF(17091), AOM_ICDF(18744), AOM_ICDF(26359), AOM_ICDF(29288),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4736), AOM_ICDF(6160), AOM_ICDF(10318), AOM_ICDF(12718),
- AOM_ICDF(14251), AOM_ICDF(14527), AOM_ICDF(15453), AOM_ICDF(17009),
- AOM_ICDF(17625), AOM_ICDF(19045), AOM_ICDF(26335), AOM_ICDF(30079),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5760), AOM_ICDF(6815), AOM_ICDF(9248), AOM_ICDF(12722),
- AOM_ICDF(14141), AOM_ICDF(14301), AOM_ICDF(15095), AOM_ICDF(16200),
- AOM_ICDF(17106), AOM_ICDF(18697), AOM_ICDF(26172), AOM_ICDF(28388),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4096), AOM_ICDF(6672), AOM_ICDF(11055), AOM_ICDF(16327),
- AOM_ICDF(17508), AOM_ICDF(17671), AOM_ICDF(18733), AOM_ICDF(19994),
- AOM_ICDF(20742), AOM_ICDF(22151), AOM_ICDF(27708), AOM_ICDF(30021),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(7936), AOM_ICDF(9197), AOM_ICDF(13524), AOM_ICDF(16819),
- AOM_ICDF(18267), AOM_ICDF(18636), AOM_ICDF(19409), AOM_ICDF(20661),
- AOM_ICDF(21323), AOM_ICDF(22307), AOM_ICDF(27905), AOM_ICDF(30678),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(7302), AOM_ICDF(16951), AOM_ICDF(18383),
- AOM_ICDF(19388), AOM_ICDF(19608), AOM_ICDF(20225), AOM_ICDF(21597),
- AOM_ICDF(21946), AOM_ICDF(22538), AOM_ICDF(27613), AOM_ICDF(31318),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3840), AOM_ICDF(5987), AOM_ICDF(8184), AOM_ICDF(19612),
- AOM_ICDF(20392), AOM_ICDF(20476), AOM_ICDF(21100), AOM_ICDF(21693),
- AOM_ICDF(22428), AOM_ICDF(23963), AOM_ICDF(28709), AOM_ICDF(30342),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(8448), AOM_ICDF(9588), AOM_ICDF(12395), AOM_ICDF(14447),
- AOM_ICDF(16163), AOM_ICDF(16374), AOM_ICDF(18743), AOM_ICDF(20606),
- AOM_ICDF(21271), AOM_ICDF(23786), AOM_ICDF(28768), AOM_ICDF(30877),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5760), AOM_ICDF(6710), AOM_ICDF(10069), AOM_ICDF(11965),
- AOM_ICDF(15976), AOM_ICDF(16719), AOM_ICDF(17973), AOM_ICDF(19880),
- AOM_ICDF(21139), AOM_ICDF(22275), AOM_ICDF(28259), AOM_ICDF(30954),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3968), AOM_ICDF(5431), AOM_ICDF(10557), AOM_ICDF(12069),
- AOM_ICDF(14280), AOM_ICDF(18973), AOM_ICDF(20374), AOM_ICDF(23037),
- AOM_ICDF(24215), AOM_ICDF(25050), AOM_ICDF(29271), AOM_ICDF(31716),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(7061), AOM_ICDF(9672), AOM_ICDF(12246),
- AOM_ICDF(15351), AOM_ICDF(15717), AOM_ICDF(16716), AOM_ICDF(18158),
- AOM_ICDF(21126), AOM_ICDF(22672), AOM_ICDF(28035), AOM_ICDF(30494),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6272), AOM_ICDF(7204), AOM_ICDF(9700), AOM_ICDF(13252),
- AOM_ICDF(14599), AOM_ICDF(14926), AOM_ICDF(15902), AOM_ICDF(17220),
- AOM_ICDF(18010), AOM_ICDF(22795), AOM_ICDF(28405), AOM_ICDF(30467),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6912), AOM_ICDF(8427), AOM_ICDF(12420), AOM_ICDF(14171),
- AOM_ICDF(15792), AOM_ICDF(16156), AOM_ICDF(17584), AOM_ICDF(20846),
- AOM_ICDF(21451), AOM_ICDF(22556), AOM_ICDF(28101), AOM_ICDF(31054),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5248), AOM_ICDF(6431), AOM_ICDF(10855), AOM_ICDF(13296),
- AOM_ICDF(14848), AOM_ICDF(15135), AOM_ICDF(15893), AOM_ICDF(17277),
- AOM_ICDF(17943), AOM_ICDF(19275), AOM_ICDF(26443), AOM_ICDF(30174),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4736), AOM_ICDF(6050), AOM_ICDF(12103), AOM_ICDF(14343),
- AOM_ICDF(15633), AOM_ICDF(15978), AOM_ICDF(16699), AOM_ICDF(18205),
- AOM_ICDF(18660), AOM_ICDF(19707), AOM_ICDF(26544), AOM_ICDF(30872),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(7166), AOM_ICDF(11066), AOM_ICDF(14088),
- AOM_ICDF(15377), AOM_ICDF(15644), AOM_ICDF(16447), AOM_ICDF(17786),
- AOM_ICDF(18605), AOM_ICDF(19822), AOM_ICDF(27104), AOM_ICDF(29648),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(7358), AOM_ICDF(13016), AOM_ICDF(18200),
- AOM_ICDF(19015), AOM_ICDF(19189), AOM_ICDF(20038), AOM_ICDF(21430),
- AOM_ICDF(21917), AOM_ICDF(22977), AOM_ICDF(27949), AOM_ICDF(30848),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(7296), AOM_ICDF(8490), AOM_ICDF(11145), AOM_ICDF(15318),
- AOM_ICDF(16693), AOM_ICDF(16889), AOM_ICDF(17571), AOM_ICDF(18580),
- AOM_ICDF(19688), AOM_ICDF(21272), AOM_ICDF(27245), AOM_ICDF(28971),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(7623), AOM_ICDF(16070), AOM_ICDF(18136),
- AOM_ICDF(19225), AOM_ICDF(19397), AOM_ICDF(20128), AOM_ICDF(21362),
- AOM_ICDF(21808), AOM_ICDF(22621), AOM_ICDF(27932), AOM_ICDF(30407),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3200), AOM_ICDF(5164), AOM_ICDF(6566), AOM_ICDF(18368),
- AOM_ICDF(19106), AOM_ICDF(19155), AOM_ICDF(19793), AOM_ICDF(20300),
- AOM_ICDF(21177), AOM_ICDF(23079), AOM_ICDF(27848), AOM_ICDF(28924),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7040), AOM_ICDF(8146), AOM_ICDF(10550), AOM_ICDF(12876),
- AOM_ICDF(14506), AOM_ICDF(14629), AOM_ICDF(17180), AOM_ICDF(19129),
- AOM_ICDF(20088), AOM_ICDF(23407), AOM_ICDF(28673), AOM_ICDF(30257),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(7166), AOM_ICDF(9466), AOM_ICDF(11999),
- AOM_ICDF(15723), AOM_ICDF(16293), AOM_ICDF(17580), AOM_ICDF(19004),
- AOM_ICDF(20509), AOM_ICDF(22233), AOM_ICDF(28118), AOM_ICDF(29989),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3840), AOM_ICDF(5422), AOM_ICDF(9054), AOM_ICDF(11018),
- AOM_ICDF(13605), AOM_ICDF(17576), AOM_ICDF(19178), AOM_ICDF(21514),
- AOM_ICDF(22877), AOM_ICDF(24461), AOM_ICDF(29069), AOM_ICDF(30933),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(6553), AOM_ICDF(8294), AOM_ICDF(12601),
- AOM_ICDF(15043), AOM_ICDF(15273), AOM_ICDF(16230), AOM_ICDF(17134),
- AOM_ICDF(20737), AOM_ICDF(22899), AOM_ICDF(28219), AOM_ICDF(29410),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5760), AOM_ICDF(6815), AOM_ICDF(8336), AOM_ICDF(12965),
- AOM_ICDF(14282), AOM_ICDF(14444), AOM_ICDF(15446), AOM_ICDF(16461),
- AOM_ICDF(17544), AOM_ICDF(22183), AOM_ICDF(27682), AOM_ICDF(29132),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(8084), AOM_ICDF(10880), AOM_ICDF(12954),
- AOM_ICDF(14527), AOM_ICDF(14728), AOM_ICDF(16490), AOM_ICDF(19224),
- AOM_ICDF(20071), AOM_ICDF(21857), AOM_ICDF(27653), AOM_ICDF(30031),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(6660), AOM_ICDF(9006), AOM_ICDF(12205),
- AOM_ICDF(13614), AOM_ICDF(13740), AOM_ICDF(14632), AOM_ICDF(15766),
- AOM_ICDF(16629), AOM_ICDF(18394), AOM_ICDF(25918), AOM_ICDF(28460),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4736), AOM_ICDF(6488), AOM_ICDF(9978), AOM_ICDF(12889),
- AOM_ICDF(14419), AOM_ICDF(14607), AOM_ICDF(15458), AOM_ICDF(16743),
- AOM_ICDF(17369), AOM_ICDF(19053), AOM_ICDF(26393), AOM_ICDF(29456),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5760), AOM_ICDF(6710), AOM_ICDF(8542), AOM_ICDF(12830),
- AOM_ICDF(13956), AOM_ICDF(14031), AOM_ICDF(14763), AOM_ICDF(15677),
- AOM_ICDF(16545), AOM_ICDF(18256), AOM_ICDF(25569), AOM_ICDF(27284),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4096), AOM_ICDF(7008), AOM_ICDF(11436), AOM_ICDF(17228),
- AOM_ICDF(18131), AOM_ICDF(18269), AOM_ICDF(19345), AOM_ICDF(20551),
- AOM_ICDF(21315), AOM_ICDF(22836), AOM_ICDF(28035), AOM_ICDF(29865),
- AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(6528), AOM_ICDF(10833), AOM_ICDF(17688), AOM_ICDF(21947),
- AOM_ICDF(22829), AOM_ICDF(23814), AOM_ICDF(24514), AOM_ICDF(25707),
- AOM_ICDF(26397), AOM_ICDF(27442), AOM_ICDF(30271), AOM_ICDF(31734),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4480), AOM_ICDF(8679), AOM_ICDF(21100), AOM_ICDF(23075),
- AOM_ICDF(23772), AOM_ICDF(24427), AOM_ICDF(25111), AOM_ICDF(26188),
- AOM_ICDF(26445), AOM_ICDF(27235), AOM_ICDF(29980), AOM_ICDF(31875),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(2688), AOM_ICDF(6683), AOM_ICDF(9332), AOM_ICDF(22173),
- AOM_ICDF(22688), AOM_ICDF(22972), AOM_ICDF(23623), AOM_ICDF(24159),
- AOM_ICDF(24798), AOM_ICDF(26666), AOM_ICDF(29812), AOM_ICDF(30909),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(10112), AOM_ICDF(13298), AOM_ICDF(16662),
- AOM_ICDF(17623), AOM_ICDF(18394), AOM_ICDF(20921), AOM_ICDF(22309),
- AOM_ICDF(22963), AOM_ICDF(26257), AOM_ICDF(29945), AOM_ICDF(31423),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5120), AOM_ICDF(7604), AOM_ICDF(12617), AOM_ICDF(15628),
- AOM_ICDF(18274), AOM_ICDF(20174), AOM_ICDF(21404), AOM_ICDF(22869),
- AOM_ICDF(24184), AOM_ICDF(25626), AOM_ICDF(29615), AOM_ICDF(31155),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(10295), AOM_ICDF(18459), AOM_ICDF(21302),
- AOM_ICDF(23034), AOM_ICDF(26284), AOM_ICDF(27576), AOM_ICDF(29746),
- AOM_ICDF(30502), AOM_ICDF(31794), AOM_ICDF(32346), AOM_ICDF(32600),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4224), AOM_ICDF(6789), AOM_ICDF(11254), AOM_ICDF(15589),
- AOM_ICDF(18568), AOM_ICDF(19238), AOM_ICDF(19872), AOM_ICDF(20880),
- AOM_ICDF(24409), AOM_ICDF(26238), AOM_ICDF(29580), AOM_ICDF(30875),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5120), AOM_ICDF(7388), AOM_ICDF(10164), AOM_ICDF(15886),
- AOM_ICDF(16694), AOM_ICDF(17139), AOM_ICDF(18421), AOM_ICDF(19262),
- AOM_ICDF(20106), AOM_ICDF(26734), AOM_ICDF(29987), AOM_ICDF(31160),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(5760), AOM_ICDF(8292), AOM_ICDF(13837), AOM_ICDF(16201),
- AOM_ICDF(17303), AOM_ICDF(18422), AOM_ICDF(20215), AOM_ICDF(23059),
- AOM_ICDF(23628), AOM_ICDF(25449), AOM_ICDF(29537), AOM_ICDF(31455),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4224), AOM_ICDF(7235), AOM_ICDF(12521), AOM_ICDF(16798),
- AOM_ICDF(17964), AOM_ICDF(18136), AOM_ICDF(18936), AOM_ICDF(20233),
- AOM_ICDF(20821), AOM_ICDF(22501), AOM_ICDF(27955), AOM_ICDF(30493),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3840), AOM_ICDF(7117), AOM_ICDF(13329), AOM_ICDF(17383),
- AOM_ICDF(18323), AOM_ICDF(18492), AOM_ICDF(19273), AOM_ICDF(20538),
- AOM_ICDF(21064), AOM_ICDF(22481), AOM_ICDF(27785), AOM_ICDF(30938),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(4736), AOM_ICDF(7474), AOM_ICDF(12414), AOM_ICDF(17230),
- AOM_ICDF(18246), AOM_ICDF(18457), AOM_ICDF(19128), AOM_ICDF(20087),
- AOM_ICDF(20830), AOM_ICDF(22602), AOM_ICDF(27923), AOM_ICDF(29929),
- AOM_ICDF(32768), 0 },
- { AOM_ICDF(3584), AOM_ICDF(9626), AOM_ICDF(15412), AOM_ICDF(20788),
- AOM_ICDF(21676), AOM_ICDF(22192), AOM_ICDF(23266), AOM_ICDF(24342),
- AOM_ICDF(24836), AOM_ICDF(26447), AOM_ICDF(29583), AOM_ICDF(31300),
- AOM_ICDF(32768), 0 },
+ AOM_ICDF(4224), AOM_ICDF(8689), AOM_ICDF(13024), AOM_ICDF(13658),
+ AOM_ICDF(16637), AOM_ICDF(17307), AOM_ICDF(20836), AOM_ICDF(22665),
+ AOM_ICDF(23673), AOM_ICDF(27015), AOM_ICDF(28310), AOM_ICDF(30203),
+ AOM_ICDF(32768), 0,
},
+ {
+ AOM_ICDF(5120), AOM_ICDF(7896), AOM_ICDF(13618), AOM_ICDF(14900),
+ AOM_ICDF(15708), AOM_ICDF(16153), AOM_ICDF(16997), AOM_ICDF(23625),
+ AOM_ICDF(24466), AOM_ICDF(27719), AOM_ICDF(28892), AOM_ICDF(30500),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5760), AOM_ICDF(11305), AOM_ICDF(13669), AOM_ICDF(15462),
+ AOM_ICDF(16564), AOM_ICDF(17683), AOM_ICDF(18252), AOM_ICDF(20073),
+ AOM_ICDF(22917), AOM_ICDF(27005), AOM_ICDF(28923), AOM_ICDF(30236),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4224), AOM_ICDF(9510), AOM_ICDF(13787), AOM_ICDF(14587),
+ AOM_ICDF(15753), AOM_ICDF(15925), AOM_ICDF(16513), AOM_ICDF(18193),
+ AOM_ICDF(19490), AOM_ICDF(24944), AOM_ICDF(27482), AOM_ICDF(29757),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3840), AOM_ICDF(10052), AOM_ICDF(14106), AOM_ICDF(14887),
+ AOM_ICDF(15827), AOM_ICDF(15996), AOM_ICDF(16522), AOM_ICDF(17939),
+ AOM_ICDF(19204), AOM_ICDF(24508), AOM_ICDF(27661), AOM_ICDF(29491),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(4736), AOM_ICDF(9676), AOM_ICDF(14492), AOM_ICDF(15163),
+ AOM_ICDF(16179), AOM_ICDF(16390), AOM_ICDF(17133), AOM_ICDF(18905),
+ AOM_ICDF(19864), AOM_ICDF(25185), AOM_ICDF(27191), AOM_ICDF(30030),
+ AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(3584), AOM_ICDF(9370), AOM_ICDF(14746), AOM_ICDF(15820),
+ AOM_ICDF(16708), AOM_ICDF(17224), AOM_ICDF(17718), AOM_ICDF(19329),
+ AOM_ICDF(20405), AOM_ICDF(23541), AOM_ICDF(25258), AOM_ICDF(26726),
+ AOM_ICDF(32768), 0,
+ },
+ },
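Note: each row above is an inverted-CDF table over a 15-bit probability space. The values are cumulative counts out of 32768, every row terminates with AOM_ICDF(32768), and the trailing 0 appears to be an adaptation counter rather than a CDF entry. The rows before this #else carry 13 symbols per context while the rows after it carry 11, presumably because the configuration selected above enables two additional intra modes. Below is a minimal sketch, under stated assumptions, of how one such row partitions [0, 32768) into symbols; it assumes the AOM_ICDF(x) == 32768 - x convention that aom_dsp/prob.h can select at build time, and the function name and linear scan are purely illustrative, not libaom's actual entropy decoder.

/* Hedged sketch, not libaom's decoder: interpreting one inverted-CDF
 * row from the tables in this hunk.  Assumes AOM_ICDF(x) == 32768 - x
 * (one of the conventions in aom_dsp/prob.h); the trailing 0 in each
 * row is taken to be an adaptation counter, not a CDF entry. */
#include <stdint.h>
#include <stdio.h>

#define AOM_ICDF(x) ((uint16_t)(32768u - (x))) /* assumed convention */

/* First complete '+' row of this hunk (13 symbols + counter). */
static const uint16_t row[14] = {
  AOM_ICDF(3584),  AOM_ICDF(5287),  AOM_ICDF(16141), AOM_ICDF(16840),
  AOM_ICDF(17670), AOM_ICDF(17760), AOM_ICDF(18532), AOM_ICDF(20387),
  AOM_ICDF(21102), AOM_ICDF(26118), AOM_ICDF(27535), AOM_ICDF(30830),
  AOM_ICDF(32768), 0,
};

/* Map a 15-bit value r in [0, 32768) to a symbol by linear scan:
 * symbol k covers [cdf[k-1], cdf[k]), where cdf[k] = 32768 - row[k].
 * row[12] == AOM_ICDF(32768) == 0, so the scan always terminates. */
static int lookup_symbol(uint16_t r) {
  int k = 0;
  while (32768u - row[k] <= r) ++k;
  return k;
}

int main(void) {
  /* Symbol 2 spans [5287, 16141), i.e. mass 10854 / 32768. */
  printf("symbol at r=6000: %d\n", lookup_symbol(6000)); /* prints 2 */
  return 0;
}

The real decoder consumes these tables through the range coder in aom_dsp/entdec.c; the linear scan here only illustrates how a row divides the probability space among its symbols.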
#else
+ {
+ {
+ AOM_ICDF(15488), AOM_ICDF(18706), AOM_ICDF(22561), AOM_ICDF(23619),
+ AOM_ICDF(24954), AOM_ICDF(25782), AOM_ICDF(26710), AOM_ICDF(27861),
+ AOM_ICDF(28656), AOM_ICDF(30743), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11648), AOM_ICDF(18744), AOM_ICDF(20846), AOM_ICDF(22100),
+ AOM_ICDF(23332), AOM_ICDF(24337), AOM_ICDF(25093), AOM_ICDF(26104),
+ AOM_ICDF(27097), AOM_ICDF(29633), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8448), AOM_ICDF(10732), AOM_ICDF(22507), AOM_ICDF(23254),
+ AOM_ICDF(24382), AOM_ICDF(24876), AOM_ICDF(25827), AOM_ICDF(27488),
+ AOM_ICDF(28040), AOM_ICDF(30108), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(13568), AOM_ICDF(16981), AOM_ICDF(19885), AOM_ICDF(22014),
+ AOM_ICDF(23543), AOM_ICDF(24658), AOM_ICDF(25641), AOM_ICDF(27378),
+ AOM_ICDF(28625), AOM_ICDF(31043), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9600), AOM_ICDF(12225), AOM_ICDF(14408), AOM_ICDF(16033),
+ AOM_ICDF(19544), AOM_ICDF(22318), AOM_ICDF(23960), AOM_ICDF(25617),
+ AOM_ICDF(26522), AOM_ICDF(30596), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(12160), AOM_ICDF(15078), AOM_ICDF(16990), AOM_ICDF(18964),
+ AOM_ICDF(22725), AOM_ICDF(25793), AOM_ICDF(27133), AOM_ICDF(28447),
+ AOM_ICDF(30831), AOM_ICDF(30836), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9088), AOM_ICDF(11274), AOM_ICDF(15818), AOM_ICDF(16940),
+ AOM_ICDF(21178), AOM_ICDF(22338), AOM_ICDF(26171), AOM_ICDF(27754),
+ AOM_ICDF(28503), AOM_ICDF(31473), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10880), AOM_ICDF(13846), AOM_ICDF(18649), AOM_ICDF(20252),
+ AOM_ICDF(22157), AOM_ICDF(22992), AOM_ICDF(24396), AOM_ICDF(27581),
+ AOM_ICDF(28501), AOM_ICDF(31400), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11008), AOM_ICDF(13462), AOM_ICDF(15747), AOM_ICDF(18378),
+ AOM_ICDF(20085), AOM_ICDF(21663), AOM_ICDF(22766), AOM_ICDF(24635),
+ AOM_ICDF(27476), AOM_ICDF(30643), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10112), AOM_ICDF(13147), AOM_ICDF(16135), AOM_ICDF(17577),
+ AOM_ICDF(19681), AOM_ICDF(19689), AOM_ICDF(20856), AOM_ICDF(22374),
+ AOM_ICDF(24454), AOM_ICDF(30555), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8704), AOM_ICDF(12176), AOM_ICDF(17582), AOM_ICDF(18905),
+ AOM_ICDF(19994), AOM_ICDF(20669), AOM_ICDF(21635), AOM_ICDF(23564),
+ AOM_ICDF(24741), AOM_ICDF(27222), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(8448), AOM_ICDF(18738), AOM_ICDF(21694), AOM_ICDF(22413),
+ AOM_ICDF(23358), AOM_ICDF(24675), AOM_ICDF(25193), AOM_ICDF(26119),
+ AOM_ICDF(27310), AOM_ICDF(30773), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6656), AOM_ICDF(22027), AOM_ICDF(23242), AOM_ICDF(23986),
+ AOM_ICDF(24529), AOM_ICDF(25363), AOM_ICDF(25646), AOM_ICDF(26087),
+ AOM_ICDF(27130), AOM_ICDF(30218), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7168), AOM_ICDF(13862), AOM_ICDF(21137), AOM_ICDF(22124),
+ AOM_ICDF(23036), AOM_ICDF(23803), AOM_ICDF(24458), AOM_ICDF(26390),
+ AOM_ICDF(27342), AOM_ICDF(30968), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9600), AOM_ICDF(17409), AOM_ICDF(19830), AOM_ICDF(21521),
+ AOM_ICDF(22580), AOM_ICDF(23726), AOM_ICDF(24377), AOM_ICDF(25679),
+ AOM_ICDF(27269), AOM_ICDF(30867), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6912), AOM_ICDF(15832), AOM_ICDF(17559), AOM_ICDF(18777),
+ AOM_ICDF(20425), AOM_ICDF(22719), AOM_ICDF(23447), AOM_ICDF(24952),
+ AOM_ICDF(26527), AOM_ICDF(30950), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7808), AOM_ICDF(18730), AOM_ICDF(20143), AOM_ICDF(21445),
+ AOM_ICDF(23347), AOM_ICDF(26267), AOM_ICDF(27229), AOM_ICDF(28315),
+ AOM_ICDF(30911), AOM_ICDF(30915), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6784), AOM_ICDF(14299), AOM_ICDF(17264), AOM_ICDF(18505),
+ AOM_ICDF(20765), AOM_ICDF(22440), AOM_ICDF(24331), AOM_ICDF(26038),
+ AOM_ICDF(27481), AOM_ICDF(31448), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8832), AOM_ICDF(15726), AOM_ICDF(19455), AOM_ICDF(20668),
+ AOM_ICDF(21607), AOM_ICDF(22655), AOM_ICDF(23384), AOM_ICDF(26356),
+ AOM_ICDF(27697), AOM_ICDF(31459), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8192), AOM_ICDF(17385), AOM_ICDF(18866), AOM_ICDF(20120),
+ AOM_ICDF(21273), AOM_ICDF(22853), AOM_ICDF(23470), AOM_ICDF(24881),
+ AOM_ICDF(27216), AOM_ICDF(31040), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6656), AOM_ICDF(16341), AOM_ICDF(18497), AOM_ICDF(19439),
+ AOM_ICDF(20706), AOM_ICDF(20711), AOM_ICDF(21234), AOM_ICDF(22307),
+ AOM_ICDF(23950), AOM_ICDF(30728), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6400), AOM_ICDF(17625), AOM_ICDF(20326), AOM_ICDF(21821),
+ AOM_ICDF(22568), AOM_ICDF(23415), AOM_ICDF(23854), AOM_ICDF(24896),
+ AOM_ICDF(26171), AOM_ICDF(29575), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(12032), AOM_ICDF(14259), AOM_ICDF(22597), AOM_ICDF(23443),
+ AOM_ICDF(24581), AOM_ICDF(25079), AOM_ICDF(26399), AOM_ICDF(27862),
+ AOM_ICDF(28509), AOM_ICDF(30419), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9216), AOM_ICDF(14883), AOM_ICDF(20941), AOM_ICDF(21958),
+ AOM_ICDF(23597), AOM_ICDF(24328), AOM_ICDF(25208), AOM_ICDF(26590),
+ AOM_ICDF(27377), AOM_ICDF(29364), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6784), AOM_ICDF(8088), AOM_ICDF(24407), AOM_ICDF(25006),
+ AOM_ICDF(25777), AOM_ICDF(25950), AOM_ICDF(26882), AOM_ICDF(28811),
+ AOM_ICDF(29159), AOM_ICDF(30636), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11904), AOM_ICDF(14425), AOM_ICDF(18729), AOM_ICDF(20730),
+ AOM_ICDF(21998), AOM_ICDF(22686), AOM_ICDF(23856), AOM_ICDF(26580),
+ AOM_ICDF(27613), AOM_ICDF(29834), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10752), AOM_ICDF(12784), AOM_ICDF(16305), AOM_ICDF(17624),
+ AOM_ICDF(20320), AOM_ICDF(22450), AOM_ICDF(24380), AOM_ICDF(26773),
+ AOM_ICDF(27837), AOM_ICDF(30016), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10496), AOM_ICDF(14090), AOM_ICDF(18314), AOM_ICDF(20621),
+ AOM_ICDF(23539), AOM_ICDF(25261), AOM_ICDF(26953), AOM_ICDF(28692),
+ AOM_ICDF(30064), AOM_ICDF(30071), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8448), AOM_ICDF(10229), AOM_ICDF(16542), AOM_ICDF(17725),
+ AOM_ICDF(21504), AOM_ICDF(22332), AOM_ICDF(26006), AOM_ICDF(27895),
+ AOM_ICDF(28487), AOM_ICDF(31248), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9728), AOM_ICDF(11162), AOM_ICDF(19379), AOM_ICDF(20981),
+ AOM_ICDF(22356), AOM_ICDF(22926), AOM_ICDF(24318), AOM_ICDF(28364),
+ AOM_ICDF(29020), AOM_ICDF(31328), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9216), AOM_ICDF(10861), AOM_ICDF(14850), AOM_ICDF(16471),
+ AOM_ICDF(18611), AOM_ICDF(19674), AOM_ICDF(21009), AOM_ICDF(23454),
+ AOM_ICDF(26078), AOM_ICDF(29272), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7808), AOM_ICDF(10132), AOM_ICDF(17327), AOM_ICDF(18472),
+ AOM_ICDF(20126), AOM_ICDF(20132), AOM_ICDF(21599), AOM_ICDF(23338),
+ AOM_ICDF(24514), AOM_ICDF(29843), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6784), AOM_ICDF(9210), AOM_ICDF(19309), AOM_ICDF(20715),
+ AOM_ICDF(21833), AOM_ICDF(22262), AOM_ICDF(23353), AOM_ICDF(24942),
+ AOM_ICDF(25800), AOM_ICDF(28200), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(12288), AOM_ICDF(15040), AOM_ICDF(18401), AOM_ICDF(21071),
+ AOM_ICDF(22800), AOM_ICDF(23945), AOM_ICDF(25274), AOM_ICDF(26939),
+ AOM_ICDF(28554), AOM_ICDF(31328), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9344), AOM_ICDF(17170), AOM_ICDF(19325), AOM_ICDF(22119),
+ AOM_ICDF(23284), AOM_ICDF(24378), AOM_ICDF(24911), AOM_ICDF(26095),
+ AOM_ICDF(27781), AOM_ICDF(31121), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9344), AOM_ICDF(11650), AOM_ICDF(19788), AOM_ICDF(21928),
+ AOM_ICDF(22916), AOM_ICDF(23571), AOM_ICDF(24362), AOM_ICDF(26633),
+ AOM_ICDF(27946), AOM_ICDF(31212), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(12928), AOM_ICDF(14428), AOM_ICDF(17080), AOM_ICDF(20882),
+ AOM_ICDF(22104), AOM_ICDF(23149), AOM_ICDF(23715), AOM_ICDF(27167),
+ AOM_ICDF(28932), AOM_ICDF(31218), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9088), AOM_ICDF(11962), AOM_ICDF(13849), AOM_ICDF(16880),
+ AOM_ICDF(19818), AOM_ICDF(21895), AOM_ICDF(23000), AOM_ICDF(25923),
+ AOM_ICDF(27961), AOM_ICDF(31380), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10240), AOM_ICDF(13336), AOM_ICDF(15505), AOM_ICDF(18844),
+ AOM_ICDF(21646), AOM_ICDF(24723), AOM_ICDF(25832), AOM_ICDF(27802),
+ AOM_ICDF(31088), AOM_ICDF(31096), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8704), AOM_ICDF(10683), AOM_ICDF(14446), AOM_ICDF(17035),
+ AOM_ICDF(20211), AOM_ICDF(21577), AOM_ICDF(24370), AOM_ICDF(26477),
+ AOM_ICDF(28223), AOM_ICDF(31734), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(12928), AOM_ICDF(17358), AOM_ICDF(19982), AOM_ICDF(22123),
+ AOM_ICDF(23335), AOM_ICDF(23948), AOM_ICDF(24890), AOM_ICDF(28884),
+ AOM_ICDF(30197), AOM_ICDF(32148), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10496), AOM_ICDF(12429), AOM_ICDF(16401), AOM_ICDF(20493),
+ AOM_ICDF(21471), AOM_ICDF(22433), AOM_ICDF(23162), AOM_ICDF(24686),
+ AOM_ICDF(29027), AOM_ICDF(31115), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8448), AOM_ICDF(12157), AOM_ICDF(14796), AOM_ICDF(17676),
+ AOM_ICDF(19754), AOM_ICDF(19762), AOM_ICDF(20641), AOM_ICDF(23274),
+ AOM_ICDF(25569), AOM_ICDF(31058), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7296), AOM_ICDF(11083), AOM_ICDF(15313), AOM_ICDF(20550),
+ AOM_ICDF(21783), AOM_ICDF(22727), AOM_ICDF(23461), AOM_ICDF(25072),
+ AOM_ICDF(27195), AOM_ICDF(30380), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(10880), AOM_ICDF(13214), AOM_ICDF(15829), AOM_ICDF(16866),
+ AOM_ICDF(20613), AOM_ICDF(22316), AOM_ICDF(24539), AOM_ICDF(27077),
+ AOM_ICDF(28116), AOM_ICDF(31485), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9984), AOM_ICDF(13868), AOM_ICDF(16397), AOM_ICDF(17486),
+ AOM_ICDF(20011), AOM_ICDF(22071), AOM_ICDF(23357), AOM_ICDF(24990),
+ AOM_ICDF(26336), AOM_ICDF(30276), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7168), AOM_ICDF(8637), AOM_ICDF(17963), AOM_ICDF(18813),
+ AOM_ICDF(21065), AOM_ICDF(22052), AOM_ICDF(23502), AOM_ICDF(25702),
+ AOM_ICDF(26745), AOM_ICDF(30668), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8960), AOM_ICDF(10682), AOM_ICDF(12496), AOM_ICDF(18240),
+ AOM_ICDF(20500), AOM_ICDF(21585), AOM_ICDF(23387), AOM_ICDF(25795),
+ AOM_ICDF(27119), AOM_ICDF(31001), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9856), AOM_ICDF(12056), AOM_ICDF(13722), AOM_ICDF(15196),
+ AOM_ICDF(19276), AOM_ICDF(21891), AOM_ICDF(23643), AOM_ICDF(25538),
+ AOM_ICDF(26854), AOM_ICDF(31515), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9984), AOM_ICDF(12963), AOM_ICDF(14960), AOM_ICDF(16734),
+ AOM_ICDF(21279), AOM_ICDF(25616), AOM_ICDF(27638), AOM_ICDF(28950),
+ AOM_ICDF(31161), AOM_ICDF(31166), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7168), AOM_ICDF(8604), AOM_ICDF(12044), AOM_ICDF(13632),
+ AOM_ICDF(18931), AOM_ICDF(20553), AOM_ICDF(23452), AOM_ICDF(25800),
+ AOM_ICDF(27754), AOM_ICDF(31668), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11520), AOM_ICDF(13372), AOM_ICDF(16642), AOM_ICDF(18137),
+ AOM_ICDF(20232), AOM_ICDF(21510), AOM_ICDF(23052), AOM_ICDF(26792),
+ AOM_ICDF(27974), AOM_ICDF(31274), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10240), AOM_ICDF(12483), AOM_ICDF(14364), AOM_ICDF(16168),
+ AOM_ICDF(18668), AOM_ICDF(20707), AOM_ICDF(22158), AOM_ICDF(24410),
+ AOM_ICDF(26370), AOM_ICDF(30744), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8064), AOM_ICDF(10798), AOM_ICDF(13829), AOM_ICDF(15128),
+ AOM_ICDF(19136), AOM_ICDF(19152), AOM_ICDF(21057), AOM_ICDF(22583),
+ AOM_ICDF(24513), AOM_ICDF(30645), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8448), AOM_ICDF(11025), AOM_ICDF(16073), AOM_ICDF(17603),
+ AOM_ICDF(20094), AOM_ICDF(21468), AOM_ICDF(22971), AOM_ICDF(24628),
+ AOM_ICDF(26015), AOM_ICDF(29728), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(10368), AOM_ICDF(15372), AOM_ICDF(18442), AOM_ICDF(19576),
+ AOM_ICDF(22674), AOM_ICDF(27128), AOM_ICDF(28232), AOM_ICDF(29624),
+ AOM_ICDF(31363), AOM_ICDF(31368), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9472), AOM_ICDF(16687), AOM_ICDF(18957), AOM_ICDF(20272),
+ AOM_ICDF(22852), AOM_ICDF(27082), AOM_ICDF(27839), AOM_ICDF(28995),
+ AOM_ICDF(30943), AOM_ICDF(30948), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8064), AOM_ICDF(12334), AOM_ICDF(19197), AOM_ICDF(20956),
+ AOM_ICDF(24804), AOM_ICDF(26553), AOM_ICDF(27556), AOM_ICDF(29877),
+ AOM_ICDF(31311), AOM_ICDF(31320), AOM_ICDF(32768), 0,
+ },
{
- { AOM_ICDF(15488), AOM_ICDF(17513), AOM_ICDF(20731), AOM_ICDF(24586),
- AOM_ICDF(25921), AOM_ICDF(26749), AOM_ICDF(27807), AOM_ICDF(28602),
- AOM_ICDF(29530), AOM_ICDF(30681), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11648), AOM_ICDF(14783), AOM_ICDF(21879), AOM_ICDF(23981),
- AOM_ICDF(25213), AOM_ICDF(26218), AOM_ICDF(27472), AOM_ICDF(28465),
- AOM_ICDF(29221), AOM_ICDF(30232), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8448), AOM_ICDF(11108), AOM_ICDF(13392), AOM_ICDF(25167),
- AOM_ICDF(26295), AOM_ICDF(26789), AOM_ICDF(27536), AOM_ICDF(28088),
- AOM_ICDF(29039), AOM_ICDF(30700), AOM_ICDF(32768), 0 },
- { AOM_ICDF(13568), AOM_ICDF(15293), AOM_ICDF(18706), AOM_ICDF(21610),
- AOM_ICDF(23139), AOM_ICDF(24254), AOM_ICDF(26383), AOM_ICDF(27630),
- AOM_ICDF(28613), AOM_ICDF(30350), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9600), AOM_ICDF(11772), AOM_ICDF(14397), AOM_ICDF(16580),
- AOM_ICDF(20091), AOM_ICDF(22865), AOM_ICDF(24490), AOM_ICDF(25395),
- AOM_ICDF(27037), AOM_ICDF(28694), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12160), AOM_ICDF(14092), AOM_ICDF(17010), AOM_ICDF(18922),
- AOM_ICDF(22683), AOM_ICDF(25751), AOM_ICDF(27725), AOM_ICDF(30109),
- AOM_ICDF(31449), AOM_ICDF(32763), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9088), AOM_ICDF(10383), AOM_ICDF(12569), AOM_ICDF(17113),
- AOM_ICDF(21351), AOM_ICDF(22511), AOM_ICDF(23633), AOM_ICDF(24382),
- AOM_ICDF(28215), AOM_ICDF(29798), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10880), AOM_ICDF(12248), AOM_ICDF(15214), AOM_ICDF(20017),
- AOM_ICDF(21922), AOM_ICDF(22757), AOM_ICDF(24360), AOM_ICDF(25280),
- AOM_ICDF(26684), AOM_ICDF(29869), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11008), AOM_ICDF(13133), AOM_ICDF(15587), AOM_ICDF(17872),
- AOM_ICDF(19579), AOM_ICDF(21157), AOM_ICDF(23788), AOM_ICDF(26629),
- AOM_ICDF(27732), AOM_ICDF(29601), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10112), AOM_ICDF(12325), AOM_ICDF(15360), AOM_ICDF(18348),
- AOM_ICDF(20452), AOM_ICDF(20460), AOM_ICDF(21902), AOM_ICDF(23982),
- AOM_ICDF(25149), AOM_ICDF(26667), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(14250), AOM_ICDF(17722), AOM_ICDF(23128),
- AOM_ICDF(24217), AOM_ICDF(24892), AOM_ICDF(26215), AOM_ICDF(27392),
- AOM_ICDF(28358), AOM_ICDF(30287), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(8448), AOM_ICDF(10443), AOM_ICDF(20733), AOM_ICDF(23689),
- AOM_ICDF(24634), AOM_ICDF(25951), AOM_ICDF(26670), AOM_ICDF(27861),
- AOM_ICDF(28379), AOM_ICDF(29305), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(9206), AOM_ICDF(24577), AOM_ICDF(25792),
- AOM_ICDF(26335), AOM_ICDF(27169), AOM_ICDF(27913), AOM_ICDF(28956),
- AOM_ICDF(29239), AOM_ICDF(29680), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7168), AOM_ICDF(8968), AOM_ICDF(15662), AOM_ICDF(22937),
- AOM_ICDF(23849), AOM_ICDF(24616), AOM_ICDF(25603), AOM_ICDF(26555),
- AOM_ICDF(27210), AOM_ICDF(29142), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9600), AOM_ICDF(11501), AOM_ICDF(19310), AOM_ICDF(21731),
- AOM_ICDF(22790), AOM_ICDF(23936), AOM_ICDF(25627), AOM_ICDF(27217),
- AOM_ICDF(27868), AOM_ICDF(29170), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6912), AOM_ICDF(8730), AOM_ICDF(17650), AOM_ICDF(19377),
- AOM_ICDF(21025), AOM_ICDF(23319), AOM_ICDF(24537), AOM_ICDF(26112),
- AOM_ICDF(26840), AOM_ICDF(28345), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7808), AOM_ICDF(9661), AOM_ICDF(20583), AOM_ICDF(21996),
- AOM_ICDF(23898), AOM_ICDF(26818), AOM_ICDF(28120), AOM_ICDF(30716),
- AOM_ICDF(31678), AOM_ICDF(32764), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(8104), AOM_ICDF(15619), AOM_ICDF(18584),
- AOM_ICDF(20844), AOM_ICDF(22519), AOM_ICDF(23760), AOM_ICDF(25203),
- AOM_ICDF(27094), AOM_ICDF(28801), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8832), AOM_ICDF(10141), AOM_ICDF(17035), AOM_ICDF(20764),
- AOM_ICDF(21703), AOM_ICDF(22751), AOM_ICDF(23964), AOM_ICDF(25305),
- AOM_ICDF(26034), AOM_ICDF(29006), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8192), AOM_ICDF(9920), AOM_ICDF(19113), AOM_ICDF(20594),
- AOM_ICDF(21747), AOM_ICDF(23327), AOM_ICDF(24581), AOM_ICDF(26916),
- AOM_ICDF(27533), AOM_ICDF(28944), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(8696), AOM_ICDF(18381), AOM_ICDF(20537),
- AOM_ICDF(21804), AOM_ICDF(21809), AOM_ICDF(22751), AOM_ICDF(24394),
- AOM_ICDF(24917), AOM_ICDF(25990), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6400), AOM_ICDF(9593), AOM_ICDF(20818), AOM_ICDF(23519),
- AOM_ICDF(24266), AOM_ICDF(25113), AOM_ICDF(26608), AOM_ICDF(27883),
- AOM_ICDF(28322), AOM_ICDF(29364), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(12032), AOM_ICDF(14381), AOM_ICDF(16608), AOM_ICDF(24946),
- AOM_ICDF(26084), AOM_ICDF(26582), AOM_ICDF(27428), AOM_ICDF(28075),
- AOM_ICDF(29395), AOM_ICDF(30858), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(12620), AOM_ICDF(18287), AOM_ICDF(24345),
- AOM_ICDF(25984), AOM_ICDF(26715), AOM_ICDF(27732), AOM_ICDF(28519),
- AOM_ICDF(29399), AOM_ICDF(30781), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(8916), AOM_ICDF(10220), AOM_ICDF(26539),
- AOM_ICDF(27310), AOM_ICDF(27483), AOM_ICDF(28082), AOM_ICDF(28430),
- AOM_ICDF(29362), AOM_ICDF(31291), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11904), AOM_ICDF(14838), AOM_ICDF(17359), AOM_ICDF(21663),
- AOM_ICDF(22931), AOM_ICDF(23619), AOM_ICDF(25620), AOM_ICDF(26653),
- AOM_ICDF(27823), AOM_ICDF(30547), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10752), AOM_ICDF(13504), AOM_ICDF(15536), AOM_ICDF(19057),
- AOM_ICDF(21753), AOM_ICDF(23883), AOM_ICDF(25202), AOM_ICDF(26266),
- AOM_ICDF(28196), AOM_ICDF(30589), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10496), AOM_ICDF(13193), AOM_ICDF(16787), AOM_ICDF(21011),
- AOM_ICDF(23929), AOM_ICDF(25651), AOM_ICDF(27958), AOM_ICDF(29330),
- AOM_ICDF(31022), AOM_ICDF(32761), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8448), AOM_ICDF(9968), AOM_ICDF(11749), AOM_ICDF(18062),
- AOM_ICDF(21841), AOM_ICDF(22669), AOM_ICDF(23852), AOM_ICDF(24444),
- AOM_ICDF(28118), AOM_ICDF(30007), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9728), AOM_ICDF(11168), AOM_ICDF(12602), AOM_ICDF(20819),
- AOM_ICDF(22194), AOM_ICDF(22764), AOM_ICDF(24366), AOM_ICDF(25022),
- AOM_ICDF(26414), AOM_ICDF(30460), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(12712), AOM_ICDF(14357), AOM_ICDF(18346),
- AOM_ICDF(20486), AOM_ICDF(21549), AOM_ICDF(23170), AOM_ICDF(25794),
- AOM_ICDF(27129), AOM_ICDF(29574), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7808), AOM_ICDF(10733), AOM_ICDF(13057), AOM_ICDF(20252),
- AOM_ICDF(21906), AOM_ICDF(21912), AOM_ICDF(23057), AOM_ICDF(24233),
- AOM_ICDF(25700), AOM_ICDF(27439), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(11352), AOM_ICDF(13778), AOM_ICDF(23877),
- AOM_ICDF(24995), AOM_ICDF(25424), AOM_ICDF(26830), AOM_ICDF(27688),
- AOM_ICDF(28779), AOM_ICDF(30368), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(12288), AOM_ICDF(13728), AOM_ICDF(16480), AOM_ICDF(19841),
- AOM_ICDF(21570), AOM_ICDF(22715), AOM_ICDF(25385), AOM_ICDF(27000),
- AOM_ICDF(28329), AOM_ICDF(29994), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9344), AOM_ICDF(10991), AOM_ICDF(18817), AOM_ICDF(20972),
- AOM_ICDF(22137), AOM_ICDF(23231), AOM_ICDF(26025), AOM_ICDF(27711),
- AOM_ICDF(28244), AOM_ICDF(29428), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9344), AOM_ICDF(10900), AOM_ICDF(13206), AOM_ICDF(21344),
- AOM_ICDF(22332), AOM_ICDF(22987), AOM_ICDF(25127), AOM_ICDF(26440),
- AOM_ICDF(27231), AOM_ICDF(29502), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12928), AOM_ICDF(14478), AOM_ICDF(15978), AOM_ICDF(18630),
- AOM_ICDF(19852), AOM_ICDF(20897), AOM_ICDF(24699), AOM_ICDF(26464),
- AOM_ICDF(27030), AOM_ICDF(30482), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9088), AOM_ICDF(10476), AOM_ICDF(13350), AOM_ICDF(15237),
- AOM_ICDF(18175), AOM_ICDF(20252), AOM_ICDF(23283), AOM_ICDF(25321),
- AOM_ICDF(26426), AOM_ICDF(29349), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10240), AOM_ICDF(11912), AOM_ICDF(15008), AOM_ICDF(17177),
- AOM_ICDF(19979), AOM_ICDF(23056), AOM_ICDF(26395), AOM_ICDF(29681),
- AOM_ICDF(30790), AOM_ICDF(32760), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(9738), AOM_ICDF(11717), AOM_ICDF(15480),
- AOM_ICDF(18656), AOM_ICDF(20022), AOM_ICDF(22611), AOM_ICDF(24357),
- AOM_ICDF(27150), AOM_ICDF(29257), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12928), AOM_ICDF(13548), AOM_ICDF(17978), AOM_ICDF(20602),
- AOM_ICDF(21814), AOM_ICDF(22427), AOM_ICDF(24568), AOM_ICDF(25881),
- AOM_ICDF(26823), AOM_ICDF(30817), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10496), AOM_ICDF(12149), AOM_ICDF(14082), AOM_ICDF(18054),
- AOM_ICDF(19032), AOM_ICDF(19994), AOM_ICDF(24086), AOM_ICDF(28427),
- AOM_ICDF(29156), AOM_ICDF(30680), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8448), AOM_ICDF(10158), AOM_ICDF(13867), AOM_ICDF(16506),
- AOM_ICDF(18584), AOM_ICDF(18592), AOM_ICDF(21472), AOM_ICDF(23767),
- AOM_ICDF(24646), AOM_ICDF(27279), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7296), AOM_ICDF(9684), AOM_ICDF(13471), AOM_ICDF(17701),
- AOM_ICDF(18934), AOM_ICDF(19878), AOM_ICDF(25115), AOM_ICDF(27238),
- AOM_ICDF(27972), AOM_ICDF(29583), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(10880), AOM_ICDF(12163), AOM_ICDF(14497), AOM_ICDF(17112),
- AOM_ICDF(20859), AOM_ICDF(22562), AOM_ICDF(23599), AOM_ICDF(24638),
- AOM_ICDF(26861), AOM_ICDF(29399), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9984), AOM_ICDF(12476), AOM_ICDF(16360), AOM_ICDF(18889),
- AOM_ICDF(21414), AOM_ICDF(23474), AOM_ICDF(24563), AOM_ICDF(25909),
- AOM_ICDF(27195), AOM_ICDF(28828), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7168), AOM_ICDF(9268), AOM_ICDF(10737), AOM_ICDF(20063),
- AOM_ICDF(22315), AOM_ICDF(23302), AOM_ICDF(24152), AOM_ICDF(25195),
- AOM_ICDF(26645), AOM_ICDF(28845), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8960), AOM_ICDF(10727), AOM_ICDF(12449), AOM_ICDF(14263),
- AOM_ICDF(16523), AOM_ICDF(17608), AOM_ICDF(23352), AOM_ICDF(24676),
- AOM_ICDF(26478), AOM_ICDF(28886), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9856), AOM_ICDF(11109), AOM_ICDF(13309), AOM_ICDF(14975),
- AOM_ICDF(19055), AOM_ICDF(21670), AOM_ICDF(23144), AOM_ICDF(24460),
- AOM_ICDF(26212), AOM_ICDF(28107), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9984), AOM_ICDF(11586), AOM_ICDF(14565), AOM_ICDF(16562),
- AOM_ICDF(21107), AOM_ICDF(25444), AOM_ICDF(27218), AOM_ICDF(29429),
- AOM_ICDF(31451), AOM_ICDF(32763), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7168), AOM_ICDF(8268), AOM_ICDF(9704), AOM_ICDF(13144),
- AOM_ICDF(18443), AOM_ICDF(20065), AOM_ICDF(21653), AOM_ICDF(23607),
- AOM_ICDF(26506), AOM_ICDF(28854), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11520), AOM_ICDF(13014), AOM_ICDF(14866), AOM_ICDF(18136),
- AOM_ICDF(20231), AOM_ICDF(21509), AOM_ICDF(23004), AOM_ICDF(24186),
- AOM_ICDF(25728), AOM_ICDF(29468), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10240), AOM_ICDF(12264), AOM_ICDF(14507), AOM_ICDF(16388),
- AOM_ICDF(18888), AOM_ICDF(20927), AOM_ICDF(22731), AOM_ICDF(24691),
- AOM_ICDF(26142), AOM_ICDF(28394), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8064), AOM_ICDF(10187), AOM_ICDF(12921), AOM_ICDF(15952),
- AOM_ICDF(19960), AOM_ICDF(19976), AOM_ICDF(21275), AOM_ICDF(23205),
- AOM_ICDF(25110), AOM_ICDF(26636), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8448), AOM_ICDF(11488), AOM_ICDF(14065), AOM_ICDF(19113),
- AOM_ICDF(21604), AOM_ICDF(22978), AOM_ICDF(24508), AOM_ICDF(25895),
- AOM_ICDF(27398), AOM_ICDF(29055), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(10368), AOM_ICDF(11768), AOM_ICDF(16772), AOM_ICDF(19842),
- AOM_ICDF(22940), AOM_ICDF(27394), AOM_ICDF(28528), AOM_ICDF(30267),
- AOM_ICDF(31371), AOM_ICDF(32763), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9472), AOM_ICDF(11292), AOM_ICDF(18507), AOM_ICDF(20777),
- AOM_ICDF(23357), AOM_ICDF(27587), AOM_ICDF(28902), AOM_ICDF(30850),
- AOM_ICDF(31607), AOM_ICDF(32763), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8064), AOM_ICDF(9512), AOM_ICDF(13782), AOM_ICDF(20645),
- AOM_ICDF(24493), AOM_ICDF(26242), AOM_ICDF(28001), AOM_ICDF(29435),
- AOM_ICDF(30438), AOM_ICDF(32759), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8960), AOM_ICDF(10541), AOM_ICDF(15664), AOM_ICDF(17639),
- AOM_ICDF(19646), AOM_ICDF(22145), AOM_ICDF(25216), AOM_ICDF(28815),
- AOM_ICDF(30050), AOM_ICDF(32757), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9984), AOM_ICDF(11141), AOM_ICDF(15365), AOM_ICDF(16746),
- AOM_ICDF(21186), AOM_ICDF(25766), AOM_ICDF(27817), AOM_ICDF(30022),
- AOM_ICDF(31309), AOM_ICDF(32762), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(10688), AOM_ICDF(16639), AOM_ICDF(17735),
- AOM_ICDF(21499), AOM_ICDF(26657), AOM_ICDF(28161), AOM_ICDF(30572),
- AOM_ICDF(31490), AOM_ICDF(32763), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8448), AOM_ICDF(9303), AOM_ICDF(13611), AOM_ICDF(16636),
- AOM_ICDF(20555), AOM_ICDF(23414), AOM_ICDF(24912), AOM_ICDF(27613),
- AOM_ICDF(29727), AOM_ICDF(32756), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9984), AOM_ICDF(11052), AOM_ICDF(16142), AOM_ICDF(19312),
- AOM_ICDF(21680), AOM_ICDF(23870), AOM_ICDF(25504), AOM_ICDF(28200),
- AOM_ICDF(29324), AOM_ICDF(32755), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10496), AOM_ICDF(12323), AOM_ICDF(16955), AOM_ICDF(18839),
- AOM_ICDF(21144), AOM_ICDF(24861), AOM_ICDF(26838), AOM_ICDF(29988),
- AOM_ICDF(30976), AOM_ICDF(32761), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2944), AOM_ICDF(5973), AOM_ICDF(8904), AOM_ICDF(11875),
- AOM_ICDF(14864), AOM_ICDF(17853), AOM_ICDF(20824), AOM_ICDF(23810),
- AOM_ICDF(26784), AOM_ICDF(29776), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(10097), AOM_ICDF(15588), AOM_ICDF(20217),
- AOM_ICDF(23899), AOM_ICDF(26460), AOM_ICDF(28308), AOM_ICDF(30155),
- AOM_ICDF(30951), AOM_ICDF(32761), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(11648), AOM_ICDF(13133), AOM_ICDF(15050), AOM_ICDF(20481),
- AOM_ICDF(22470), AOM_ICDF(23425), AOM_ICDF(24337), AOM_ICDF(25160),
- AOM_ICDF(28964), AOM_ICDF(30480), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10240), AOM_ICDF(12616), AOM_ICDF(16631), AOM_ICDF(20485),
- AOM_ICDF(22290), AOM_ICDF(23628), AOM_ICDF(25235), AOM_ICDF(26353),
- AOM_ICDF(28107), AOM_ICDF(29655), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(8002), AOM_ICDF(9066), AOM_ICDF(20038),
- AOM_ICDF(22926), AOM_ICDF(23324), AOM_ICDF(23951), AOM_ICDF(24537),
- AOM_ICDF(26916), AOM_ICDF(30231), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11904), AOM_ICDF(14105), AOM_ICDF(15782), AOM_ICDF(19896),
- AOM_ICDF(22283), AOM_ICDF(23147), AOM_ICDF(24763), AOM_ICDF(25983),
- AOM_ICDF(27812), AOM_ICDF(29980), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10624), AOM_ICDF(11922), AOM_ICDF(13632), AOM_ICDF(15941),
- AOM_ICDF(20469), AOM_ICDF(22453), AOM_ICDF(24065), AOM_ICDF(25187),
- AOM_ICDF(27349), AOM_ICDF(29296), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12032), AOM_ICDF(13085), AOM_ICDF(15468), AOM_ICDF(17768),
- AOM_ICDF(20613), AOM_ICDF(24388), AOM_ICDF(26385), AOM_ICDF(28430),
- AOM_ICDF(30938), AOM_ICDF(32761), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9728), AOM_ICDF(10538), AOM_ICDF(11493), AOM_ICDF(14765),
- AOM_ICDF(18460), AOM_ICDF(19471), AOM_ICDF(20302), AOM_ICDF(20935),
- AOM_ICDF(28192), AOM_ICDF(29926), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8960), AOM_ICDF(9890), AOM_ICDF(10962), AOM_ICDF(16685),
- AOM_ICDF(18880), AOM_ICDF(19480), AOM_ICDF(20674), AOM_ICDF(21477),
- AOM_ICDF(23815), AOM_ICDF(29341), AOM_ICDF(32768), 0 },
- { AOM_ICDF(14592), AOM_ICDF(16367), AOM_ICDF(17712), AOM_ICDF(20293),
- AOM_ICDF(22544), AOM_ICDF(23829), AOM_ICDF(24877), AOM_ICDF(26326),
- AOM_ICDF(27660), AOM_ICDF(29875), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8960), AOM_ICDF(10448), AOM_ICDF(12279), AOM_ICDF(16206),
- AOM_ICDF(18672), AOM_ICDF(18682), AOM_ICDF(20058), AOM_ICDF(21547),
- AOM_ICDF(25097), AOM_ICDF(27165), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11136), AOM_ICDF(13840), AOM_ICDF(15762), AOM_ICDF(21710),
- AOM_ICDF(23038), AOM_ICDF(23734), AOM_ICDF(24863), AOM_ICDF(25882),
- AOM_ICDF(27765), AOM_ICDF(30071), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(12544), AOM_ICDF(14124), AOM_ICDF(16964), AOM_ICDF(21907),
- AOM_ICDF(23808), AOM_ICDF(24496), AOM_ICDF(25724), AOM_ICDF(26715),
- AOM_ICDF(27992), AOM_ICDF(30455), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10368), AOM_ICDF(13606), AOM_ICDF(18247), AOM_ICDF(20869),
- AOM_ICDF(22590), AOM_ICDF(23749), AOM_ICDF(25088), AOM_ICDF(26378),
- AOM_ICDF(27277), AOM_ICDF(29808), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9088), AOM_ICDF(11031), AOM_ICDF(12899), AOM_ICDF(23497),
- AOM_ICDF(24465), AOM_ICDF(24851), AOM_ICDF(25995), AOM_ICDF(26815),
- AOM_ICDF(27885), AOM_ICDF(30555), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11520), AOM_ICDF(14342), AOM_ICDF(15710), AOM_ICDF(19196),
- AOM_ICDF(21250), AOM_ICDF(21907), AOM_ICDF(24665), AOM_ICDF(26153),
- AOM_ICDF(27212), AOM_ICDF(30750), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9984), AOM_ICDF(11764), AOM_ICDF(13979), AOM_ICDF(16405),
- AOM_ICDF(19279), AOM_ICDF(20658), AOM_ICDF(23354), AOM_ICDF(25266),
- AOM_ICDF(26702), AOM_ICDF(29380), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10112), AOM_ICDF(12325), AOM_ICDF(15918), AOM_ICDF(19060),
- AOM_ICDF(21829), AOM_ICDF(23882), AOM_ICDF(26277), AOM_ICDF(27697),
- AOM_ICDF(30114), AOM_ICDF(32758), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9344), AOM_ICDF(10534), AOM_ICDF(12184), AOM_ICDF(16208),
- AOM_ICDF(19764), AOM_ICDF(20627), AOM_ICDF(22524), AOM_ICDF(23644),
- AOM_ICDF(26887), AOM_ICDF(29782), AOM_ICDF(32768), 0 },
- { AOM_ICDF(12928), AOM_ICDF(14013), AOM_ICDF(15625), AOM_ICDF(19107),
- AOM_ICDF(20654), AOM_ICDF(21451), AOM_ICDF(22910), AOM_ICDF(23873),
- AOM_ICDF(24776), AOM_ICDF(30239), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10368), AOM_ICDF(12818), AOM_ICDF(14610), AOM_ICDF(17350),
- AOM_ICDF(19568), AOM_ICDF(20710), AOM_ICDF(22971), AOM_ICDF(25114),
- AOM_ICDF(26340), AOM_ICDF(29127), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8960), AOM_ICDF(11192), AOM_ICDF(13720), AOM_ICDF(18429),
- AOM_ICDF(20409), AOM_ICDF(20417), AOM_ICDF(22250), AOM_ICDF(23318),
- AOM_ICDF(24647), AOM_ICDF(27248), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7808), AOM_ICDF(11416), AOM_ICDF(13918), AOM_ICDF(19028),
- AOM_ICDF(20181), AOM_ICDF(20839), AOM_ICDF(24380), AOM_ICDF(26018),
- AOM_ICDF(26967), AOM_ICDF(29845), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(9856), AOM_ICDF(11020), AOM_ICDF(14928), AOM_ICDF(18159),
- AOM_ICDF(19421), AOM_ICDF(20921), AOM_ICDF(23466), AOM_ICDF(26664),
- AOM_ICDF(27475), AOM_ICDF(28881), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(10302), AOM_ICDF(17323), AOM_ICDF(18907),
- AOM_ICDF(19868), AOM_ICDF(21184), AOM_ICDF(24171), AOM_ICDF(28033),
- AOM_ICDF(28625), AOM_ICDF(29353), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7936), AOM_ICDF(9197), AOM_ICDF(12604), AOM_ICDF(20616),
- AOM_ICDF(21514), AOM_ICDF(22371), AOM_ICDF(24239), AOM_ICDF(26138),
- AOM_ICDF(26863), AOM_ICDF(29239), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11264), AOM_ICDF(12524), AOM_ICDF(16083), AOM_ICDF(18574),
- AOM_ICDF(19858), AOM_ICDF(20841), AOM_ICDF(24242), AOM_ICDF(27606),
- AOM_ICDF(28352), AOM_ICDF(29853), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(10208), AOM_ICDF(13292), AOM_ICDF(15170),
- AOM_ICDF(17277), AOM_ICDF(19226), AOM_ICDF(22083), AOM_ICDF(25046),
- AOM_ICDF(26041), AOM_ICDF(27802), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9088), AOM_ICDF(10568), AOM_ICDF(15511), AOM_ICDF(17246),
- AOM_ICDF(20170), AOM_ICDF(22791), AOM_ICDF(25558), AOM_ICDF(30740),
- AOM_ICDF(31635), AOM_ICDF(32764), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7040), AOM_ICDF(8045), AOM_ICDF(10653), AOM_ICDF(13145),
- AOM_ICDF(15286), AOM_ICDF(16614), AOM_ICDF(19075), AOM_ICDF(23140),
- AOM_ICDF(26224), AOM_ICDF(28652), AOM_ICDF(32768), 0 },
- { AOM_ICDF(10240), AOM_ICDF(11032), AOM_ICDF(14258), AOM_ICDF(17629),
- AOM_ICDF(18914), AOM_ICDF(19898), AOM_ICDF(22412), AOM_ICDF(24961),
- AOM_ICDF(25815), AOM_ICDF(29156), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11008), AOM_ICDF(12028), AOM_ICDF(14702), AOM_ICDF(16147),
- AOM_ICDF(17209), AOM_ICDF(18160), AOM_ICDF(21812), AOM_ICDF(27547),
- AOM_ICDF(28709), AOM_ICDF(30120), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7168), AOM_ICDF(9068), AOM_ICDF(14160), AOM_ICDF(16937),
- AOM_ICDF(18515), AOM_ICDF(18521), AOM_ICDF(20636), AOM_ICDF(24617),
- AOM_ICDF(25317), AOM_ICDF(26365), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(8510), AOM_ICDF(14195), AOM_ICDF(17148),
- AOM_ICDF(18158), AOM_ICDF(19201), AOM_ICDF(23070), AOM_ICDF(27351),
- AOM_ICDF(27901), AOM_ICDF(29422), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(10112), AOM_ICDF(11528), AOM_ICDF(15345), AOM_ICDF(19296),
- AOM_ICDF(21394), AOM_ICDF(21402), AOM_ICDF(22379), AOM_ICDF(23840),
- AOM_ICDF(24851), AOM_ICDF(26150), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8064), AOM_ICDF(10187), AOM_ICDF(17949), AOM_ICDF(20052),
- AOM_ICDF(22051), AOM_ICDF(22059), AOM_ICDF(23147), AOM_ICDF(24688),
- AOM_ICDF(25351), AOM_ICDF(26365), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6528), AOM_ICDF(8373), AOM_ICDF(11041), AOM_ICDF(21963),
- AOM_ICDF(23089), AOM_ICDF(23093), AOM_ICDF(24076), AOM_ICDF(24925),
- AOM_ICDF(25691), AOM_ICDF(27764), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9600), AOM_ICDF(11229), AOM_ICDF(14847), AOM_ICDF(17527),
- AOM_ICDF(19738), AOM_ICDF(19747), AOM_ICDF(21629), AOM_ICDF(23761),
- AOM_ICDF(24957), AOM_ICDF(27673), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8960), AOM_ICDF(10262), AOM_ICDF(13339), AOM_ICDF(15480),
- AOM_ICDF(19925), AOM_ICDF(19942), AOM_ICDF(21445), AOM_ICDF(23037),
- AOM_ICDF(24329), AOM_ICDF(25977), AOM_ICDF(32768), 0 },
- { AOM_ICDF(2944), AOM_ICDF(5973), AOM_ICDF(8904), AOM_ICDF(11875),
- AOM_ICDF(14864), AOM_ICDF(17853), AOM_ICDF(20824), AOM_ICDF(23810),
- AOM_ICDF(26784), AOM_ICDF(29776), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9472), AOM_ICDF(10564), AOM_ICDF(13426), AOM_ICDF(16561),
- AOM_ICDF(19685), AOM_ICDF(19697), AOM_ICDF(21076), AOM_ICDF(22583),
- AOM_ICDF(24891), AOM_ICDF(26983), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8448), AOM_ICDF(9493), AOM_ICDF(12221), AOM_ICDF(16542),
- AOM_ICDF(18394), AOM_ICDF(18401), AOM_ICDF(19580), AOM_ICDF(20971),
- AOM_ICDF(22031), AOM_ICDF(26770), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(10772), AOM_ICDF(14209), AOM_ICDF(16381),
- AOM_ICDF(18911), AOM_ICDF(18921), AOM_ICDF(20436), AOM_ICDF(23374),
- AOM_ICDF(24475), AOM_ICDF(26095), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7680), AOM_ICDF(9444), AOM_ICDF(13453), AOM_ICDF(16320),
- AOM_ICDF(18650), AOM_ICDF(18659), AOM_ICDF(19651), AOM_ICDF(21291),
- AOM_ICDF(22277), AOM_ICDF(23916), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(9920), AOM_ICDF(14740), AOM_ICDF(19864),
- AOM_ICDF(21495), AOM_ICDF(21501), AOM_ICDF(22953), AOM_ICDF(24372),
- AOM_ICDF(25192), AOM_ICDF(26760), AOM_ICDF(32768), 0 },
- },
- {
- { AOM_ICDF(9728), AOM_ICDF(13958), AOM_ICDF(18881), AOM_ICDF(23624),
- AOM_ICDF(24754), AOM_ICDF(25553), AOM_ICDF(26709), AOM_ICDF(27940),
- AOM_ICDF(28977), AOM_ICDF(30413), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8832), AOM_ICDF(12572), AOM_ICDF(22433), AOM_ICDF(24653),
- AOM_ICDF(25676), AOM_ICDF(26551), AOM_ICDF(27571), AOM_ICDF(28688),
- AOM_ICDF(29198), AOM_ICDF(30174), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5888), AOM_ICDF(8828), AOM_ICDF(11353), AOM_ICDF(23482),
- AOM_ICDF(24310), AOM_ICDF(24737), AOM_ICDF(25804), AOM_ICDF(26375),
- AOM_ICDF(27174), AOM_ICDF(29840), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9984), AOM_ICDF(13099), AOM_ICDF(16249), AOM_ICDF(19443),
- AOM_ICDF(20990), AOM_ICDF(22637), AOM_ICDF(24576), AOM_ICDF(25952),
- AOM_ICDF(26884), AOM_ICDF(29435), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8448), AOM_ICDF(11108), AOM_ICDF(15085), AOM_ICDF(18134),
- AOM_ICDF(20319), AOM_ICDF(21992), AOM_ICDF(23549), AOM_ICDF(24989),
- AOM_ICDF(27177), AOM_ICDF(29208), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9856), AOM_ICDF(13168), AOM_ICDF(18987), AOM_ICDF(22481),
- AOM_ICDF(24282), AOM_ICDF(26200), AOM_ICDF(27868), AOM_ICDF(30203),
- AOM_ICDF(31085), AOM_ICDF(32761), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(9119), AOM_ICDF(12629), AOM_ICDF(16877),
- AOM_ICDF(20262), AOM_ICDF(21125), AOM_ICDF(22307), AOM_ICDF(23615),
- AOM_ICDF(27727), AOM_ICDF(29972), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8320), AOM_ICDF(10230), AOM_ICDF(12783), AOM_ICDF(19005),
- AOM_ICDF(20213), AOM_ICDF(20668), AOM_ICDF(22039), AOM_ICDF(23045),
- AOM_ICDF(24146), AOM_ICDF(30478), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9088), AOM_ICDF(11308), AOM_ICDF(15416), AOM_ICDF(18118),
- AOM_ICDF(19762), AOM_ICDF(20906), AOM_ICDF(22574), AOM_ICDF(25162),
- AOM_ICDF(25994), AOM_ICDF(28455), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6912), AOM_ICDF(10548), AOM_ICDF(15148), AOM_ICDF(20026),
- AOM_ICDF(21612), AOM_ICDF(21618), AOM_ICDF(22707), AOM_ICDF(24200),
- AOM_ICDF(24869), AOM_ICDF(26844), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(12164), AOM_ICDF(16993), AOM_ICDF(21568),
- AOM_ICDF(22933), AOM_ICDF(23648), AOM_ICDF(25322), AOM_ICDF(26602),
- AOM_ICDF(27806), AOM_ICDF(29841), AOM_ICDF(32768), 0 },
+ AOM_ICDF(8960), AOM_ICDF(14083), AOM_ICDF(16058), AOM_ICDF(19129),
+ AOM_ICDF(21136), AOM_ICDF(23635), AOM_ICDF(24870), AOM_ICDF(27577),
+ AOM_ICDF(31176), AOM_ICDF(31187), AOM_ICDF(32768), 0,
},
+ {
+ AOM_ICDF(9984), AOM_ICDF(14208), AOM_ICDF(15589), AOM_ICDF(17640),
+ AOM_ICDF(22080), AOM_ICDF(26660), AOM_ICDF(27947), AOM_ICDF(29400),
+ AOM_ICDF(31605), AOM_ICDF(31611), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9216), AOM_ICDF(15167), AOM_ICDF(16263), AOM_ICDF(17767),
+ AOM_ICDF(21531), AOM_ICDF(26689), AOM_ICDF(27607), AOM_ICDF(28880),
+ AOM_ICDF(31291), AOM_ICDF(31296), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8448), AOM_ICDF(12756), AOM_ICDF(15781), AOM_ICDF(17279),
+ AOM_ICDF(21198), AOM_ICDF(24057), AOM_ICDF(26171), AOM_ICDF(29200),
+ AOM_ICDF(31901), AOM_ICDF(31913), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9984), AOM_ICDF(15074), AOM_ICDF(18244), AOM_ICDF(19878),
+ AOM_ICDF(22246), AOM_ICDF(24436), AOM_ICDF(25560), AOM_ICDF(28991),
+ AOM_ICDF(31687), AOM_ICDF(31700), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10496), AOM_ICDF(15128), AOM_ICDF(17012), AOM_ICDF(18989),
+ AOM_ICDF(21294), AOM_ICDF(25011), AOM_ICDF(25999), AOM_ICDF(27784),
+ AOM_ICDF(30934), AOM_ICDF(30941), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2944), AOM_ICDF(5875), AOM_ICDF(8846), AOM_ICDF(11817),
+ AOM_ICDF(14806), AOM_ICDF(17795), AOM_ICDF(20769), AOM_ICDF(23761),
+ AOM_ICDF(26747), AOM_ICDF(29739), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7424), AOM_ICDF(12915), AOM_ICDF(17544), AOM_ICDF(19392),
+ AOM_ICDF(23074), AOM_ICDF(25635), AOM_ICDF(26431), AOM_ICDF(28241),
+ AOM_ICDF(30088), AOM_ICDF(30095), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(11648), AOM_ICDF(13565), AOM_ICDF(18996), AOM_ICDF(19908),
+ AOM_ICDF(21897), AOM_ICDF(22852), AOM_ICDF(26656), AOM_ICDF(28172),
+ AOM_ICDF(28995), AOM_ICDF(31283), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10240), AOM_ICDF(14255), AOM_ICDF(18109), AOM_ICDF(19716),
+ AOM_ICDF(21521), AOM_ICDF(22859), AOM_ICDF(24613), AOM_ICDF(26161),
+ AOM_ICDF(27279), AOM_ICDF(30392), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6784), AOM_ICDF(7848), AOM_ICDF(18820), AOM_ICDF(19447),
+ AOM_ICDF(22335), AOM_ICDF(22733), AOM_ICDF(25112), AOM_ICDF(28427),
+ AOM_ICDF(29013), AOM_ICDF(31550), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11904), AOM_ICDF(13581), AOM_ICDF(17695), AOM_ICDF(19311),
+ AOM_ICDF(21698), AOM_ICDF(22562), AOM_ICDF(24391), AOM_ICDF(26559),
+ AOM_ICDF(27779), AOM_ICDF(30567), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10624), AOM_ICDF(12334), AOM_ICDF(14643), AOM_ICDF(16255),
+ AOM_ICDF(20783), AOM_ICDF(22767), AOM_ICDF(24929), AOM_ICDF(26876),
+ AOM_ICDF(27998), AOM_ICDF(31470), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(12032), AOM_ICDF(14415), AOM_ICDF(16715), AOM_ICDF(18712),
+ AOM_ICDF(21557), AOM_ICDF(25332), AOM_ICDF(27840), AOM_ICDF(29663),
+ AOM_ICDF(31708), AOM_ICDF(31715), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9728), AOM_ICDF(10683), AOM_ICDF(13955), AOM_ICDF(14786),
+ AOM_ICDF(18481), AOM_ICDF(19492), AOM_ICDF(26749), AOM_ICDF(28483),
+ AOM_ICDF(29116), AOM_ICDF(31958), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8960), AOM_ICDF(10032), AOM_ICDF(15755), AOM_ICDF(16949),
+ AOM_ICDF(19144), AOM_ICDF(19744), AOM_ICDF(22082), AOM_ICDF(27608),
+ AOM_ICDF(28411), AOM_ICDF(31838), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(14592), AOM_ICDF(15937), AOM_ICDF(18518), AOM_ICDF(19566),
+ AOM_ICDF(21817), AOM_ICDF(23102), AOM_ICDF(24436), AOM_ICDF(26651),
+ AOM_ICDF(28100), AOM_ICDF(30993), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8960), AOM_ICDF(10791), AOM_ICDF(14718), AOM_ICDF(16094),
+ AOM_ICDF(18560), AOM_ICDF(18570), AOM_ICDF(22120), AOM_ICDF(24188),
+ AOM_ICDF(25677), AOM_ICDF(31280), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11136), AOM_ICDF(13058), AOM_ICDF(19006), AOM_ICDF(20135),
+ AOM_ICDF(21463), AOM_ICDF(22159), AOM_ICDF(24042), AOM_ICDF(26348),
+ AOM_ICDF(27367), AOM_ICDF(30064), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(12544), AOM_ICDF(15384), AOM_ICDF(20327), AOM_ICDF(21555),
+ AOM_ICDF(23456), AOM_ICDF(24144), AOM_ICDF(25421), AOM_ICDF(27884),
+ AOM_ICDF(28875), AOM_ICDF(31188), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10368), AOM_ICDF(15009), AOM_ICDF(17631), AOM_ICDF(18970),
+ AOM_ICDF(20691), AOM_ICDF(21850), AOM_ICDF(22749), AOM_ICDF(25280),
+ AOM_ICDF(26570), AOM_ICDF(29530), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9088), AOM_ICDF(10956), AOM_ICDF(21554), AOM_ICDF(22698),
+ AOM_ICDF(23666), AOM_ICDF(24052), AOM_ICDF(25122), AOM_ICDF(27792),
+ AOM_ICDF(28612), AOM_ICDF(30825), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11520), AOM_ICDF(12888), AOM_ICDF(16374), AOM_ICDF(19132),
+ AOM_ICDF(21186), AOM_ICDF(21843), AOM_ICDF(22902), AOM_ICDF(26440),
+ AOM_ICDF(27928), AOM_ICDF(29946), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9984), AOM_ICDF(12199), AOM_ICDF(14625), AOM_ICDF(17321),
+ AOM_ICDF(20195), AOM_ICDF(21574), AOM_ICDF(23010), AOM_ICDF(25688),
+ AOM_ICDF(27600), AOM_ICDF(30988), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10112), AOM_ICDF(13705), AOM_ICDF(16847), AOM_ICDF(19242),
+ AOM_ICDF(22011), AOM_ICDF(24064), AOM_ICDF(26481), AOM_ICDF(29125),
+ AOM_ICDF(30545), AOM_ICDF(30555), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9344), AOM_ICDF(10994), AOM_ICDF(15018), AOM_ICDF(16915),
+ AOM_ICDF(20471), AOM_ICDF(21334), AOM_ICDF(24577), AOM_ICDF(27472),
+ AOM_ICDF(28592), AOM_ICDF(31578), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(12928), AOM_ICDF(14540), AOM_ICDF(18022), AOM_ICDF(19481),
+ AOM_ICDF(21028), AOM_ICDF(21825), AOM_ICDF(22728), AOM_ICDF(28191),
+ AOM_ICDF(29154), AOM_ICDF(31683), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10368), AOM_ICDF(12160), AOM_ICDF(14900), AOM_ICDF(17161),
+ AOM_ICDF(19379), AOM_ICDF(20521), AOM_ICDF(21747), AOM_ICDF(24534),
+ AOM_ICDF(26677), AOM_ICDF(30318), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8960), AOM_ICDF(11488), AOM_ICDF(16197), AOM_ICDF(18030),
+ AOM_ICDF(20010), AOM_ICDF(20018), AOM_ICDF(21347), AOM_ICDF(23948),
+ AOM_ICDF(25016), AOM_ICDF(30536), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7808), AOM_ICDF(10310), AOM_ICDF(15420), AOM_ICDF(18961),
+ AOM_ICDF(20114), AOM_ICDF(20772), AOM_ICDF(21721), AOM_ICDF(24599),
+ AOM_ICDF(26237), AOM_ICDF(29160), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(9856), AOM_ICDF(13764), AOM_ICDF(16995), AOM_ICDF(19540),
+ AOM_ICDF(20802), AOM_ICDF(22302), AOM_ICDF(23113), AOM_ICDF(24519),
+ AOM_ICDF(27717), AOM_ICDF(31604), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8704), AOM_ICDF(15725), AOM_ICDF(17309), AOM_ICDF(20296),
+ AOM_ICDF(21257), AOM_ICDF(22573), AOM_ICDF(23165), AOM_ICDF(23893),
+ AOM_ICDF(27755), AOM_ICDF(31170), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7936), AOM_ICDF(11343), AOM_ICDF(19355), AOM_ICDF(21223),
+ AOM_ICDF(22121), AOM_ICDF(22978), AOM_ICDF(23703), AOM_ICDF(26079),
+ AOM_ICDF(27978), AOM_ICDF(31507), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11264), AOM_ICDF(14823), AOM_ICDF(17314), AOM_ICDF(20715),
+ AOM_ICDF(21999), AOM_ICDF(22982), AOM_ICDF(23728), AOM_ICDF(25229),
+ AOM_ICDF(28593), AOM_ICDF(31508), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8704), AOM_ICDF(11788), AOM_ICDF(13666), AOM_ICDF(16523),
+ AOM_ICDF(18630), AOM_ICDF(20579), AOM_ICDF(21574), AOM_ICDF(23335),
+ AOM_ICDF(26298), AOM_ICDF(31264), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9088), AOM_ICDF(14031), AOM_ICDF(15766), AOM_ICDF(18533),
+ AOM_ICDF(21457), AOM_ICDF(24078), AOM_ICDF(24973), AOM_ICDF(26102),
+ AOM_ICDF(31284), AOM_ICDF(31288), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7040), AOM_ICDF(9648), AOM_ICDF(12140), AOM_ICDF(14601),
+ AOM_ICDF(16742), AOM_ICDF(18070), AOM_ICDF(21154), AOM_ICDF(23582),
+ AOM_ICDF(27647), AOM_ICDF(31763), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(10240), AOM_ICDF(13466), AOM_ICDF(16837), AOM_ICDF(19351),
+ AOM_ICDF(20636), AOM_ICDF(21620), AOM_ICDF(22474), AOM_ICDF(25815),
+ AOM_ICDF(28364), AOM_ICDF(31976), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(11008), AOM_ICDF(13682), AOM_ICDF(15127), AOM_ICDF(18779),
+ AOM_ICDF(19841), AOM_ICDF(20792), AOM_ICDF(21954), AOM_ICDF(23365),
+ AOM_ICDF(29100), AOM_ICDF(31748), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7168), AOM_ICDF(12260), AOM_ICDF(15037), AOM_ICDF(17152),
+ AOM_ICDF(18730), AOM_ICDF(18736), AOM_ICDF(19436), AOM_ICDF(20484),
+ AOM_ICDF(24465), AOM_ICDF(30868), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6784), AOM_ICDF(12469), AOM_ICDF(15422), AOM_ICDF(19291),
+ AOM_ICDF(20301), AOM_ICDF(21344), AOM_ICDF(21894), AOM_ICDF(23415),
+ AOM_ICDF(27696), AOM_ICDF(31042), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(10112), AOM_ICDF(13929), AOM_ICDF(17880), AOM_ICDF(18857),
+ AOM_ICDF(20955), AOM_ICDF(20963), AOM_ICDF(21974), AOM_ICDF(23273),
+ AOM_ICDF(24734), AOM_ICDF(31352), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8064), AOM_ICDF(15826), AOM_ICDF(17929), AOM_ICDF(19017),
+ AOM_ICDF(21016), AOM_ICDF(21024), AOM_ICDF(21687), AOM_ICDF(22701),
+ AOM_ICDF(24242), AOM_ICDF(30645), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6528), AOM_ICDF(9196), AOM_ICDF(20118), AOM_ICDF(21101),
+ AOM_ICDF(22227), AOM_ICDF(22231), AOM_ICDF(22997), AOM_ICDF(25070),
+ AOM_ICDF(25919), AOM_ICDF(30923), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9600), AOM_ICDF(13218), AOM_ICDF(15898), AOM_ICDF(17780),
+ AOM_ICDF(19991), AOM_ICDF(20000), AOM_ICDF(21196), AOM_ICDF(23912),
+ AOM_ICDF(26044), AOM_ICDF(31139), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8960), AOM_ICDF(12037), AOM_ICDF(14178), AOM_ICDF(15681),
+ AOM_ICDF(20126), AOM_ICDF(20143), AOM_ICDF(21435), AOM_ICDF(23083),
+ AOM_ICDF(24675), AOM_ICDF(31466), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(2944), AOM_ICDF(5875), AOM_ICDF(8846), AOM_ICDF(11817),
+ AOM_ICDF(14806), AOM_ICDF(17795), AOM_ICDF(20769), AOM_ICDF(23761),
+ AOM_ICDF(26747), AOM_ICDF(29739), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9472), AOM_ICDF(12334), AOM_ICDF(15469), AOM_ICDF(16848),
+ AOM_ICDF(19972), AOM_ICDF(19984), AOM_ICDF(22292), AOM_ICDF(24384),
+ AOM_ICDF(25891), AOM_ICDF(31676), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8448), AOM_ICDF(11176), AOM_ICDF(15497), AOM_ICDF(16676),
+ AOM_ICDF(18528), AOM_ICDF(18535), AOM_ICDF(19595), AOM_ICDF(24334),
+ AOM_ICDF(25725), AOM_ICDF(31723), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8704), AOM_ICDF(12141), AOM_ICDF(14313), AOM_ICDF(15828),
+ AOM_ICDF(18358), AOM_ICDF(18368), AOM_ICDF(19469), AOM_ICDF(21089),
+ AOM_ICDF(24027), AOM_ICDF(30700), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(7680), AOM_ICDF(11689), AOM_ICDF(14556), AOM_ICDF(15548),
+ AOM_ICDF(17878), AOM_ICDF(17887), AOM_ICDF(18873), AOM_ICDF(20512),
+ AOM_ICDF(22152), AOM_ICDF(31004), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6656), AOM_ICDF(11476), AOM_ICDF(16600), AOM_ICDF(18052),
+ AOM_ICDF(19683), AOM_ICDF(19689), AOM_ICDF(20509), AOM_ICDF(22077),
+ AOM_ICDF(23496), AOM_ICDF(29504), AOM_ICDF(32768), 0,
+ },
+ },
+ {
+ {
+ AOM_ICDF(9728), AOM_ICDF(14651), AOM_ICDF(19394), AOM_ICDF(20550),
+ AOM_ICDF(21680), AOM_ICDF(22479), AOM_ICDF(23516), AOM_ICDF(24952),
+ AOM_ICDF(26183), AOM_ICDF(28538), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8832), AOM_ICDF(18693), AOM_ICDF(20913), AOM_ICDF(21933),
+ AOM_ICDF(22956), AOM_ICDF(23831), AOM_ICDF(24341), AOM_ICDF(25317),
+ AOM_ICDF(26434), AOM_ICDF(29028), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(5888), AOM_ICDF(8413), AOM_ICDF(20542), AOM_ICDF(21609),
+ AOM_ICDF(22437), AOM_ICDF(22864), AOM_ICDF(23663), AOM_ICDF(26329),
+ AOM_ICDF(26900), AOM_ICDF(29828), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9984), AOM_ICDF(13134), AOM_ICDF(16328), AOM_ICDF(18267),
+ AOM_ICDF(19814), AOM_ICDF(21461), AOM_ICDF(22393), AOM_ICDF(24944),
+ AOM_ICDF(26320), AOM_ICDF(29653), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8448), AOM_ICDF(12425), AOM_ICDF(15474), AOM_ICDF(17031),
+ AOM_ICDF(19216), AOM_ICDF(20889), AOM_ICDF(23077), AOM_ICDF(25108),
+ AOM_ICDF(26548), AOM_ICDF(30108), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9856), AOM_ICDF(15675), AOM_ICDF(19169), AOM_ICDF(20837),
+ AOM_ICDF(22638), AOM_ICDF(24556), AOM_ICDF(25438), AOM_ICDF(27114),
+ AOM_ICDF(29449), AOM_ICDF(29456), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6784), AOM_ICDF(10294), AOM_ICDF(14542), AOM_ICDF(15724),
+ AOM_ICDF(19109), AOM_ICDF(19972), AOM_ICDF(24084), AOM_ICDF(26329),
+ AOM_ICDF(27637), AOM_ICDF(30433), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(8320), AOM_ICDF(10873), AOM_ICDF(17095), AOM_ICDF(18466),
+ AOM_ICDF(19674), AOM_ICDF(20129), AOM_ICDF(21230), AOM_ICDF(27562),
+ AOM_ICDF(28568), AOM_ICDF(30858), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(9088), AOM_ICDF(13196), AOM_ICDF(15898), AOM_ICDF(17566),
+ AOM_ICDF(19210), AOM_ICDF(20354), AOM_ICDF(21186), AOM_ICDF(23647),
+ AOM_ICDF(26235), AOM_ICDF(30548), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6912), AOM_ICDF(11512), AOM_ICDF(16390), AOM_ICDF(17479),
+ AOM_ICDF(19065), AOM_ICDF(19071), AOM_ICDF(19740), AOM_ICDF(21715),
+ AOM_ICDF(23208), AOM_ICDF(29132), AOM_ICDF(32768), 0,
+ },
+ {
+ AOM_ICDF(6656), AOM_ICDF(11485), AOM_ICDF(16060), AOM_ICDF(17734),
+ AOM_ICDF(19099), AOM_ICDF(19814), AOM_ICDF(21018), AOM_ICDF(23053),
+ AOM_ICDF(24333), AOM_ICDF(27260), AOM_ICDF(32768), 0,
+ },
+ },
#endif // CONFIG_SMOOTH_HV
-#else // CONFIG_ALT_INTRA
- { { AOM_ICDF(17536), AOM_ICDF(19321), AOM_ICDF(21527), AOM_ICDF(25360),
- AOM_ICDF(27516), AOM_ICDF(28026), AOM_ICDF(29323), AOM_ICDF(30023),
- AOM_ICDF(30999), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11776), AOM_ICDF(15466), AOM_ICDF(22360), AOM_ICDF(24865),
- AOM_ICDF(26991), AOM_ICDF(27889), AOM_ICDF(29299), AOM_ICDF(30519),
- AOM_ICDF(31398), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9344), AOM_ICDF(12272), AOM_ICDF(13793), AOM_ICDF(25813),
- AOM_ICDF(27359), AOM_ICDF(27654), AOM_ICDF(28573), AOM_ICDF(29130),
- AOM_ICDF(30551), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11648), AOM_ICDF(14123), AOM_ICDF(16454), AOM_ICDF(19948),
- AOM_ICDF(22780), AOM_ICDF(23846), AOM_ICDF(27087), AOM_ICDF(28995),
- AOM_ICDF(30380), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9216), AOM_ICDF(12436), AOM_ICDF(15295), AOM_ICDF(17996),
- AOM_ICDF(24006), AOM_ICDF(25465), AOM_ICDF(27405), AOM_ICDF(28725),
- AOM_ICDF(30383), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9344), AOM_ICDF(12181), AOM_ICDF(14433), AOM_ICDF(16634),
- AOM_ICDF(20355), AOM_ICDF(24317), AOM_ICDF(26133), AOM_ICDF(29295),
- AOM_ICDF(31344), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8576), AOM_ICDF(10750), AOM_ICDF(12556), AOM_ICDF(17996),
- AOM_ICDF(22315), AOM_ICDF(23609), AOM_ICDF(25040), AOM_ICDF(26157),
- AOM_ICDF(30573), AOM_ICDF(32768), 0 },
- { AOM_ICDF(11008), AOM_ICDF(13303), AOM_ICDF(15432), AOM_ICDF(20646),
- AOM_ICDF(23506), AOM_ICDF(24100), AOM_ICDF(25624), AOM_ICDF(26824),
- AOM_ICDF(28055), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9472), AOM_ICDF(12384), AOM_ICDF(14534), AOM_ICDF(17094),
- AOM_ICDF(20257), AOM_ICDF(22155), AOM_ICDF(24767), AOM_ICDF(28955),
- AOM_ICDF(30474), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7552), AOM_ICDF(14152), AOM_ICDF(17352), AOM_ICDF(22654),
- AOM_ICDF(25123), AOM_ICDF(25783), AOM_ICDF(27911), AOM_ICDF(29182),
- AOM_ICDF(30849), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(8064), AOM_ICDF(11538), AOM_ICDF(21987), AOM_ICDF(24941),
- AOM_ICDF(26913), AOM_ICDF(28136), AOM_ICDF(29222), AOM_ICDF(30469),
- AOM_ICDF(31331), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5504), AOM_ICDF(10403), AOM_ICDF(25080), AOM_ICDF(26762),
- AOM_ICDF(27933), AOM_ICDF(29104), AOM_ICDF(30092), AOM_ICDF(31576),
- AOM_ICDF(32004), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5632), AOM_ICDF(8706), AOM_ICDF(15097), AOM_ICDF(23714),
- AOM_ICDF(25344), AOM_ICDF(26072), AOM_ICDF(27380), AOM_ICDF(28580),
- AOM_ICDF(29840), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(11186), AOM_ICDF(17593), AOM_ICDF(20154),
- AOM_ICDF(22974), AOM_ICDF(24351), AOM_ICDF(26916), AOM_ICDF(29956),
- AOM_ICDF(30967), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5888), AOM_ICDF(10193), AOM_ICDF(16895), AOM_ICDF(19031),
- AOM_ICDF(23735), AOM_ICDF(25576), AOM_ICDF(27514), AOM_ICDF(29813),
- AOM_ICDF(30471), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4864), AOM_ICDF(8352), AOM_ICDF(16459), AOM_ICDF(18062),
- AOM_ICDF(21263), AOM_ICDF(25378), AOM_ICDF(26937), AOM_ICDF(30376),
- AOM_ICDF(31619), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4992), AOM_ICDF(7922), AOM_ICDF(13842), AOM_ICDF(18004),
- AOM_ICDF(21779), AOM_ICDF(23527), AOM_ICDF(25115), AOM_ICDF(27357),
- AOM_ICDF(30232), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(9716), AOM_ICDF(16379), AOM_ICDF(20053),
- AOM_ICDF(22487), AOM_ICDF(23613), AOM_ICDF(25437), AOM_ICDF(27270),
- AOM_ICDF(28516), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(9674), AOM_ICDF(16891), AOM_ICDF(18684),
- AOM_ICDF(21147), AOM_ICDF(23093), AOM_ICDF(25512), AOM_ICDF(30132),
- AOM_ICDF(30894), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(11318), AOM_ICDF(21038), AOM_ICDF(23650),
- AOM_ICDF(25303), AOM_ICDF(26262), AOM_ICDF(28295), AOM_ICDF(30479),
- AOM_ICDF(31212), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(10496), AOM_ICDF(12758), AOM_ICDF(14790), AOM_ICDF(24547),
- AOM_ICDF(26342), AOM_ICDF(26799), AOM_ICDF(27825), AOM_ICDF(28443),
- AOM_ICDF(30217), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7040), AOM_ICDF(11462), AOM_ICDF(17121), AOM_ICDF(24215),
- AOM_ICDF(26504), AOM_ICDF(27267), AOM_ICDF(28492), AOM_ICDF(29444),
- AOM_ICDF(30846), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(8158), AOM_ICDF(9215), AOM_ICDF(26451),
- AOM_ICDF(27407), AOM_ICDF(27524), AOM_ICDF(27995), AOM_ICDF(28275),
- AOM_ICDF(29767), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(12652), AOM_ICDF(14145), AOM_ICDF(20101),
- AOM_ICDF(22879), AOM_ICDF(23675), AOM_ICDF(25629), AOM_ICDF(27079),
- AOM_ICDF(28923), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(12374), AOM_ICDF(14366), AOM_ICDF(18855),
- AOM_ICDF(23842), AOM_ICDF(24358), AOM_ICDF(25639), AOM_ICDF(27087),
- AOM_ICDF(29706), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6400), AOM_ICDF(10005), AOM_ICDF(12939), AOM_ICDF(17753),
- AOM_ICDF(22206), AOM_ICDF(24790), AOM_ICDF(26785), AOM_ICDF(28164),
- AOM_ICDF(30520), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5632), AOM_ICDF(8176), AOM_ICDF(9713), AOM_ICDF(19053),
- AOM_ICDF(22343), AOM_ICDF(23222), AOM_ICDF(24453), AOM_ICDF(25070),
- AOM_ICDF(29761), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7040), AOM_ICDF(9754), AOM_ICDF(10833), AOM_ICDF(21229),
- AOM_ICDF(23540), AOM_ICDF(23943), AOM_ICDF(24839), AOM_ICDF(25675),
- AOM_ICDF(27033), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(11758), AOM_ICDF(13481), AOM_ICDF(17236),
- AOM_ICDF(20210), AOM_ICDF(21768), AOM_ICDF(24303), AOM_ICDF(26948),
- AOM_ICDF(28676), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4864), AOM_ICDF(12712), AOM_ICDF(14201), AOM_ICDF(23863),
- AOM_ICDF(25952), AOM_ICDF(26386), AOM_ICDF(27632), AOM_ICDF(28635),
- AOM_ICDF(30362), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(13184), AOM_ICDF(15173), AOM_ICDF(17647), AOM_ICDF(21576),
- AOM_ICDF(24474), AOM_ICDF(25267), AOM_ICDF(27699), AOM_ICDF(29283),
- AOM_ICDF(30549), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7552), AOM_ICDF(11295), AOM_ICDF(18257), AOM_ICDF(20811),
- AOM_ICDF(23213), AOM_ICDF(24606), AOM_ICDF(27731), AOM_ICDF(30407),
- AOM_ICDF(31237), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7936), AOM_ICDF(10846), AOM_ICDF(12816), AOM_ICDF(22436),
- AOM_ICDF(24614), AOM_ICDF(25130), AOM_ICDF(26890), AOM_ICDF(28199),
- AOM_ICDF(29091), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8576), AOM_ICDF(11411), AOM_ICDF(13830), AOM_ICDF(15918),
- AOM_ICDF(18996), AOM_ICDF(20044), AOM_ICDF(25114), AOM_ICDF(27835),
- AOM_ICDF(28972), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13646), AOM_ICDF(15966),
- AOM_ICDF(21162), AOM_ICDF(22012), AOM_ICDF(24701), AOM_ICDF(27506),
- AOM_ICDF(29644), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(9423), AOM_ICDF(12524), AOM_ICDF(14773),
- AOM_ICDF(19447), AOM_ICDF(22804), AOM_ICDF(26073), AOM_ICDF(29211),
- AOM_ICDF(30642), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(8916), AOM_ICDF(11059), AOM_ICDF(15861),
- AOM_ICDF(21174), AOM_ICDF(22338), AOM_ICDF(24620), AOM_ICDF(27071),
- AOM_ICDF(30899), AOM_ICDF(32768), 0 },
- { AOM_ICDF(9856), AOM_ICDF(11557), AOM_ICDF(13960), AOM_ICDF(18525),
- AOM_ICDF(21788), AOM_ICDF(22189), AOM_ICDF(24462), AOM_ICDF(26603),
- AOM_ICDF(27470), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7808), AOM_ICDF(10636), AOM_ICDF(13143), AOM_ICDF(15844),
- AOM_ICDF(18698), AOM_ICDF(20272), AOM_ICDF(24323), AOM_ICDF(30096),
- AOM_ICDF(31787), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(10928), AOM_ICDF(14596), AOM_ICDF(18926),
- AOM_ICDF(21586), AOM_ICDF(22688), AOM_ICDF(26626), AOM_ICDF(29001),
- AOM_ICDF(30399), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(8832), AOM_ICDF(10983), AOM_ICDF(13451), AOM_ICDF(16582),
- AOM_ICDF(21656), AOM_ICDF(23109), AOM_ICDF(24845), AOM_ICDF(26207),
- AOM_ICDF(28796), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(10844), AOM_ICDF(15554), AOM_ICDF(18073),
- AOM_ICDF(22954), AOM_ICDF(24901), AOM_ICDF(26776), AOM_ICDF(28649),
- AOM_ICDF(30419), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5120), AOM_ICDF(8252), AOM_ICDF(10072), AOM_ICDF(20108),
- AOM_ICDF(23535), AOM_ICDF(24346), AOM_ICDF(25761), AOM_ICDF(26418),
- AOM_ICDF(28675), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7680), AOM_ICDF(11012), AOM_ICDF(12627), AOM_ICDF(14595),
- AOM_ICDF(19462), AOM_ICDF(20888), AOM_ICDF(23348), AOM_ICDF(25703),
- AOM_ICDF(28159), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(9818), AOM_ICDF(11790), AOM_ICDF(13813),
- AOM_ICDF(22731), AOM_ICDF(24737), AOM_ICDF(26557), AOM_ICDF(28061),
- AOM_ICDF(29697), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5632), AOM_ICDF(8918), AOM_ICDF(11620), AOM_ICDF(13802),
- AOM_ICDF(19950), AOM_ICDF(23764), AOM_ICDF(25734), AOM_ICDF(28537),
- AOM_ICDF(31809), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4480), AOM_ICDF(6580), AOM_ICDF(7808), AOM_ICDF(12281),
- AOM_ICDF(19375), AOM_ICDF(20970), AOM_ICDF(22860), AOM_ICDF(24602),
- AOM_ICDF(29929), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7040), AOM_ICDF(9553), AOM_ICDF(11457), AOM_ICDF(15102),
- AOM_ICDF(20291), AOM_ICDF(21280), AOM_ICDF(22985), AOM_ICDF(24475),
- AOM_ICDF(26613), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6528), AOM_ICDF(10423), AOM_ICDF(12605), AOM_ICDF(14621),
- AOM_ICDF(19031), AOM_ICDF(21505), AOM_ICDF(24585), AOM_ICDF(27558),
- AOM_ICDF(29532), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(11659), AOM_ICDF(14463), AOM_ICDF(18867),
- AOM_ICDF(23653), AOM_ICDF(24903), AOM_ICDF(27115), AOM_ICDF(29389),
- AOM_ICDF(31382), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(8192), AOM_ICDF(10016), AOM_ICDF(13304), AOM_ICDF(16362),
- AOM_ICDF(21107), AOM_ICDF(25165), AOM_ICDF(26620), AOM_ICDF(28901),
- AOM_ICDF(30910), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5888), AOM_ICDF(8723), AOM_ICDF(16237), AOM_ICDF(18318),
- AOM_ICDF(22002), AOM_ICDF(25923), AOM_ICDF(27394), AOM_ICDF(29934),
- AOM_ICDF(31428), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(7138), AOM_ICDF(9841), AOM_ICDF(18442),
- AOM_ICDF(22447), AOM_ICDF(24618), AOM_ICDF(26337), AOM_ICDF(27945),
- AOM_ICDF(30168), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6784), AOM_ICDF(8916), AOM_ICDF(12270), AOM_ICDF(14851),
- AOM_ICDF(19886), AOM_ICDF(22759), AOM_ICDF(25105), AOM_ICDF(28368),
- AOM_ICDF(29760), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5120), AOM_ICDF(7928), AOM_ICDF(11324), AOM_ICDF(13340),
- AOM_ICDF(21205), AOM_ICDF(24224), AOM_ICDF(25926), AOM_ICDF(28518),
- AOM_ICDF(30560), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4480), AOM_ICDF(6580), AOM_ICDF(10058), AOM_ICDF(11237),
- AOM_ICDF(16807), AOM_ICDF(25937), AOM_ICDF(27218), AOM_ICDF(30015),
- AOM_ICDF(31348), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(6808), AOM_ICDF(9445), AOM_ICDF(12446),
- AOM_ICDF(18461), AOM_ICDF(21835), AOM_ICDF(23244), AOM_ICDF(26109),
- AOM_ICDF(30115), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5760), AOM_ICDF(7659), AOM_ICDF(10798), AOM_ICDF(14720),
- AOM_ICDF(19157), AOM_ICDF(21955), AOM_ICDF(23645), AOM_ICDF(26460),
- AOM_ICDF(28702), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5760), AOM_ICDF(8503), AOM_ICDF(11157), AOM_ICDF(13071),
- AOM_ICDF(17594), AOM_ICDF(22047), AOM_ICDF(24099), AOM_ICDF(29077),
- AOM_ICDF(30850), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4864), AOM_ICDF(9660), AOM_ICDF(14264), AOM_ICDF(17105),
- AOM_ICDF(21528), AOM_ICDF(24094), AOM_ICDF(26025), AOM_ICDF(28580),
- AOM_ICDF(30559), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(9600), AOM_ICDF(11139), AOM_ICDF(12998), AOM_ICDF(18660),
- AOM_ICDF(22158), AOM_ICDF(23501), AOM_ICDF(24659), AOM_ICDF(25736),
- AOM_ICDF(30296), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7168), AOM_ICDF(11068), AOM_ICDF(15984), AOM_ICDF(19969),
- AOM_ICDF(23169), AOM_ICDF(24704), AOM_ICDF(26216), AOM_ICDF(27572),
- AOM_ICDF(31368), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4480), AOM_ICDF(6801), AOM_ICDF(8018), AOM_ICDF(20908),
- AOM_ICDF(23071), AOM_ICDF(23583), AOM_ICDF(24301), AOM_ICDF(25062),
- AOM_ICDF(29427), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7168), AOM_ICDF(10068), AOM_ICDF(11753), AOM_ICDF(15843),
- AOM_ICDF(19742), AOM_ICDF(21358), AOM_ICDF(23809), AOM_ICDF(26189),
- AOM_ICDF(29067), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(9047), AOM_ICDF(10622), AOM_ICDF(13931),
- AOM_ICDF(22462), AOM_ICDF(23858), AOM_ICDF(25911), AOM_ICDF(27277),
- AOM_ICDF(29722), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5888), AOM_ICDF(7568), AOM_ICDF(9931), AOM_ICDF(13533),
- AOM_ICDF(18431), AOM_ICDF(22063), AOM_ICDF(23777), AOM_ICDF(26025),
- AOM_ICDF(30555), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4352), AOM_ICDF(6239), AOM_ICDF(7379), AOM_ICDF(13739),
- AOM_ICDF(16917), AOM_ICDF(18090), AOM_ICDF(18835), AOM_ICDF(19651),
- AOM_ICDF(30360), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6528), AOM_ICDF(8988), AOM_ICDF(10288), AOM_ICDF(15534),
- AOM_ICDF(19495), AOM_ICDF(20386), AOM_ICDF(21934), AOM_ICDF(23034),
- AOM_ICDF(26988), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7040), AOM_ICDF(10055), AOM_ICDF(11652), AOM_ICDF(14757),
- AOM_ICDF(19622), AOM_ICDF(21715), AOM_ICDF(23615), AOM_ICDF(26761),
- AOM_ICDF(29483), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4736), AOM_ICDF(10102), AOM_ICDF(12315), AOM_ICDF(19078),
- AOM_ICDF(21348), AOM_ICDF(22621), AOM_ICDF(24246), AOM_ICDF(26044),
- AOM_ICDF(29931), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(10496), AOM_ICDF(12410), AOM_ICDF(14955), AOM_ICDF(19891),
- AOM_ICDF(23137), AOM_ICDF(23792), AOM_ICDF(25159), AOM_ICDF(26378),
- AOM_ICDF(28125), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7936), AOM_ICDF(12204), AOM_ICDF(17104), AOM_ICDF(20191),
- AOM_ICDF(23468), AOM_ICDF(24630), AOM_ICDF(26156), AOM_ICDF(27628),
- AOM_ICDF(28913), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(8629), AOM_ICDF(10232), AOM_ICDF(23591),
- AOM_ICDF(25349), AOM_ICDF(25637), AOM_ICDF(26306), AOM_ICDF(27063),
- AOM_ICDF(28980), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(12088), AOM_ICDF(13461), AOM_ICDF(16646),
- AOM_ICDF(20516), AOM_ICDF(21455), AOM_ICDF(24062), AOM_ICDF(26579),
- AOM_ICDF(28368), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7296), AOM_ICDF(11177), AOM_ICDF(13117), AOM_ICDF(16196),
- AOM_ICDF(23378), AOM_ICDF(24708), AOM_ICDF(26440), AOM_ICDF(27997),
- AOM_ICDF(29078), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6272), AOM_ICDF(9377), AOM_ICDF(12575), AOM_ICDF(15616),
- AOM_ICDF(20919), AOM_ICDF(23697), AOM_ICDF(26603), AOM_ICDF(27566),
- AOM_ICDF(29903), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6528), AOM_ICDF(9091), AOM_ICDF(10478), AOM_ICDF(16445),
- AOM_ICDF(21081), AOM_ICDF(22320), AOM_ICDF(23871), AOM_ICDF(25087),
- AOM_ICDF(29258), AOM_ICDF(32768), 0 },
- { AOM_ICDF(8704), AOM_ICDF(11148), AOM_ICDF(12499), AOM_ICDF(17340),
- AOM_ICDF(20656), AOM_ICDF(21288), AOM_ICDF(22588), AOM_ICDF(23701),
- AOM_ICDF(24693), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7552), AOM_ICDF(11394), AOM_ICDF(12980), AOM_ICDF(15562),
- AOM_ICDF(19942), AOM_ICDF(21792), AOM_ICDF(25093), AOM_ICDF(28211),
- AOM_ICDF(28959), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5120), AOM_ICDF(11708), AOM_ICDF(13847), AOM_ICDF(19377),
- AOM_ICDF(22421), AOM_ICDF(23160), AOM_ICDF(25449), AOM_ICDF(27136),
- AOM_ICDF(29182), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(9984), AOM_ICDF(12031), AOM_ICDF(15190), AOM_ICDF(18673),
- AOM_ICDF(21422), AOM_ICDF(22812), AOM_ICDF(25690), AOM_ICDF(29118),
- AOM_ICDF(30458), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6144), AOM_ICDF(9680), AOM_ICDF(17436), AOM_ICDF(19610),
- AOM_ICDF(21820), AOM_ICDF(23485), AOM_ICDF(26313), AOM_ICDF(30826),
- AOM_ICDF(31843), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6016), AOM_ICDF(8315), AOM_ICDF(10607), AOM_ICDF(19333),
- AOM_ICDF(21572), AOM_ICDF(22553), AOM_ICDF(25266), AOM_ICDF(27288),
- AOM_ICDF(28551), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7168), AOM_ICDF(9668), AOM_ICDF(12646), AOM_ICDF(16257),
- AOM_ICDF(19648), AOM_ICDF(20899), AOM_ICDF(25304), AOM_ICDF(30465),
- AOM_ICDF(31625), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6144), AOM_ICDF(9368), AOM_ICDF(11836), AOM_ICDF(14130),
- AOM_ICDF(19153), AOM_ICDF(21157), AOM_ICDF(24876), AOM_ICDF(28452),
- AOM_ICDF(29396), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5504), AOM_ICDF(8486), AOM_ICDF(11996), AOM_ICDF(14412),
- AOM_ICDF(17968), AOM_ICDF(21814), AOM_ICDF(24424), AOM_ICDF(30682),
- AOM_ICDF(32059), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(7195), AOM_ICDF(9592), AOM_ICDF(13331),
- AOM_ICDF(17569), AOM_ICDF(19460), AOM_ICDF(22371), AOM_ICDF(25458),
- AOM_ICDF(28942), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(9206), AOM_ICDF(11783), AOM_ICDF(16456),
- AOM_ICDF(19253), AOM_ICDF(20390), AOM_ICDF(23775), AOM_ICDF(27007),
- AOM_ICDF(28425), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5888), AOM_ICDF(8303), AOM_ICDF(11361), AOM_ICDF(13440),
- AOM_ICDF(15848), AOM_ICDF(17549), AOM_ICDF(21532), AOM_ICDF(29564),
- AOM_ICDF(30665), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4608), AOM_ICDF(8788), AOM_ICDF(13284), AOM_ICDF(16621),
- AOM_ICDF(18983), AOM_ICDF(20286), AOM_ICDF(24577), AOM_ICDF(28960),
- AOM_ICDF(30314), AOM_ICDF(32768), 0 } },
- { { AOM_ICDF(8320), AOM_ICDF(15005), AOM_ICDF(19168), AOM_ICDF(24282),
- AOM_ICDF(26707), AOM_ICDF(27402), AOM_ICDF(28681), AOM_ICDF(29639),
- AOM_ICDF(30629), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5632), AOM_ICDF(13900), AOM_ICDF(22376), AOM_ICDF(24867),
- AOM_ICDF(26804), AOM_ICDF(27734), AOM_ICDF(29130), AOM_ICDF(30722),
- AOM_ICDF(31465), AOM_ICDF(32768), 0 },
- { AOM_ICDF(4992), AOM_ICDF(9115), AOM_ICDF(11055), AOM_ICDF(24893),
- AOM_ICDF(26316), AOM_ICDF(26661), AOM_ICDF(27663), AOM_ICDF(28301),
- AOM_ICDF(29418), AOM_ICDF(32768), 0 },
- { AOM_ICDF(7424), AOM_ICDF(12077), AOM_ICDF(14987), AOM_ICDF(19596),
- AOM_ICDF(22615), AOM_ICDF(23600), AOM_ICDF(26465), AOM_ICDF(28484),
- AOM_ICDF(29789), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6272), AOM_ICDF(11447), AOM_ICDF(14362), AOM_ICDF(18204),
- AOM_ICDF(23418), AOM_ICDF(24715), AOM_ICDF(26697), AOM_ICDF(28547),
- AOM_ICDF(29520), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5248), AOM_ICDF(10946), AOM_ICDF(15379), AOM_ICDF(18167),
- AOM_ICDF(22197), AOM_ICDF(25432), AOM_ICDF(27295), AOM_ICDF(30031),
- AOM_ICDF(30576), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5120), AOM_ICDF(9008), AOM_ICDF(11607), AOM_ICDF(18210),
- AOM_ICDF(22327), AOM_ICDF(23427), AOM_ICDF(24887), AOM_ICDF(26580),
- AOM_ICDF(29892), AOM_ICDF(32768), 0 },
- { AOM_ICDF(6656), AOM_ICDF(10124), AOM_ICDF(12689), AOM_ICDF(19922),
- AOM_ICDF(22480), AOM_ICDF(22807), AOM_ICDF(24441), AOM_ICDF(25579),
- AOM_ICDF(26787), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5376), AOM_ICDF(10084), AOM_ICDF(13983), AOM_ICDF(17113),
- AOM_ICDF(19996), AOM_ICDF(21614), AOM_ICDF(24403), AOM_ICDF(28651),
- AOM_ICDF(29938), AOM_ICDF(32768), 0 },
- { AOM_ICDF(5504), AOM_ICDF(14131), AOM_ICDF(17989), AOM_ICDF(23324),
- AOM_ICDF(25513), AOM_ICDF(26071), AOM_ICDF(27850), AOM_ICDF(29464),
- AOM_ICDF(30393), AOM_ICDF(32768), 0 } },
-#endif // CONFIG_ALT_INTRA
+};
+#endif // CONFIG_KF_CTX
+
+#if CONFIG_LPF_SB
+static const aom_cdf_prob default_lpf_reuse_cdf[LPF_REUSE_CONTEXT][CDF_SIZE(
+ 2)] = { { AOM_ICDF(8192), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4096), AOM_ICDF(32768), 0 } };
+
+static const aom_cdf_prob
+ default_lpf_delta_cdf[LPF_DELTA_CONTEXT][CDF_SIZE(DELTA_RANGE)] = {
+ { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
+ AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
+ AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
+ AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
+ AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
+ AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
+ AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
+ AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(4096), AOM_ICDF(7680), AOM_ICDF(10816), AOM_ICDF(13560),
+ AOM_ICDF(15961), AOM_ICDF(18062), AOM_ICDF(19900), AOM_ICDF(32768), 0 }
};
+static const aom_cdf_prob
+ default_lpf_sign_cdf[LPF_REUSE_CONTEXT][LPF_SIGN_CONTEXT][CDF_SIZE(2)] = {
+ { { AOM_ICDF(6554), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(26214), AOM_ICDF(32768), 0 } },
+ { { AOM_ICDF(16384), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(16384), AOM_ICDF(32768), 0 } }
+ };
+#endif // CONFIG_LPF_SB
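These default tables, like all CDF tables in this file, use the library's
inverted-CDF convention: assuming the default daala-ec build where
AOM_ICDF(a) equals 32768 - a, each row stores cumulative probabilities that
reach 32768 at the last symbol, and the trailing 0 is an extra slot which the
adaptive coder appears to use as an update counter (hence CDF_SIZE(n) == n + 1).
A minimal sketch of reading a row back as per-symbol probabilities, under that
assumption (print_cdf_row is hypothetical, not part of the patch):

#include <stdio.h>
/* icdf: one table row; nsymbs: number of symbols, i.e. CDF_SIZE(n) minus the
   counter slot. */
static void print_cdf_row(const aom_cdf_prob *icdf, int nsymbs) {
  int prev = 0;
  for (int i = 0; i < nsymbs; ++i) {
    const int cum = 32768 - icdf[i];  /* undo AOM_ICDF() */
    printf("P(symbol %d) = %d/32768\n", i, cum - prev);
    prev = cum;
  }
}
/* E.g. { AOM_ICDF(8192), AOM_ICDF(32768), 0 } -> P(0) = 1/4, P(1) = 3/4. */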
+
static void init_mode_probs(FRAME_CONTEXT *fc) {
- av1_copy(fc->switchable_interp_prob, default_switchable_interp_prob);
av1_copy(fc->partition_prob, default_partition_probs);
av1_copy(fc->intra_inter_prob, default_intra_inter_p);
av1_copy(fc->comp_inter_prob, default_comp_inter_p);
-#if CONFIG_PALETTE
av1_copy(fc->palette_y_size_cdf, default_palette_y_size_cdf);
av1_copy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf);
av1_copy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf);
av1_copy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf);
-#endif // CONFIG_PALETTE
+ av1_copy(fc->kf_y_cdf, default_kf_y_mode_cdf);
+#if CONFIG_MRC_TX
+ av1_copy(fc->mrc_mask_inter_cdf, default_mrc_mask_inter_cdf);
+ av1_copy(fc->mrc_mask_intra_cdf, default_mrc_mask_intra_cdf);
+#endif // CONFIG_MRC_TX
#if CONFIG_NEW_MULTISYMBOL
av1_copy(fc->comp_inter_cdf, default_comp_inter_cdf);
#endif // CONFIG_NEW_MULTISYMBOL
@@ -4928,6 +5699,8 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
#endif // CONFIG_EXT_COMP_REFS
av1_copy(fc->comp_ref_prob, default_comp_ref_p);
#if CONFIG_NEW_MULTISYMBOL
+ av1_copy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf);
+ av1_copy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf);
av1_copy(fc->comp_ref_cdf, default_comp_ref_cdf);
#endif
#if CONFIG_LV_MAP
@@ -4937,6 +5710,18 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
av1_copy(fc->dc_sign, default_dc_sign);
av1_copy(fc->coeff_base, default_coeff_base);
av1_copy(fc->coeff_lps, default_coeff_lps);
+#if BR_NODE
+ av1_copy(fc->coeff_br, default_coeff_br);
+#endif
+#if CONFIG_CTX1D
+ av1_copy(fc->eob_mode, default_eob_mode);
+ av1_copy(fc->empty_line, default_empty_line);
+ av1_copy(fc->hv_eob, default_hv_eob);
+#endif // CONFIG_CTX1D
+
+#if LV_MAP_PROB
+ av1_init_txb_probs(fc);
+#endif // LV_MAP_PROB
#endif
#if CONFIG_EXT_REFS
av1_copy(fc->comp_bwdref_prob, default_comp_bwdref_p);
@@ -4948,12 +5733,14 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
#if CONFIG_NEW_MULTISYMBOL
av1_copy(fc->single_ref_cdf, default_single_ref_cdf);
#endif
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
av1_copy(fc->comp_inter_mode_prob, default_comp_inter_mode_p);
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- av1_copy(fc->tx_size_probs, default_tx_size_prob);
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
fc->quarter_tx_size_prob = default_quarter_tx_size_prob;
+#if CONFIG_NEW_MULTISYMBOL
+ av1_copy(fc->quarter_tx_size_cdf, default_quarter_tx_size_cdf);
+#endif // CONFIG_NEW_MULTISYMBOL
#endif
#if CONFIG_VAR_TX
av1_copy(fc->txfm_partition_prob, default_txfm_partition_probs);
@@ -4981,12 +5768,15 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
#endif
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
av1_copy(fc->obmc_prob, default_obmc_prob);
-#if CONFIG_NEW_MULTISYMBOL
+#if CONFIG_NEW_MULTISYMBOL || CONFIG_NCOBMC_ADAPT_WEIGHT
av1_copy(fc->obmc_cdf, default_obmc_cdf);
#endif
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ av1_copy(fc->ncobmc_prob, default_ncobmc_prob);
+ av1_copy(fc->ncobmc_cdf, default_ncobmc_cdf);
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_EXT_INTER
av1_copy(fc->inter_compound_mode_probs, default_inter_compound_mode_probs);
av1_copy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf);
#if CONFIG_COMPOUND_SINGLEREF
@@ -4996,7 +5786,9 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
default_inter_singleref_comp_mode_cdf);
#endif // CONFIG_COMPOUND_SINGLEREF
av1_copy(fc->compound_type_prob, default_compound_type_probs);
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
av1_copy(fc->compound_type_cdf, default_compound_type_cdf);
+#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
#if CONFIG_INTERINTRA
av1_copy(fc->interintra_prob, default_interintra_prob);
av1_copy(fc->wedge_interintra_prob, default_wedge_interintra_prob);
@@ -5007,7 +5799,6 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
av1_copy(fc->interintra_mode_prob, default_interintra_mode_prob);
av1_copy(fc->interintra_mode_cdf, default_interintra_mode_cdf);
#endif // CONFIG_INTERINTRA
-#endif // CONFIG_EXT_INTER
#if CONFIG_SUPERTX
av1_copy(fc->supertx_prob, default_supertx_prob);
#endif // CONFIG_SUPERTX
@@ -5024,8 +5815,10 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
#if CONFIG_FILTER_INTRA
av1_copy(fc->filter_intra_probs, default_filter_intra_probs);
#endif // CONFIG_FILTER_INTRA
- av1_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob);
- av1_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob);
+#if CONFIG_LGT_FROM_PRED
+ av1_copy(fc->intra_lgt_prob, default_intra_lgt_prob);
+ av1_copy(fc->inter_lgt_prob, default_inter_lgt_prob);
+#endif // CONFIG_LGT_FROM_PRED
#if CONFIG_LOOP_RESTORATION
av1_copy(fc->switchable_restore_prob, default_switchable_restore_prob);
#endif // CONFIG_LOOP_RESTORATION
@@ -5044,36 +5837,29 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
av1_copy(fc->seg.tree_cdf, default_seg_tree_cdf);
av1_copy(fc->tx_size_cdf, default_tx_size_cdf);
-#if CONFIG_DELTA_Q
av1_copy(fc->delta_q_prob, default_delta_q_probs);
av1_copy(fc->delta_q_cdf, default_delta_q_cdf);
#if CONFIG_EXT_DELTA_Q
av1_copy(fc->delta_lf_prob, default_delta_lf_probs);
av1_copy(fc->delta_lf_cdf, default_delta_lf_cdf);
+#if CONFIG_LOOPFILTER_LEVEL
+ av1_copy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf);
+#endif // CONFIG_LOOPFILTER_LEVEL
#endif
-#endif // CONFIG_DELTA_Q
#if CONFIG_CFL
+ av1_copy(fc->cfl_sign_cdf, default_cfl_sign_cdf);
av1_copy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf);
#endif
#if CONFIG_INTRABC
- fc->intrabc_prob = INTRABC_PROB_DEFAULT;
+ av1_copy(fc->intrabc_cdf, default_intrabc_cdf);
#endif
+#if CONFIG_LPF_SB
+ av1_copy(fc->lpf_reuse_cdf, default_lpf_reuse_cdf);
+ av1_copy(fc->lpf_delta_cdf, default_lpf_delta_cdf);
+ av1_copy(fc->lpf_sign_cdf, default_lpf_sign_cdf);
+#endif // CONFIG_LPF_SB
}
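init_mode_probs() simply seeds every per-frame probability and CDF table from
the compile-time defaults above. The copy primitive is av1_copy(); a sketch of
its assumed definition, mirroring av1/common/common.h:

#include <assert.h>
#include <string.h>
/* Copies src into dest, asserting the two arrays have the same size so a
   defaults table can never silently under- or over-fill a FRAME_CONTEXT
   field. */
#define av1_copy(dest, src)              \
  {                                      \
    assert(sizeof(dest) == sizeof(src)); \
    memcpy(dest, src, sizeof(src));      \
  }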
-int av1_switchable_interp_ind[SWITCHABLE_FILTERS];
-int av1_switchable_interp_inv[SWITCHABLE_FILTERS];
-
-#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
-const aom_tree_index av1_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)] =
- {
- -EIGHTTAP_REGULAR, 2, 4, -MULTITAP_SHARP, -EIGHTTAP_SMOOTH,
- -EIGHTTAP_SMOOTH2,
- };
-#else
-const aom_tree_index av1_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)] =
- { -EIGHTTAP_REGULAR, 2, -EIGHTTAP_SMOOTH, -MULTITAP_SHARP };
-#endif // CONFIG_DUAL_FILTER
-
void av1_adapt_inter_frame_probs(AV1_COMMON *cm) {
int i, j;
FRAME_CONTEXT *fc = cm->fc;
@@ -5120,12 +5906,12 @@ void av1_adapt_inter_frame_probs(AV1_COMMON *cm) {
fc->single_ref_prob[i][j] = av1_mode_mv_merge_probs(
pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]);
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
for (i = 0; i < COMP_INTER_MODE_CONTEXTS; i++)
fc->comp_inter_mode_prob[i] = av1_mode_mv_merge_probs(
pre_fc->comp_inter_mode_prob[i], counts->comp_inter_mode[i]);
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
fc->newmv_prob[i] =
@@ -5149,7 +5935,12 @@ void av1_adapt_inter_frame_probs(AV1_COMMON *cm) {
for (i = 0; i < ADAPT_OVERLAP_BLOCKS; ++i)
aom_tree_merge_probs(av1_ncobmc_mode_tree, pre_fc->ncobmc_mode_prob[i],
counts->ncobmc_mode[i], fc->ncobmc_mode_prob[i]);
+#if CONFIG_WARPED_MOTION
+ for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; ++i)
+ aom_tree_merge_probs(av1_ncobmc_tree, pre_fc->ncobmc_prob[i],
+ counts->ncobmc[i], fc->ncobmc_prob[i]);
#endif
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; ++i)
fc->obmc_prob[i] =
@@ -5166,7 +5957,6 @@ void av1_adapt_inter_frame_probs(AV1_COMMON *cm) {
}
#endif // CONFIG_SUPERTX
-#if CONFIG_EXT_INTER
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
aom_tree_merge_probs(
av1_inter_compound_mode_tree, pre_fc->inter_compound_mode_probs[i],
@@ -5209,28 +5999,15 @@ void av1_adapt_inter_frame_probs(AV1_COMMON *cm) {
}
}
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
-#endif // CONFIG_EXT_INTER
-
- if (cm->interp_filter == SWITCHABLE) {
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- aom_tree_merge_probs(
- av1_switchable_interp_tree, pre_fc->switchable_interp_prob[i],
- counts->switchable_interp[i], fc->switchable_interp_prob[i]);
- }
}
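Every adaptation call above reduces to the same count-weighted blend of the
previous frame's probability with the probability observed this frame. A
hedged sketch of that primitive, mirroring merge_probs() from aom_dsp/prob.h
(the clamping and rounding details are assumptions):

static aom_prob merge_probs_sketch(aom_prob pre_prob, const unsigned int ct[2],
                                   unsigned int count_sat,
                                   unsigned int max_update_factor) {
  const unsigned int den = ct[0] + ct[1];
  /* Probability of the 0-branch observed this frame, clamped to [1, 255];
     fall back to 128 (p = 0.5) when no symbols were coded. */
  const aom_prob observed =
      den == 0
          ? 128
          : (aom_prob)AOMMAX(1u, AOMMIN(255u, (256u * ct[0] + den / 2) / den));
  /* The more symbols were seen (saturating at count_sat), the further the
     result moves toward the observed probability. */
  const unsigned int count = AOMMIN(den, count_sat);
  const unsigned int factor = max_update_factor * count / count_sat;
  /* Blend in 8-bit fixed point with rounding. */
  return (aom_prob)((pre_prob * (256 - factor) + observed * factor + 128) >> 8);
}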
void av1_adapt_intra_frame_probs(AV1_COMMON *cm) {
- int i, j;
+ int i;
FRAME_CONTEXT *fc = cm->fc;
const FRAME_CONTEXT *pre_fc = cm->pre_fc;
const FRAME_COUNTS *counts = &cm->counts;
if (cm->tx_mode == TX_MODE_SELECT) {
- for (i = 0; i < MAX_TX_DEPTH; ++i) {
- for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
- aom_tree_merge_probs(av1_tx_size_tree[i], pre_fc->tx_size_probs[i][j],
- counts->tx_size[i][j], fc->tx_size_probs[i][j]);
- }
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
fc->quarter_tx_size_prob = av1_mode_mv_merge_probs(
pre_fc->quarter_tx_size_prob, counts->quarter_tx_size);
@@ -5249,38 +6026,22 @@ void av1_adapt_intra_frame_probs(AV1_COMMON *cm) {
fc->skip_probs[i] =
av1_mode_mv_merge_probs(pre_fc->skip_probs[i], counts->skip[i]);
-#if CONFIG_EXT_TX
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- int s;
- for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
- if (use_inter_ext_tx_for_txsize[s][i]) {
- aom_tree_merge_probs(
- av1_ext_tx_inter_tree[s], pre_fc->inter_ext_tx_prob[s][i],
- counts->inter_ext_tx[s][i], fc->inter_ext_tx_prob[s][i]);
- }
- }
- for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
- if (use_intra_ext_tx_for_txsize[s][i]) {
- for (j = 0; j < INTRA_MODES; ++j)
- aom_tree_merge_probs(
- av1_ext_tx_intra_tree[s], pre_fc->intra_ext_tx_prob[s][i][j],
- counts->intra_ext_tx[s][i][j], fc->intra_ext_tx_prob[s][i][j]);
- }
+#if CONFIG_LGT_FROM_PRED
+ int j;
+ if (LGT_FROM_PRED_INTRA) {
+ for (i = TX_4X4; i < LGT_SIZES; ++i) {
+ for (j = 0; j < INTRA_MODES; ++j)
+ fc->intra_lgt_prob[i][j] = av1_mode_mv_merge_probs(
+ pre_fc->intra_lgt_prob[i][j], counts->intra_lgt[i][j]);
}
}
-#else
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- for (j = 0; j < TX_TYPES; ++j) {
- aom_tree_merge_probs(av1_ext_tx_tree, pre_fc->intra_ext_tx_prob[i][j],
- counts->intra_ext_tx[i][j],
- fc->intra_ext_tx_prob[i][j]);
+ if (LGT_FROM_PRED_INTER) {
+ for (i = TX_4X4; i < LGT_SIZES; ++i) {
+ fc->inter_lgt_prob[i] = av1_mode_mv_merge_probs(pre_fc->inter_lgt_prob[i],
+ counts->inter_lgt[i]);
}
}
- for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- aom_tree_merge_probs(av1_ext_tx_tree, pre_fc->inter_ext_tx_prob[i],
- counts->inter_ext_tx[i], fc->inter_ext_tx_prob[i]);
- }
-#endif // CONFIG_EXT_TX
+#endif // CONFIG_LGT_FROM_PRED
if (cm->seg.temporal_update) {
for (i = 0; i < PREDICTION_PROBS; i++)
@@ -5331,16 +6092,20 @@ void av1_adapt_intra_frame_probs(AV1_COMMON *cm) {
av1_mode_mv_merge_probs(pre_fc->partition_prob[i][PARTITION_HORZ], ct);
}
#endif
-#if CONFIG_DELTA_Q
for (i = 0; i < DELTA_Q_PROBS; ++i)
fc->delta_q_prob[i] =
mode_mv_merge_probs(pre_fc->delta_q_prob[i], counts->delta_q[i]);
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ for (i = 0; i < FRAME_LF_COUNT; ++i)
+ for (int j = 0; j < DELTA_LF_PROBS; ++j)
+ fc->delta_lf_multi_prob[i][j] = mode_mv_merge_probs(
+ pre_fc->delta_lf_multi_prob[i][j], counts->delta_lf_multi[i][j]);
+#endif // CONFIG_LOOPFILTER_LEVEL
for (i = 0; i < DELTA_LF_PROBS; ++i)
fc->delta_lf_prob[i] =
mode_mv_merge_probs(pre_fc->delta_lf_prob[i], counts->delta_lf[i]);
#endif // CONFIG_EXT_DELTA_Q
-#endif
#if CONFIG_EXT_INTRA
#if CONFIG_INTRA_INTERP
for (i = 0; i < INTRA_FILTERS + 1; ++i) {
@@ -5369,13 +6134,16 @@ static void set_default_lf_deltas(struct loopfilter *lf) {
lf->ref_deltas[BWDREF_FRAME] = lf->ref_deltas[LAST_FRAME];
#endif // CONFIG_EXT_REFS
lf->ref_deltas[GOLDEN_FRAME] = -1;
-#if CONFIG_ALTREF2
+#if CONFIG_EXT_REFS
lf->ref_deltas[ALTREF2_FRAME] = -1;
-#endif // CONFIG_ALTREF2
+#endif // CONFIG_EXT_REFS
lf->ref_deltas[ALTREF_FRAME] = -1;
lf->mode_deltas[0] = 0;
lf->mode_deltas[1] = 0;
+
+ av1_copy(lf->last_ref_deltas, lf->ref_deltas);
+ av1_copy(lf->last_mode_deltas, lf->mode_deltas);
}
void av1_setup_past_independence(AV1_COMMON *cm) {
@@ -5404,6 +6172,9 @@ void av1_setup_past_independence(AV1_COMMON *cm) {
av1_default_coef_probs(cm);
init_mode_probs(cm->fc);
av1_init_mv_probs(cm);
+#if CONFIG_LV_MAP
+ av1_init_lv_map(cm);
+#endif
#if CONFIG_PVQ
av1_default_pvq_probs(cm);
#endif // CONFIG_PVQ
@@ -5413,19 +6184,34 @@ void av1_setup_past_independence(AV1_COMMON *cm) {
av1_convolve_init(cm);
cm->fc->initialized = 1;
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ if (cm->frame_type == KEY_FRAME) {
+ // Reset all frame contexts, as all reference frames will be lost.
+ for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
+ }
+#else
if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL) {
// Reset all frame contexts.
for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
} else if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT) {
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ // Reset the frame context of the first specified ref frame.
+ if (cm->frame_refs[0].idx >= 0) {
+ cm->frame_contexts[cm->frame_refs[0].idx] = *cm->fc;
+ }
+#else
// Reset only the frame context specified in the frame header.
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
}
+#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
// prev_mip will only be allocated in encoder.
if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode)
memset(cm->prev_mip, 0,
cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip));
-
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
cm->frame_context_idx = 0;
+#endif // !CONFIG_NO_FRAME_CONTEXT_SIGNALING
}
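Note the two reset policies above: with CONFIG_NO_FRAME_CONTEXT_SIGNALING,
every saved context is rewritten only on key frames (all references are lost
anyway); otherwise the frame header can request a full or single-context
reset. A condensed sketch of the signaling branch (hypothetical helper, names
taken from the code above):

static int should_reset_all_contexts(const AV1_COMMON *cm) {
  return cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
         cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL;
}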
diff --git a/third_party/aom/av1/common/entropymode.h b/third_party/aom/av1/common/entropymode.h
index 32532471a..3452241b0 100644
--- a/third_party/aom/av1/common/entropymode.h
+++ b/third_party/aom/av1/common/entropymode.h
@@ -33,14 +33,11 @@ extern "C" {
#define TX_SIZE_CONTEXTS 2
#define INTER_OFFSET(mode) ((mode)-NEARESTMV)
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
#define INTER_SINGLEREF_COMP_OFFSET(mode) ((mode)-SR_NEAREST_NEARMV)
#endif // CONFIG_COMPOUND_SINGLEREF
#define INTER_COMPOUND_OFFSET(mode) ((mode)-NEAREST_NEARESTMV)
-#endif // CONFIG_EXT_INTER
-#if CONFIG_PALETTE
// Number of possible contexts for a color index.
// As can be seen from av1_get_palette_color_index_context(), the possible
// contexts are (2,0,0), (2,2,1), (3,2,0), (4,1,0), (5,0,0). These are mapped to
@@ -70,11 +67,10 @@ extern "C" {
#define PALETTE_UV_MODE_CONTEXTS 2
#define PALETTE_MAX_BLOCK_SIZE (64 * 64)
-#endif // CONFIG_PALETTE
-#if CONFIG_INTRABC
-#define INTRABC_PROB_DEFAULT 192
-#endif // CONFIG_INTRABC
+#if CONFIG_KF_CTX
+#define KF_MODE_CONTEXTS 5
+#endif
struct AV1Common;
@@ -98,12 +94,8 @@ typedef struct frame_contexts {
#else
aom_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
#endif
- av1_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES];
coeff_cdf_model coef_tail_cdfs[TX_SIZES][PLANE_TYPES];
coeff_cdf_model coef_head_cdfs[TX_SIZES][PLANE_TYPES];
- aom_prob blockzero_probs[TX_SIZES][PLANE_TYPES][REF_TYPES][BLOCKZ_CONTEXTS];
- aom_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
- [SWITCHABLE_FILTERS - 1];
#if CONFIG_ADAPT_SCAN
// TODO(angiebird): try aom_prob
#if CONFIG_CHROMA_2X2
@@ -179,6 +171,38 @@ typedef struct frame_contexts {
aom_prob coeff_base[TX_SIZES][PLANE_TYPES][NUM_BASE_LEVELS]
[COEFF_BASE_CONTEXTS];
aom_prob coeff_lps[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS];
+#if BR_NODE
+ aom_prob coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS][LEVEL_CONTEXTS];
+#endif
+#if CONFIG_CTX1D
+ aom_prob eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES];
+ aom_prob empty_line[TX_SIZES][PLANE_TYPES][TX_CLASSES][EMPTY_LINE_CONTEXTS];
+ aom_prob hv_eob[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_EOB_CONTEXTS];
+#endif // CONFIG_CTX1D
+
+#if LV_MAP_PROB
+ aom_cdf_prob txb_skip_cdf[TX_SIZES][TXB_SKIP_CONTEXTS][CDF_SIZE(2)];
+ aom_cdf_prob nz_map_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]
+ [CDF_SIZE(2)];
+ aom_cdf_prob eob_flag_cdf[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]
+ [CDF_SIZE(2)];
+ aom_cdf_prob dc_sign_cdf[PLANE_TYPES][DC_SIGN_CONTEXTS][CDF_SIZE(2)];
+ aom_cdf_prob coeff_base_cdf[TX_SIZES][PLANE_TYPES][NUM_BASE_LEVELS]
+ [COEFF_BASE_CONTEXTS][CDF_SIZE(2)];
+ aom_cdf_prob coeff_lps_cdf[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]
+ [CDF_SIZE(2)];
+#if BR_NODE
+ aom_cdf_prob coeff_br_cdf[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS]
+ [LEVEL_CONTEXTS][CDF_SIZE(2)];
+#endif
+#if CONFIG_CTX1D
+ aom_cdf_prob eob_mode_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES][CDF_SIZE(2)];
+ aom_cdf_prob empty_line_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES]
+ [EMPTY_LINE_CONTEXTS][CDF_SIZE(2)];
+ aom_cdf_prob hv_eob_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_EOB_CONTEXTS]
+ [CDF_SIZE(2)];
+#endif // CONFIG_CTX1D
+#endif // LV_MAP_PROB
#endif
aom_prob newmv_prob[NEWMV_MODE_CONTEXTS];
@@ -192,7 +216,6 @@ typedef struct frame_contexts {
aom_cdf_prob drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)];
#endif
-#if CONFIG_EXT_INTER
aom_prob inter_compound_mode_probs[INTER_MODE_CONTEXTS]
[INTER_COMPOUND_MODES - 1];
aom_cdf_prob inter_compound_mode_cdf[INTER_MODE_CONTEXTS]
@@ -204,7 +227,9 @@ typedef struct frame_contexts {
INTER_SINGLEREF_COMP_MODES)];
#endif // CONFIG_COMPOUND_SINGLEREF
aom_prob compound_type_prob[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1];
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
aom_cdf_prob compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES)];
+#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
#if CONFIG_INTERINTRA
aom_prob interintra_prob[BLOCK_SIZE_GROUPS];
aom_prob wedge_interintra_prob[BLOCK_SIZES_ALL];
@@ -216,7 +241,6 @@ typedef struct frame_contexts {
aom_cdf_prob interintra_mode_cdf[BLOCK_SIZE_GROUPS]
[CDF_SIZE(INTERINTRA_MODES)];
#endif // CONFIG_INTERINTRA
-#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
aom_prob motion_mode_prob[BLOCK_SIZES_ALL][MOTION_MODES - 1];
aom_cdf_prob motion_mode_cdf[BLOCK_SIZES_ALL][CDF_SIZE(MOTION_MODES)];
@@ -226,15 +250,18 @@ typedef struct frame_contexts {
[CDF_SIZE(MAX_NCOBMC_MODES)];
#endif
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ aom_prob ncobmc_prob[BLOCK_SIZES_ALL][OBMC_FAMILY_MODES - 1];
+ aom_cdf_prob ncobmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(OBMC_FAMILY_MODES)];
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
aom_prob obmc_prob[BLOCK_SIZES_ALL];
-#if CONFIG_NEW_MULTISYMBOL
+#if CONFIG_NEW_MULTISYMBOL || CONFIG_NCOBMC_ADAPT_WEIGHT
aom_cdf_prob obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)];
#endif // CONFIG_NEW_MULTISYMBOL
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
aom_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
aom_prob comp_inter_prob[COMP_INTER_CONTEXTS];
-#if CONFIG_PALETTE
aom_cdf_prob palette_y_size_cdf[PALETTE_BLOCK_SIZES][CDF_SIZE(PALETTE_SIZES)];
aom_cdf_prob palette_uv_size_cdf[PALETTE_BLOCK_SIZES]
[CDF_SIZE(PALETTE_SIZES)];
@@ -244,8 +271,16 @@ typedef struct frame_contexts {
aom_cdf_prob palette_uv_color_index_cdf[PALETTE_SIZES]
[PALETTE_COLOR_INDEX_CONTEXTS]
[CDF_SIZE(PALETTE_COLORS)];
-#endif // CONFIG_PALETTE
+#if CONFIG_MRC_TX
+ aom_cdf_prob mrc_mask_inter_cdf[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
+ [CDF_SIZE(PALETTE_COLORS)];
+ aom_cdf_prob mrc_mask_intra_cdf[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
+ [CDF_SIZE(PALETTE_COLORS)];
+#endif // CONFIG_MRC_TX
#if CONFIG_NEW_MULTISYMBOL
+ aom_cdf_prob palette_y_mode_cdf[PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS]
+ [CDF_SIZE(2)];
+ aom_cdf_prob palette_uv_mode_cdf[PALETTE_UV_MODE_CONTEXTS][CDF_SIZE(2)];
aom_cdf_prob comp_inter_cdf[COMP_INTER_CONTEXTS][CDF_SIZE(2)];
aom_cdf_prob single_ref_cdf[REF_CONTEXTS][SINGLE_REFS - 1][CDF_SIZE(2)];
#endif
@@ -273,12 +308,14 @@ typedef struct frame_contexts {
aom_cdf_prob comp_ref_cdf[REF_CONTEXTS][COMP_REFS - 1][CDF_SIZE(2)];
#endif // CONFIG_EXT_REFS
#endif
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
aom_prob comp_inter_mode_prob[COMP_INTER_MODE_CONTEXTS];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- aom_prob tx_size_probs[MAX_TX_DEPTH][TX_SIZE_CONTEXTS][MAX_TX_DEPTH];
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
aom_prob quarter_tx_size_prob;
+#if CONFIG_NEW_MULTISYMBOL
+ aom_cdf_prob quarter_tx_size_cdf[CDF_SIZE(2)];
+#endif
#endif
#if CONFIG_VAR_TX
aom_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
@@ -294,17 +331,9 @@ typedef struct frame_contexts {
nmv_context nmvc[NMV_CONTEXTS];
#if CONFIG_INTRABC
nmv_context ndvc;
- aom_prob intrabc_prob;
+ aom_cdf_prob intrabc_cdf[CDF_SIZE(2)];
#endif
int initialized;
-#if CONFIG_EXT_TX
- aom_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1];
- aom_prob intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
- [TX_TYPES - 1];
-#else
- aom_prob intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1];
- aom_prob inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1];
-#endif // CONFIG_EXT_TX
#if CONFIG_SUPERTX
aom_prob supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES];
#endif // CONFIG_SUPERTX
@@ -329,19 +358,25 @@ typedef struct frame_contexts {
#endif
aom_cdf_prob switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS]
[CDF_SIZE(SWITCHABLE_FILTERS)];
- /* kf_y_cdf is discarded after use, so does not require persistent storage.
- However, we keep it with the other CDFs in this struct since it needs to
- be copied to each tile to support parallelism just like the others.
- */
+/* kf_y_cdf is discarded after use, so does not require persistent storage.
+ However, we keep it with the other CDFs in this struct since it needs to
+ be copied to each tile to support parallelism just like the others.
+*/
+#if CONFIG_KF_CTX
+ aom_cdf_prob kf_y_cdf[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS]
+ [CDF_SIZE(INTRA_MODES)];
+#else
aom_cdf_prob kf_y_cdf[INTRA_MODES][INTRA_MODES][CDF_SIZE(INTRA_MODES)];
+#endif
aom_cdf_prob tx_size_cdf[MAX_TX_DEPTH][TX_SIZE_CONTEXTS]
[CDF_SIZE(MAX_TX_DEPTH + 1)];
-#if CONFIG_DELTA_Q
aom_cdf_prob delta_q_cdf[CDF_SIZE(DELTA_Q_PROBS + 1)];
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ aom_cdf_prob delta_lf_multi_cdf[FRAME_LF_COUNT][CDF_SIZE(DELTA_LF_PROBS + 1)];
+#endif // CONFIG_LOOPFILTER_LEVEL
aom_cdf_prob delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)];
#endif
-#endif // CONFIG_DELTA_Q
#if CONFIG_EXT_TX
aom_cdf_prob intra_ext_tx_cdf[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
[CDF_SIZE(TX_TYPES)];
@@ -351,23 +386,34 @@ typedef struct frame_contexts {
aom_cdf_prob intra_ext_tx_cdf[EXT_TX_SIZES][TX_TYPES][CDF_SIZE(TX_TYPES)];
aom_cdf_prob inter_ext_tx_cdf[EXT_TX_SIZES][CDF_SIZE(TX_TYPES)];
#endif // CONFIG_EXT_TX
+#if CONFIG_LGT_FROM_PRED
+ aom_prob intra_lgt_prob[LGT_SIZES][INTRA_MODES];
+ aom_prob inter_lgt_prob[LGT_SIZES];
+#endif // CONFIG_LGT_FROM_PRED
#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
aom_cdf_prob intra_filter_cdf[INTRA_FILTERS + 1][CDF_SIZE(INTRA_FILTERS)];
#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
-#if CONFIG_DELTA_Q
aom_prob delta_q_prob[DELTA_Q_PROBS];
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ aom_prob delta_lf_multi_prob[FRAME_LF_COUNT][DELTA_LF_PROBS];
+#endif // CONFIG_LOOPFILTER_LEVEL
aom_prob delta_lf_prob[DELTA_LF_PROBS];
#endif
-#endif
#if CONFIG_PVQ
// TODO(any): If PVQ is enabled, most of the coefficient-related cdfs,
// such as coef_cdfs[], coef_tail_cdfs[], and coef_head_cdfs[], can be removed.
od_adapt_ctx pvq_context;
#endif // CONFIG_PVQ
#if CONFIG_CFL
- aom_cdf_prob cfl_alpha_cdf[CDF_SIZE(CFL_ALPHABET_SIZE)];
-#endif
+ aom_cdf_prob cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)];
+ aom_cdf_prob cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)];
+#endif
+#if CONFIG_LPF_SB
+ aom_cdf_prob lpf_reuse_cdf[LPF_REUSE_CONTEXT][CDF_SIZE(2)];
+ aom_cdf_prob lpf_delta_cdf[LPF_DELTA_CONTEXT][CDF_SIZE(DELTA_RANGE)];
+ aom_cdf_prob lpf_sign_cdf[LPF_REUSE_CONTEXT][LPF_SIGN_CONTEXT][CDF_SIZE(2)];
+#endif // CONFIG_LPF_SB
} FRAME_CONTEXT;
typedef struct FRAME_COUNTS {
@@ -383,9 +429,6 @@ typedef struct FRAME_COUNTS {
#else
unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES];
#endif
- av1_coeff_count_model coef[TX_SIZES][PLANE_TYPES];
- unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS]
- [COEFF_CONTEXTS];
unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS];
#if CONFIG_ADAPT_SCAN
@@ -415,16 +458,26 @@ typedef struct FRAME_COUNTS {
unsigned int coeff_base[TX_SIZES][PLANE_TYPES][NUM_BASE_LEVELS]
[COEFF_BASE_CONTEXTS][2];
unsigned int coeff_lps[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS][2];
+ unsigned int coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_SETS][LEVEL_CONTEXTS]
+ [2];
+#if CONFIG_CTX1D
+ unsigned int eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES][2];
+ unsigned int empty_line[TX_SIZES][PLANE_TYPES][TX_CLASSES]
+ [EMPTY_LINE_CONTEXTS][2];
+ unsigned int hv_eob[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_EOB_CONTEXTS][2];
+#endif // CONFIG_CTX1D
#endif // CONFIG_LV_MAP
- av1_blockz_count_model blockz_count[TX_SIZES][PLANE_TYPES];
+#if CONFIG_SYMBOLRATE
+ unsigned int coeff_num[2]; // 0: zero coeff, 1: non-zero coeff
+ unsigned int symbol_num[2]; // 0: entropy symbol, 1: non-entropy symbol
+#endif
unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2];
unsigned int zeromv_mode[ZEROMV_MODE_CONTEXTS][2];
unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2];
unsigned int drl_mode[DRL_MODE_CONTEXTS][2];
-#if CONFIG_EXT_INTER
unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
#if CONFIG_COMPOUND_SINGLEREF
unsigned int inter_singleref_comp_mode[INTER_MODE_CONTEXTS]
@@ -436,13 +489,15 @@ typedef struct FRAME_COUNTS {
unsigned int wedge_interintra[BLOCK_SIZES_ALL][2];
#endif // CONFIG_INTERINTRA
unsigned int compound_interinter[BLOCK_SIZES_ALL][COMPOUND_TYPES];
-#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
unsigned int motion_mode[BLOCK_SIZES_ALL][MOTION_MODES];
#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
unsigned int ncobmc_mode[ADAPT_OVERLAP_BLOCKS][MAX_NCOBMC_MODES];
#endif
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ unsigned int ncobmc[BLOCK_SIZES_ALL][OBMC_FAMILY_MODES];
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
unsigned int obmc[BLOCK_SIZES_ALL][2];
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
@@ -459,13 +514,11 @@ typedef struct FRAME_COUNTS {
#else
unsigned int comp_ref[REF_CONTEXTS][COMP_REFS - 1][2];
#endif // CONFIG_EXT_REFS
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
unsigned int comp_inter_mode[COMP_INTER_MODE_CONTEXTS][2];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- // TODO(any): tx_size_totals is only used by the encoder to decide whether
- // to use forward updates for the coeff probs, and as such it does not really
- // belong into this structure.
- unsigned int tx_size_totals[TX_SIZES];
+#endif // CONFIG_COMPOUND_SINGLEREF
+ // TODO(urvang): Only needed for the !CONFIG_VAR_TX case, so this can be
+ // removed when the CONFIG_VAR_TX flag is removed.
unsigned int tx_size[MAX_TX_DEPTH][TX_SIZE_CONTEXTS][MAX_TX_DEPTH + 1];
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
unsigned int quarter_tx_size[2];
@@ -479,16 +532,22 @@ typedef struct FRAME_COUNTS {
unsigned int intrabc[2];
nmv_context_counts dv;
#endif
-#if CONFIG_DELTA_Q
+#if CONFIG_LGT_FROM_PRED
+ unsigned int intra_lgt[LGT_SIZES][INTRA_MODES][2];
+ unsigned int inter_lgt[LGT_SIZES][2];
+#endif // CONFIG_LGT_FROM_PRED
unsigned int delta_q[DELTA_Q_PROBS][2];
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ unsigned int delta_lf_multi[FRAME_LF_COUNT][DELTA_LF_PROBS][2];
+#endif // CONFIG_LOOPFILTER_LEVEL
unsigned int delta_lf[DELTA_LF_PROBS][2];
#endif
-#endif
-#if CONFIG_EXT_TX
-#if CONFIG_RECT_TX
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
unsigned int tx_size_implied[TX_SIZES][TX_SIZES];
-#endif // CONFIG_RECT_TX
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+#if CONFIG_ENTROPY_STATS
+#if CONFIG_EXT_TX
unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
[TX_TYPES];
@@ -496,6 +555,7 @@ typedef struct FRAME_COUNTS {
unsigned int intra_ext_tx[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
unsigned int inter_ext_tx[EXT_TX_SIZES][TX_TYPES];
#endif // CONFIG_EXT_TX
+#endif // CONFIG_ENTROPY_STATS
#if CONFIG_SUPERTX
unsigned int supertx[PARTITION_SUPERTX_CONTEXTS][TX_SIZES][2];
unsigned int supertx_size[TX_SIZES];
@@ -509,29 +569,103 @@ typedef struct FRAME_COUNTS {
#if CONFIG_FILTER_INTRA
unsigned int filter_intra[PLANE_TYPES][2];
#endif // CONFIG_FILTER_INTRA
+#if CONFIG_LPF_SB
+ unsigned int lpf_reuse[LPF_REUSE_CONTEXT][2];
+ unsigned int lpf_delta[LPF_DELTA_CONTEXT][DELTA_RANGE];
+ unsigned int lpf_sign[LPF_SIGN_CONTEXT][2];
+#endif // CONFIG_LPF_SB
} FRAME_COUNTS;
-// CDF version of 'av1_kf_y_mode_prob'.
-extern const aom_cdf_prob av1_kf_y_mode_cdf[INTRA_MODES][INTRA_MODES]
- [CDF_SIZE(INTRA_MODES)];
+#if CONFIG_KF_CTX
+extern const aom_cdf_prob default_kf_y_mode_cdf[KF_MODE_CONTEXTS]
+ [KF_MODE_CONTEXTS]
+ [CDF_SIZE(INTRA_MODES)];
+#else
+extern const aom_cdf_prob default_kf_y_mode_cdf[INTRA_MODES][INTRA_MODES]
+ [CDF_SIZE(INTRA_MODES)];
+#endif
-#if CONFIG_PALETTE
extern const aom_prob av1_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES]
[PALETTE_Y_MODE_CONTEXTS];
extern const aom_prob
av1_default_palette_uv_mode_prob[PALETTE_UV_MODE_CONTEXTS];
-#endif // CONFIG_PALETTE
-extern const int av1_intra_mode_ind[INTRA_MODES];
-extern const int av1_intra_mode_inv[INTRA_MODES];
#if CONFIG_EXT_TX
-extern int av1_ext_tx_intra_ind[EXT_TX_SETS_INTRA][TX_TYPES];
-extern int av1_ext_tx_intra_inv[EXT_TX_SETS_INTRA][TX_TYPES];
-extern int av1_ext_tx_inter_ind[EXT_TX_SETS_INTER][TX_TYPES];
-extern int av1_ext_tx_inter_inv[EXT_TX_SETS_INTER][TX_TYPES];
-#endif
+static const int av1_ext_tx_ind[EXT_TX_SET_TYPES][TX_TYPES] = {
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+#if CONFIG_MRC_TX
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ },
+ {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
+ },
+#endif // CONFIG_MRC_TX
+ {
+ 1, 3, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 1, 5, 6, 4, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0,
+ },
+ {
+ 3, 4, 5, 8, 6, 7, 9, 10, 11, 0, 1, 2, 0, 0, 0, 0,
+ },
+ {
+ 7, 8, 9, 12, 10, 11, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6,
+ },
+};
+
+static const int av1_ext_tx_inv[EXT_TX_SET_TYPES][TX_TYPES] = {
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+#if CONFIG_MRC_TX
+ {
+ 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 9, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+#endif // CONFIG_MRC_TX
+ {
+ 9, 0, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 9, 0, 10, 11, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 9, 10, 11, 0, 1, 2, 4, 5, 3, 6, 7, 8, 0, 0, 0, 0,
+ },
+ {
+ 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 4, 5, 3, 6, 7, 8,
+ },
+};
+#else
+#if CONFIG_MRC_TX
+static const int av1_ext_tx_ind[TX_TYPES] = {
+ 0, 3, 4, 2, 1,
+};
+static const int av1_ext_tx_inv[TX_TYPES] = {
+ 0, 4, 3, 1, 2,
+};
+#else
+static const int av1_ext_tx_ind[TX_TYPES] = {
+ 0, 2, 3, 1,
+};
+static const int av1_ext_tx_inv[TX_TYPES] = {
+ 0, 3, 1, 2,
+};
+#endif // CONFIG_MRC_TX
+#endif // CONFIG_EXT_TX
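For every transform type that a set actually codes, the forward table
(av1_ext_tx_ind) and the inverse table (av1_ext_tx_inv) must round-trip. A
hypothetical self-check over the tables above (symbol 0 is shared by all
types outside a set, so only nonzero symbols are asserted):

#if CONFIG_EXT_TX
#include <assert.h>
static INLINE void av1_ext_tx_tables_self_check(void) {
  for (int set = 0; set < EXT_TX_SET_TYPES; ++set)
    for (int t = 0; t < TX_TYPES; ++t)
      if (av1_ext_tx_ind[set][t])
        assert(av1_ext_tx_inv[set][av1_ext_tx_ind[set][t]] == t);
}
#endif  // CONFIG_EXT_TX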
-#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
extern const aom_tree_index
av1_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)];
@@ -543,36 +677,31 @@ extern const aom_tree_index
av1_inter_singleref_comp_mode_tree[TREE_SIZE(INTER_SINGLEREF_COMP_MODES)];
#endif // CONFIG_COMPOUND_SINGLEREF
extern const aom_tree_index av1_compound_type_tree[TREE_SIZE(COMPOUND_TYPES)];
-#endif // CONFIG_EXT_INTER
extern const aom_tree_index av1_partition_tree[TREE_SIZE(PARTITION_TYPES)];
#if CONFIG_EXT_PARTITION_TYPES
extern const aom_tree_index
av1_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)];
#endif
extern const aom_tree_index
- av1_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)];
-#if CONFIG_PALETTE
-extern const aom_tree_index
av1_palette_color_index_tree[PALETTE_SIZES][TREE_SIZE(PALETTE_COLORS)];
-#endif // CONFIG_PALETTE
-extern const aom_tree_index av1_tx_size_tree[MAX_TX_DEPTH][TREE_SIZE(TX_SIZES)];
#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
extern const aom_tree_index av1_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)];
#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
#if CONFIG_EXT_TX
-extern const aom_tree_index av1_ext_tx_inter_tree[EXT_TX_SETS_INTER]
- [TREE_SIZE(TX_TYPES)];
-extern const aom_tree_index av1_ext_tx_intra_tree[EXT_TX_SETS_INTRA]
- [TREE_SIZE(TX_TYPES)];
+extern const aom_tree_index av1_ext_tx_tree[EXT_TX_SET_TYPES]
+ [TREE_SIZE(TX_TYPES)];
#else
extern const aom_tree_index av1_ext_tx_tree[TREE_SIZE(TX_TYPES)];
#endif // CONFIG_EXT_TX
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
extern const aom_tree_index av1_motion_mode_tree[TREE_SIZE(MOTION_MODES)];
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
extern const aom_tree_index av1_ncobmc_mode_tree[TREE_SIZE(MAX_NCOBMC_MODES)];
-#endif
+#if CONFIG_WARPED_MOTION
+extern const aom_tree_index av1_ncobmc_tree[TREE_SIZE(OBMC_FAMILY_MODES)];
+#endif // CONFIG_WARPED_MOTION
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#if CONFIG_LOOP_RESTORATION
#define RESTORE_NONE_SGRPROJ_PROB 64
#define RESTORE_NONE_BILATERAL_PROB 16
@@ -581,17 +710,11 @@ extern const aom_tree_index av1_ncobmc_mode_tree[TREE_SIZE(MAX_NCOBMC_MODES)];
extern const aom_tree_index
av1_switchable_restore_tree[TREE_SIZE(RESTORE_SWITCHABLE_TYPES)];
#endif // CONFIG_LOOP_RESTORATION
-extern int av1_switchable_interp_ind[SWITCHABLE_FILTERS];
-extern int av1_switchable_interp_inv[SWITCHABLE_FILTERS];
void av1_setup_past_independence(struct AV1Common *cm);
void av1_adapt_intra_frame_probs(struct AV1Common *cm);
void av1_adapt_inter_frame_probs(struct AV1Common *cm);
-#if !CONFIG_EXT_TX
-extern int av1_ext_tx_ind[TX_TYPES];
-extern int av1_ext_tx_inv[TX_TYPES];
-#endif
static INLINE int av1_ceil_log2(int n) {
int i = 1, p = 2;
@@ -602,14 +725,12 @@ static INLINE int av1_ceil_log2(int n) {
return i;
}
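/* From the visible initialization (i = 1, p = 2) and the doubling loop that
   the hunk boundary elides, av1_ceil_log2(n) evidently returns the smallest
   i >= 1 with (1 << i) >= n; note it yields 1 rather than 0 for n <= 2.
   For example: av1_ceil_log2(5) == 3 and av1_ceil_log2(2) == 1. */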
-#if CONFIG_PALETTE
// Returns the context for palette color index at row 'r' and column 'c',
// along with the 'color_order' of neighbors and the 'color_idx'.
// The 'color_map' is a 2D array with the given 'stride'.
int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
int r, int c, int palette_size,
uint8_t *color_order, int *color_idx);
-#endif // CONFIG_PALETTE
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/common/entropymv.c b/third_party/aom/av1/common/entropymv.c
index 4737915a4..2d0191366 100644
--- a/third_party/aom/av1/common/entropymv.c
+++ b/third_party/aom/av1/common/entropymv.c
@@ -68,6 +68,12 @@ static const nmv_context default_nmv_context = {
{ AOM_ICDF(160 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(216 * 128), AOM_ICDF(32768), 0 },
+ { { AOM_ICDF(128 * 196), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 198), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 208), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 224), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 245), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 } }, // bits_cdf
#endif
},
{
@@ -93,6 +99,12 @@ static const nmv_context default_nmv_context = {
{ AOM_ICDF(160 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(128 * 128), AOM_ICDF(32768), 0 },
{ AOM_ICDF(216 * 128), AOM_ICDF(32768), 0 },
+ { { AOM_ICDF(128 * 196), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 198), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 208), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 224), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 245), AOM_ICDF(32768), 0 },
+ { AOM_ICDF(128 * 240), AOM_ICDF(32768), 0 } }, // bits_cdf
#endif
} },
};
@@ -169,7 +181,7 @@ static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr,
if (c == MV_CLASS_0) {
comp_counts->class0[d] += incr;
-#if CONFIG_INTRABC
+#if CONFIG_INTRABC || CONFIG_AMVR
if (precision > MV_SUBPEL_NONE)
#endif
comp_counts->class0_fp[d][f] += incr;
@@ -178,7 +190,7 @@ static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr,
int i;
int b = c + CLASS0_BITS - 1; // number of bits
for (i = 0; i < b; ++i) comp_counts->bits[i][((d >> i) & 1)] += incr;
-#if CONFIG_INTRABC
+#if CONFIG_INTRABC || CONFIG_AMVR
if (precision > MV_SUBPEL_NONE)
#endif
comp_counts->fp[f] += incr;
@@ -222,18 +234,23 @@ void av1_adapt_mv_probs(AV1_COMMON *cm, int allow_hp) {
for (j = 0; j < MV_OFFSET_BITS; ++j)
comp->bits[j] = av1_mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]);
+#if CONFIG_AMVR
+ if (cm->cur_frame_mv_precision_level == 0) {
+#endif
+ for (j = 0; j < CLASS0_SIZE; ++j)
+ aom_tree_merge_probs(av1_mv_fp_tree, pre_comp->class0_fp[j],
+ c->class0_fp[j], comp->class0_fp[j]);
- for (j = 0; j < CLASS0_SIZE; ++j)
- aom_tree_merge_probs(av1_mv_fp_tree, pre_comp->class0_fp[j],
- c->class0_fp[j], comp->class0_fp[j]);
-
- aom_tree_merge_probs(av1_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
+ aom_tree_merge_probs(av1_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
- if (allow_hp) {
- comp->class0_hp =
- av1_mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp);
- comp->hp = av1_mode_mv_merge_probs(pre_comp->hp, c->hp);
+ if (allow_hp) {
+ comp->class0_hp =
+ av1_mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp);
+ comp->hp = av1_mode_mv_merge_probs(pre_comp->hp, c->hp);
+ }
+#if CONFIG_AMVR
}
+#endif
}
}
}
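With CONFIG_AMVR, the fractional-precision models (class0_fp, fp) and the
high-precision ones (class0_hp, hp) are now merged only when
cm->cur_frame_mv_precision_level == 0; the counting side above is guarded the
same way (precision > MV_SUBPEL_NONE), so integer-MV frames neither gather
nor adapt those statistics. A hypothetical condensation of the added guard:

#if CONFIG_AMVR
  const int adapt_fractional = cm->cur_frame_mv_precision_level == 0;
#else
  const int adapt_fractional = 1;
#endif
  if (adapt_fractional) {
    /* merge class0_fp, fp, and (if allow_hp) class0_hp, hp */
  }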
diff --git a/third_party/aom/av1/common/entropymv.h b/third_party/aom/av1/common/entropymv.h
index bea5c6724..9ce089f7d 100644
--- a/third_party/aom/av1/common/entropymv.h
+++ b/third_party/aom/av1/common/entropymv.h
@@ -66,6 +66,9 @@ typedef enum {
#define CLASS0_BITS 1 /* bits at integer precision for class 0 */
#define CLASS0_SIZE (1 << CLASS0_BITS)
#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2)
+#if CONFIG_NEW_MULTISYMBOL
+#define MV_BITS_CONTEXTS 6
+#endif
#define MV_FP_SIZE 4
#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2)
@@ -97,6 +100,7 @@ typedef struct {
aom_cdf_prob class0_hp_cdf[CDF_SIZE(2)];
aom_cdf_prob hp_cdf[CDF_SIZE(2)];
aom_cdf_prob class0_cdf[CDF_SIZE(CLASS0_SIZE)];
+ aom_cdf_prob bits_cdf[MV_BITS_CONTEXTS][CDF_SIZE(2)];
#endif
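/* MV_BITS_CONTEXTS (6) matches the six bits_cdf rows seeded per component in
   entropymv.c above ({ AOM_ICDF(128 * 196), ... } through
   { AOM_ICDF(128 * 240), ... }). */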
} nmv_component;
@@ -133,7 +137,7 @@ typedef struct {
} nmv_context_counts;
typedef enum {
-#if CONFIG_INTRABC
+#if CONFIG_INTRABC || CONFIG_AMVR
MV_SUBPEL_NONE = -1,
#endif
MV_SUBPEL_LOW_PRECISION = 0,
diff --git a/third_party/aom/av1/common/enums.h b/third_party/aom/av1/common/enums.h
index 2b18d32a9..e8c4003cc 100644
--- a/third_party/aom/av1/common/enums.h
+++ b/third_party/aom/av1/common/enums.h
@@ -22,6 +22,16 @@ extern "C" {
#undef MAX_SB_SIZE
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+#define TWO_MODE
+#endif
+
+#if CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT
+#define NC_MODE_INFO 1
+#else
+#define NC_MODE_INFO 0
+#endif
+
// Max superblock size
#if CONFIG_EXT_PARTITION
#define MAX_SB_SIZE_LOG2 7
@@ -57,16 +67,45 @@ extern "C" {
#define MAX_TILE_ROWS 1024
#define MAX_TILE_COLS 1024
#else
+#if CONFIG_MAX_TILE
+#define MAX_TILE_ROWS 64
+#define MAX_TILE_COLS 64
+#else
#define MAX_TILE_ROWS 4
#define MAX_TILE_COLS 64
+#endif
#endif // CONFIG_EXT_TILE
#if CONFIG_VAR_TX
#define MAX_VARTX_DEPTH 2
+#define SQR_VARTX_DEPTH_INIT 0
+#define RECT_VARTX_DEPTH_INIT 0
#endif
#define MI_SIZE_64X64 (64 >> MI_SIZE_LOG2)
+#if CONFIG_LOOPFILTER_LEVEL
+// 4 frame filter levels: y plane vertical, y plane horizontal,
+// u plane, and v plane
+#define FRAME_LF_COUNT 4
+#define DEFAULT_DELTA_LF_MULTI 0
+#endif // CONFIG_LOOPFILTER_LEVEL
+
+#if CONFIG_LPF_SB
+#define LPF_DELTA_BITS 3
+#define LPF_STEP 2
+#define DELTA_RANGE (1 << LPF_DELTA_BITS)
+#define MAX_LPF_OFFSET (LPF_STEP * ((1 << LPF_DELTA_BITS) - 1))
+
+#define LPF_REUSE_CONTEXT 2
+#define LPF_DELTA_CONTEXT DELTA_RANGE
+#define LPF_SIGN_CONTEXT 2
+
+// Half of the maximum loop filter length (15 taps)
+#define FILT_BOUNDARY_OFFSET 8
+#define FILT_BOUNDARY_MI_OFFSET (FILT_BOUNDARY_OFFSET >> MI_SIZE_LOG2)
+#endif // CONFIG_LPF_SB
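/* Worked values for the constants above: DELTA_RANGE = 1 << 3 = 8 and
   MAX_LPF_OFFSET = 2 * (8 - 1) = 14, i.e. a filter level is apparently coded
   as up to seven steps of size LPF_STEP = 2 away from its predictor, with
   the sign coded separately (LPF_SIGN_CONTEXT = 2). FILT_BOUNDARY_MI_OFFSET
   is 8 right-shifted by MI_SIZE_LOG2, i.e. 2 mi units when
   MI_SIZE_LOG2 == 2. */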
+
// Bitstream profiles indicated by 2-3 bits in the uncompressed header.
// 00: Profile 0. 8-bit 4:2:0 only.
// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0.
@@ -113,6 +152,12 @@ typedef enum ATTRIBUTE_PACKED {
BLOCK_16X4,
BLOCK_8X32,
BLOCK_32X8,
+ BLOCK_16X64,
+ BLOCK_64X16,
+#if CONFIG_EXT_PARTITION
+ BLOCK_32X128,
+ BLOCK_128X32,
+#endif // CONFIG_EXT_PARTITION
BLOCK_SIZES_ALL,
BLOCK_SIZES = BLOCK_4X16,
BLOCK_INVALID = 255,
@@ -125,10 +170,10 @@ typedef enum {
PARTITION_VERT,
PARTITION_SPLIT,
#if CONFIG_EXT_PARTITION_TYPES
- PARTITION_HORZ_A, // HORZ split and the left partition is split again
- PARTITION_HORZ_B, // HORZ split and the right partition is split again
- PARTITION_VERT_A, // VERT split and the top partition is split again
- PARTITION_VERT_B, // VERT split and the bottom partition is split again
+ PARTITION_HORZ_A, // HORZ split and the top partition is split again
+ PARTITION_HORZ_B, // HORZ split and the bottom partition is split again
+ PARTITION_VERT_A, // VERT split and the left partition is split again
+ PARTITION_VERT_B, // VERT split and the right partition is split again
PARTITION_HORZ_4, // 4:1 horizontal partition
PARTITION_VERT_4, // 4:1 vertical partition
EXT_PARTITION_TYPES,
@@ -142,6 +187,7 @@ typedef char PARTITION_CONTEXT;
#define PARTITION_BLOCK_SIZES (4 + CONFIG_EXT_PARTITION)
#define PARTITION_CONTEXTS_PRIMARY (PARTITION_BLOCK_SIZES * PARTITION_PLOFFSET)
#if CONFIG_UNPOISON_PARTITION_CTX
+#define INVALID_PARTITION_CTX (-1)
#define PARTITION_CONTEXTS \
(PARTITION_CONTEXTS_PRIMARY + 2 * PARTITION_BLOCK_SIZES)
#else
@@ -158,14 +204,18 @@ typedef enum ATTRIBUTE_PACKED {
TX_16X16, // 16x16 transform
TX_32X32, // 32x32 transform
#if CONFIG_TX64X64
- TX_64X64, // 64x64 transform
+ TX_64X64, // 64x64 transform
+#endif // CONFIG_TX64X64
+ TX_4X8, // 4x8 transform
+ TX_8X4, // 8x4 transform
+ TX_8X16, // 8x16 transform
+ TX_16X8, // 16x8 transform
+ TX_16X32, // 16x32 transform
+ TX_32X16, // 32x16 transform
+#if CONFIG_TX64X64
+ TX_32X64, // 32x64 transform
+ TX_64X32, // 64x32 transform
#endif // CONFIG_TX64X64
- TX_4X8, // 4x8 transform
- TX_8X4, // 8x4 transform
- TX_8X16, // 8x16 transform
- TX_16X8, // 16x8 transform
- TX_16X32, // 16x32 transform
- TX_32X16, // 32x16 transform
TX_4X16, // 4x16 transform
TX_16X4, // 16x4 transform
TX_8X32, // 8x32 transform
@@ -182,6 +232,10 @@ typedef enum ATTRIBUTE_PACKED {
#define MAX_TX_DEPTH (TX_SIZES - TX_SIZE_CTX_MIN)
+#if CONFIG_CTX1D
+#define MAX_HVTX_SIZE (1 << 5)
+#endif // CONFIG_CTX1D
+
#define MAX_TX_SIZE_LOG2 (5 + CONFIG_TX64X64)
#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
#define MIN_TX_SIZE_LOG2 2
@@ -192,11 +246,9 @@ typedef enum ATTRIBUTE_PACKED {
#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 ((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2)
#define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2)
-#define MAX_NUM_TXB (1 << (MAX_SB_SIZE_LOG2 - MIN_TX_SIZE_LOG2))
-
-#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
typedef enum ATTRIBUTE_PACKED {
- NO_OVERLAP,
+ NCOBMC_MODE_0,
NCOBMC_MODE_1,
NCOBMC_MODE_2,
NCOBMC_MODE_3,
@@ -204,20 +256,33 @@ typedef enum ATTRIBUTE_PACKED {
NCOBMC_MODE_5,
NCOBMC_MODE_6,
NCOBMC_MODE_7,
- NCOBMC_MODE_8,
- MAX_NCOBMC_MODES
-} NCOBMC_MODE;
-// #define MAX_INTRPL_MODES 9
+ ALL_NCOBMC_MODES,
+#ifdef TWO_MODE
+ MAX_NCOBMC_MODES = NCOBMC_MODE_1 + 1,
+#else
+ MAX_NCOBMC_MODES = ALL_NCOBMC_MODES,
#endif
+ NO_OVERLAP = MAX_NCOBMC_MODES + 1
+} NCOBMC_MODE;
+
+typedef enum {
+ ADAPT_OVERLAP_BLOCK_8X8,
+ ADAPT_OVERLAP_BLOCK_16X16,
+ ADAPT_OVERLAP_BLOCK_32X32,
+ ADAPT_OVERLAP_BLOCK_64X64,
+ ADAPT_OVERLAP_BLOCKS,
+ ADAPT_OVERLAP_BLOCK_INVALID = 255
+} ADAPT_OVERLAP_BLOCK;
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
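/* Worked values for the enums above: NCOBMC_MODE_0..NCOBMC_MODE_7 take 0..7
   and ALL_NCOBMC_MODES is 8. Since CONFIG_NCOBMC_ADAPT_WEIGHT defines
   TWO_MODE (see the top of this file), MAX_NCOBMC_MODES = NCOBMC_MODE_1 + 1
   = 2 and NO_OVERLAP = 3; without TWO_MODE they would be 8 and 9. */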
// frame transform mode
typedef enum {
- ONLY_4X4 = 0, // only 4x4 transform used
- ALLOW_8X8 = 1, // allow block transform size up to 8x8
- ALLOW_16X16 = 2, // allow block transform size up to 16x16
- ALLOW_32X32 = 3, // allow block transform size up to 32x32
+ ONLY_4X4, // only 4x4 transform used
+ ALLOW_8X8, // allow block transform size up to 8x8
+ ALLOW_16X16, // allow block transform size up to 16x16
+ ALLOW_32X32, // allow block transform size up to 32x32
#if CONFIG_TX64X64
- ALLOW_64X64 = 4, // allow block transform size up to 64x64
+ ALLOW_64X64, // allow block transform size up to 64x64
#endif
TX_MODE_SELECT, // transform specified for each block
TX_MODES,
@@ -225,33 +290,33 @@ typedef enum {
// 1D tx types
typedef enum {
- DCT_1D = 0,
- ADST_1D = 1,
- FLIPADST_1D = 2,
- IDTX_1D = 3,
+ DCT_1D,
+ ADST_1D,
+ FLIPADST_1D,
+ IDTX_1D,
// TODO(sarahparker) need to eventually put something here for the
// mrc experiment to make this work with the ext-tx pruning functions
- TX_TYPES_1D = 4,
+ TX_TYPES_1D,
} TX_TYPE_1D;
typedef enum {
- DCT_DCT = 0, // DCT in both horizontal and vertical
- ADST_DCT = 1, // ADST in vertical, DCT in horizontal
- DCT_ADST = 2, // DCT in vertical, ADST in horizontal
- ADST_ADST = 3, // ADST in both directions
+ DCT_DCT, // DCT in both horizontal and vertical
+ ADST_DCT, // ADST in vertical, DCT in horizontal
+ DCT_ADST, // DCT in vertical, ADST in horizontal
+ ADST_ADST, // ADST in both directions
#if CONFIG_EXT_TX
- FLIPADST_DCT = 4,
- DCT_FLIPADST = 5,
- FLIPADST_FLIPADST = 6,
- ADST_FLIPADST = 7,
- FLIPADST_ADST = 8,
- IDTX = 9,
- V_DCT = 10,
- H_DCT = 11,
- V_ADST = 12,
- H_ADST = 13,
- V_FLIPADST = 14,
- H_FLIPADST = 15,
+ FLIPADST_DCT,
+ DCT_FLIPADST,
+ FLIPADST_FLIPADST,
+ ADST_FLIPADST,
+ FLIPADST_ADST,
+ IDTX,
+ V_DCT,
+ H_DCT,
+ V_ADST,
+ H_ADST,
+ V_FLIPADST,
+ H_FLIPADST,
#endif // CONFIG_EXT_TX
#if CONFIG_MRC_TX
MRC_DCT, // DCT in both directions with mrc based bitmask
@@ -260,6 +325,28 @@ typedef enum {
} TX_TYPE;
#if CONFIG_EXT_TX
+typedef enum {
+ // DCT only
+ EXT_TX_SET_DCTONLY,
+ // DCT + Identity only
+ EXT_TX_SET_DCT_IDTX,
+#if CONFIG_MRC_TX
+ // DCT + MRC_DCT
+ EXT_TX_SET_MRC_DCT,
+ // DCT + MRC_DCT + IDTX
+ EXT_TX_SET_MRC_DCT_IDTX,
+#endif // CONFIG_MRC_TX
+ // Discrete Trig transforms w/o flip (4) + Identity (1)
+ EXT_TX_SET_DTT4_IDTX,
+ // Discrete Trig transforms w/o flip (4) + Identity (1) + 1D Hor/Ver DCT (2)
+ EXT_TX_SET_DTT4_IDTX_1DDCT,
+ // Discrete Trig transforms w/ flip (9) + Identity (1) + 1D Hor/Ver DCT (2)
+ EXT_TX_SET_DTT9_IDTX_1DDCT,
+ // Discrete Trig transforms w/ flip (9) + Identity (1) + 1D Hor/Ver (6)
+ EXT_TX_SET_ALL16,
+ EXT_TX_SET_TYPES
+} TxSetType;
+
#define IS_2D_TRANSFORM(tx_type) (tx_type < IDTX)
#else
#define IS_2D_TRANSFORM(tx_type) 1
@@ -304,14 +391,9 @@ typedef enum {
AOM_LAST3_FLAG = 1 << 2,
AOM_GOLD_FLAG = 1 << 3,
AOM_BWD_FLAG = 1 << 4,
-#if CONFIG_ALTREF2
AOM_ALT2_FLAG = 1 << 5,
AOM_ALT_FLAG = 1 << 6,
AOM_REFFRAME_ALL = (1 << 7) - 1
-#else // !CONFIG_ALTREF2
- AOM_ALT_FLAG = 1 << 5,
- AOM_REFFRAME_ALL = (1 << 6) - 1
-#endif // CONFIG_ALTREF2
#else // !CONFIG_EXT_REFS
AOM_GOLD_FLAG = 1 << 1,
AOM_ALT_FLAG = 1 << 2,
@@ -323,28 +405,56 @@ typedef enum {
#define USE_UNI_COMP_REFS 1
typedef enum {
- UNIDIR_COMP_REFERENCE = 0,
- BIDIR_COMP_REFERENCE = 1,
- COMP_REFERENCE_TYPES = 2,
+ UNIDIR_COMP_REFERENCE,
+ BIDIR_COMP_REFERENCE,
+ COMP_REFERENCE_TYPES,
} COMP_REFERENCE_TYPE;
#else // !CONFIG_EXT_COMP_REFS
#define USE_UNI_COMP_REFS 0
#endif // CONFIG_EXT_COMP_REFS
-typedef enum { PLANE_TYPE_Y = 0, PLANE_TYPE_UV = 1, PLANE_TYPES } PLANE_TYPE;
+typedef enum { PLANE_TYPE_Y, PLANE_TYPE_UV, PLANE_TYPES } PLANE_TYPE;
#if CONFIG_CFL
-// TODO(ltrudeau) this should change based on QP size
-#define CB_ALPHABET_SIZE 4
-#define CR_ALPHABET_SIZE 4
-#define CFL_ALPHABET_SIZE (CB_ALPHABET_SIZE * CR_ALPHABET_SIZE)
-#define CFL_MAGS_SIZE 7
-
-typedef enum { CFL_PRED_U = 0, CFL_PRED_V = 1, CFL_PRED_PLANES } CFL_PRED_TYPE;
-typedef enum { CFL_SIGN_NEG = 0, CFL_SIGN_POS = 1, CFL_SIGNS } CFL_SIGN_TYPE;
+#define CFL_ALPHABET_SIZE_LOG2 4
+#define CFL_ALPHABET_SIZE (1 << CFL_ALPHABET_SIZE_LOG2)
+#define CFL_MAGS_SIZE ((2 << CFL_ALPHABET_SIZE_LOG2) + 1)
+#define CFL_IDX_U(idx) (idx >> CFL_ALPHABET_SIZE_LOG2)
+#define CFL_IDX_V(idx) (idx & (CFL_ALPHABET_SIZE - 1))
+
+typedef enum { CFL_PRED_U, CFL_PRED_V, CFL_PRED_PLANES } CFL_PRED_TYPE;
+
+typedef enum {
+ CFL_SIGN_ZERO,
+ CFL_SIGN_NEG,
+ CFL_SIGN_POS,
+ CFL_SIGNS
+} CFL_SIGN_TYPE;
+
+// The joint sign (CFL_SIGN_ZERO, CFL_SIGN_ZERO) is invalid.
+#define CFL_JOINT_SIGNS (CFL_SIGNS * CFL_SIGNS - 1)
+// CFL_SIGN_U is equivalent to (js + 1) / 3 for js in 0 to 8
+#define CFL_SIGN_U(js) (((js + 1) * 11) >> 5)
+// CFL_SIGN_V is equivalent to (js + 1) % 3 for js in 0 to 8
+#define CFL_SIGN_V(js) ((js + 1) - CFL_SIGNS * CFL_SIGN_U(js))
+
+// There is no context when the alpha for a given plane is zero.
+// So there are 2 fewer contexts than joint signs.
+#define CFL_ALPHA_CONTEXTS (CFL_JOINT_SIGNS + 1 - CFL_SIGNS)
+#define CFL_CONTEXT_U(js) (js + 1 - CFL_SIGNS)
+// Also, the contexts are symmetric under swapping the planes.
+#define CFL_CONTEXT_V(js) \
+ (CFL_SIGN_V(js) * CFL_SIGNS + CFL_SIGN_U(js) - CFL_SIGNS)
#endif
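A worked check of the sign arithmetic above: ((js + 1) * 11) >> 5 equals
(js + 1) / 3 exactly for js + 1 in 1..9 (e.g. js = 4: 55 >> 5 == 1 == 5 / 3),
so CFL_SIGN_U and CFL_SIGN_V decode the joint sign js into base-3 digits.
CFL_IDX_U and CFL_IDX_V likewise split the joint alphabet index into its high
and low CFL_ALPHABET_SIZE_LOG2 (= 4) bits. A hypothetical exhaustive
assertion:

#if CONFIG_CFL
#include <assert.h>
static INLINE void cfl_sign_macros_check(void) {
  for (int js = 0; js < CFL_JOINT_SIGNS; ++js) {
    assert(CFL_SIGN_U(js) == (js + 1) / 3);
    assert(CFL_SIGN_V(js) == (js + 1) % 3);
  }
}
#endif  // CONFIG_CFL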
-#if CONFIG_PALETTE
+typedef enum {
+ PALETTE_MAP,
+#if CONFIG_MRC_TX
+ MRC_MAP,
+#endif // CONFIG_MRC_TX
+ COLOR_MAP_TYPES,
+} COLOR_MAP_TYPE;
+
typedef enum {
TWO_COLORS,
THREE_COLORS,
@@ -367,33 +477,29 @@ typedef enum {
PALETTE_COLOR_EIGHT,
PALETTE_COLORS
} PALETTE_COLOR;
-#endif // CONFIG_PALETTE
// Note: All directional predictors must be between V_PRED and D63_PRED (both
// inclusive).
typedef enum ATTRIBUTE_PACKED {
- DC_PRED, // Average of above and left pixels
- V_PRED, // Vertical
- H_PRED, // Horizontal
- D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi)
- D135_PRED, // Directional 135 deg = 180 - 45
- D117_PRED, // Directional 117 deg = 180 - 63
- D153_PRED, // Directional 153 deg = 180 - 27
- D207_PRED, // Directional 207 deg = 180 + 27
- D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
-#if CONFIG_ALT_INTRA
+ DC_PRED, // Average of above and left pixels
+ V_PRED, // Vertical
+ H_PRED, // Horizontal
+ D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi)
+ D135_PRED, // Directional 135 deg = 180 - 45
+ D117_PRED, // Directional 117 deg = 180 - 63
+ D153_PRED, // Directional 153 deg = 180 - 27
+ D207_PRED, // Directional 207 deg = 180 + 27
+ D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
SMOOTH_PRED, // Combination of horizontal and vertical interpolation
#if CONFIG_SMOOTH_HV
SMOOTH_V_PRED, // Vertical interpolation
SMOOTH_H_PRED, // Horizontal interpolation
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
TM_PRED, // True-motion
NEARESTMV,
NEARMV,
ZEROMV,
NEWMV,
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
// Single ref compound modes
SR_NEAREST_NEARMV,
@@ -411,7 +517,6 @@ typedef enum ATTRIBUTE_PACKED {
NEW_NEARMV,
ZERO_ZEROMV,
NEW_NEWMV,
-#endif // CONFIG_EXT_INTER
MB_MODE_COUNT,
INTRA_MODES = TM_PRED + 1, // TM_PRED has to be the last intra mode.
INTRA_INVALID = MB_MODE_COUNT // For uv_mode in inter blocks
@@ -421,23 +526,22 @@ typedef enum ATTRIBUTE_PACKED {
// TODO(ltrudeau) Do we really want to pack this?
// TODO(ltrudeau) Do we match with PREDICTION_MODE?
typedef enum ATTRIBUTE_PACKED {
- UV_DC_PRED, // Average of above and left pixels
- UV_V_PRED, // Vertical
- UV_H_PRED, // Horizontal
- UV_D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi)
- UV_D135_PRED, // Directional 135 deg = 180 - 45
- UV_D117_PRED, // Directional 117 deg = 180 - 63
- UV_D153_PRED, // Directional 153 deg = 180 - 27
- UV_D207_PRED, // Directional 207 deg = 180 + 27
- UV_D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
-#if CONFIG_ALT_INTRA
+ UV_DC_PRED, // Average of above and left pixels
+ UV_V_PRED, // Vertical
+ UV_H_PRED, // Horizontal
+ UV_D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi)
+ UV_D135_PRED, // Directional 135 deg = 180 - 45
+ UV_D117_PRED, // Directional 117 deg = 180 - 63
+ UV_D153_PRED, // Directional 153 deg = 180 - 27
+ UV_D207_PRED, // Directional 207 deg = 180 + 27
+ UV_D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
UV_SMOOTH_PRED, // Combination of horizontal and vertical interpolation
#if CONFIG_SMOOTH_HV
UV_SMOOTH_V_PRED, // Vertical interpolation
UV_SMOOTH_H_PRED, // Horizontal interpolation
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
UV_TM_PRED, // True-motion
+ UV_CFL_PRED, // Chroma-from-Luma
UV_INTRA_MODES,
UV_MODE_INVALID, // For uv_mode in inter blocks
} UV_PREDICTION_MODE;
@@ -449,47 +553,35 @@ typedef enum ATTRIBUTE_PACKED {
#endif // CONFIG_CFL
typedef enum {
- SIMPLE_TRANSLATION = 0,
+ SIMPLE_TRANSLATION,
#if CONFIG_MOTION_VAR
OBMC_CAUSAL, // 2-sided OBMC
-#endif // CONFIG_MOTION_VAR
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ NCOBMC_ADAPT_WEIGHT,
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
+#endif // CONFIG_MOTION_VAR
#if CONFIG_WARPED_MOTION
WARPED_CAUSAL, // 2-sided WARPED
#endif // CONFIG_WARPED_MOTION
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- NCOBMC_ADAPT_WEIGHT,
-#endif
MOTION_MODES
-} MOTION_MODE;
-
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-typedef enum {
- ADAPT_OVERLAP_BLOCK_8X8,
- ADAPT_OVERLAP_BLOCK_16X16,
- ADAPT_OVERLAP_BLOCK_32X32,
- ADAPT_OVERLAP_BLOCK_64X64,
- ADAPT_OVERLAP_BLOCKS,
- ADAPT_OVERLAP_BLOCK_INVALID = 255
-} ADAPT_OVERLAP_BLOCK;
+#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
+ ,
+ OBMC_FAMILY_MODES = NCOBMC_ADAPT_WEIGHT + 1
#endif
+} MOTION_MODE;
-#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
typedef enum {
- II_DC_PRED = 0,
+ II_DC_PRED,
II_V_PRED,
II_H_PRED,
-#if CONFIG_ALT_INTRA
II_SMOOTH_PRED,
-#else
- II_TM_PRED,
-#endif // CONFIG_ALT_INTRA
INTERINTRA_MODES
} INTERINTRA_MODE;
#endif
typedef enum {
- COMPOUND_AVERAGE = 0,
+ COMPOUND_AVERAGE,
#if CONFIG_WEDGE
COMPOUND_WEDGE,
#endif // CONFIG_WEDGE
@@ -498,7 +590,6 @@ typedef enum {
#endif // CONFIG_COMPOUND_SEGMENT
COMPOUND_TYPES,
} COMPOUND_TYPE;
-#endif // CONFIG_EXT_INTER
// TODO(huisu): Consider adding FILTER_SMOOTH_PRED to "FILTER_INTRA_MODE".
#if CONFIG_FILTER_INTRA
@@ -523,13 +614,11 @@ typedef enum {
#define INTER_MODES (1 + NEWMV - NEARESTMV)
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
#define INTER_SINGLEREF_COMP_MODES (1 + SR_NEW_NEWMV - SR_NEAREST_NEARMV)
#endif // CONFIG_COMPOUND_SINGLEREF
#define INTER_COMPOUND_MODES (1 + NEW_NEWMV - NEAREST_NEARESTMV)
-#endif // CONFIG_EXT_INTER
#define SKIP_CONTEXTS 3
@@ -553,7 +642,6 @@ typedef enum {
#define SKIP_NEARESTMV_SUB8X8_OFFSET 11
#define INTER_MODE_CONTEXTS 7
-#if CONFIG_DELTA_Q
#define DELTA_Q_SMALL 3
#define DELTA_Q_PROBS (DELTA_Q_SMALL)
#define DEFAULT_DELTA_Q_RES 4
@@ -562,7 +650,6 @@ typedef enum {
#define DELTA_LF_PROBS (DELTA_LF_SMALL)
#define DEFAULT_DELTA_LF_RES 2
#endif
-#endif
/* Segment Feature Masks */
#define MAX_MV_REF_CANDIDATES 2
@@ -583,9 +670,9 @@ typedef enum {
#define UNI_COMP_REF_CONTEXTS 3
#endif // CONFIG_EXT_COMP_REFS
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
#define COMP_INTER_MODE_CONTEXTS 4
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_VAR_TX
#define TXFM_PARTITION_CONTEXTS ((TX_SIZES - TX_8X8) * 6 - 2)
@@ -601,14 +688,8 @@ typedef uint8_t TXFM_CONTEXT;
#define LAST3_FRAME 3
#define GOLDEN_FRAME 4
#define BWDREF_FRAME 5
-
-#if CONFIG_ALTREF2
#define ALTREF2_FRAME 6
#define ALTREF_FRAME 7
-#else // !CONFIG_ALTREF2
-#define ALTREF_FRAME 6
-#endif // CONFIG_ALTREF2
-
#define LAST_REF_FRAMES (LAST3_FRAME - LAST_FRAME + 1)
#else // !CONFIG_EXT_REFS
#define GOLDEN_FRAME 2
@@ -651,9 +732,9 @@ typedef enum {
#if CONFIG_LOOP_RESTORATION
typedef enum {
- RESTORE_NONE = 0,
- RESTORE_WIENER = 1,
- RESTORE_SGRPROJ = 2,
+ RESTORE_NONE,
+ RESTORE_WIENER,
+ RESTORE_SGRPROJ,
RESTORE_SWITCHABLE,
RESTORE_SWITCHABLE_TYPES = RESTORE_SWITCHABLE,
RESTORE_TYPES,
@@ -662,7 +743,7 @@ typedef enum {
#if CONFIG_FRAME_SUPERRES
#define SUPERRES_SCALE_BITS 3
-#define SUPERRES_SCALE_NUMERATOR_MIN 8
+#define SUPERRES_SCALE_DENOMINATOR_MIN 8
#endif // CONFIG_FRAME_SUPERRES
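
The rename above makes the coded field's meaning explicit: the 3-bit field selects a scaling denominator, with 8 meaning no scaling. A minimal sketch of how such a field could map to a coded frame width, assuming a fixed numerator of 8 and round-to-nearest division (the helper name and the exact rounding are illustrative, not the library's API):

#include <assert.h>

/* Illustrative sketch only: 3-bit field -> denominator in [8, 15]; the frame
 * would be coded at roughly width * 8 / denom and upscaled after decoding. */
enum { SUPERRES_BITS = 3, SUPERRES_DENOM_MIN = 8 };

static int superres_coded_width(int width, int coded_field) {
  const int denom = SUPERRES_DENOM_MIN + coded_field; /* 8 (1:1) .. 15 */
  return (width * SUPERRES_DENOM_MIN + denom / 2) / denom; /* round nearest */
}

int main(void) {
  assert(superres_coded_width(1920, 0) == 1920); /* denom 8: no scaling */
  assert(superres_coded_width(1920, 7) == 1024); /* denom 15: 8/15 scale */
  return 0;
}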
#if CONFIG_LPF_DIRECT
@@ -678,6 +759,27 @@ typedef enum {
} FILTER_DEGREE;
#endif // CONFIG_LPF_DIRECT
+#if CONFIG_OBU
+// R19
+typedef enum {
+ OBU_SEQUENCE_HEADER = 1,
+ OBU_TD = 2,
+ OBU_FRAME_HEADER = 3,
+ OBU_TILE_GROUP = 4,
+ OBU_METADATA = 5,
+ OBU_PADDING = 15,
+} OBU_TYPE;
+#endif
+
+#if CONFIG_LGT_FROM_PRED
+#define LGT_SIZES 2
+// Note: at least one of LGT_FROM_PRED_INTRA and LGT_FROM_PRED_INTER must be 1
+#define LGT_FROM_PRED_INTRA 1
+#define LGT_FROM_PRED_INTER 1
+// LGT_SL_INTRA: LGTs with a mode-dependent first self-loop and a break point
+#define LGT_SL_INTRA 0
+#endif // CONFIG_LGT_FROM_PRED
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/filter.c b/third_party/aom/av1/common/filter.c
index c5555e34e..135132316 100644
--- a/third_party/aom/av1/common/filter.c
+++ b/third_party/aom/av1/common/filter.c
@@ -51,7 +51,6 @@ DECLARE_ALIGNED(16, static const int16_t,
#if USE_EXTRA_FILTER
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8[SUBPEL_SHIFTS]) = {
-#if CONFIG_FILTER_7BIT
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
{ 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
{ 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
@@ -60,22 +59,10 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
{ 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
{ 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
-#else
- // intfilt 0.575
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -5, 126, 8, -3, 1, 0 },
- { -1, 3, -10, 123, 18, -6, 2, -1 }, { -1, 4, -14, 118, 27, -9, 3, 0 },
- { -1, 5, -16, 112, 37, -12, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 },
- { -1, 6, -19, 97, 58, -17, 5, -1 }, { -1, 6, -20, 88, 68, -18, 6, -1 },
- { -1, 6, -19, 78, 78, -19, 6, -1 }, { -1, 6, -18, 68, 88, -20, 6, -1 },
- { -1, 5, -17, 58, 97, -19, 6, -1 }, { -1, 4, -14, 48, 105, -18, 5, -1 },
- { -1, 4, -12, 37, 112, -16, 5, -1 }, { 0, 3, -9, 27, 118, -14, 4, -1 },
- { -1, 2, -6, 18, 123, -10, 3, -1 }, { 0, 1, -3, 8, 126, -5, 1, 0 },
-#endif
};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_regular_uv[SUBPEL_SHIFTS]) = {
-#if CONFIG_FILTER_7BIT
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
{ 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
{ 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
@@ -84,17 +71,6 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
{ 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
{ 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
-#else
- // intfilt 0.575
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -5, 126, 8, -3, 1, 0 },
- { -1, 3, -10, 123, 18, -6, 2, -1 }, { -1, 4, -14, 118, 27, -9, 3, 0 },
- { -1, 5, -16, 112, 37, -12, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 },
- { -1, 6, -19, 97, 58, -17, 5, -1 }, { -1, 6, -20, 88, 68, -18, 6, -1 },
- { -1, 6, -19, 78, 78, -19, 6, -1 }, { -1, 6, -18, 68, 88, -20, 6, -1 },
- { -1, 5, -17, 58, 97, -19, 6, -1 }, { -1, 4, -14, 48, 105, -18, 5, -1 },
- { -1, 4, -12, 37, 112, -16, 5, -1 }, { 0, 3, -9, 27, 118, -14, 4, -1 },
- { -1, 2, -6, 18, 123, -10, 3, -1 }, { 0, 1, -3, 8, 126, -5, 1, 0 },
-#endif
};
#if USE_12TAP_FILTER
@@ -134,7 +110,6 @@ DECLARE_ALIGNED(256, static const int16_t,
#else
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
-#if CONFIG_FILTER_7BIT
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 },
{ -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 },
{ -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 },
@@ -143,16 +118,6 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 },
{ -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 },
{ -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 }
-#else
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 3, -7, 127, 8, -3, 1, 0 },
- { -2, 5, -13, 125, 17, -6, 3, -1 }, { -3, 7, -17, 121, 27, -10, 5, -2 },
- { -4, 9, -20, 115, 37, -13, 6, -2 }, { -4, 10, -23, 108, 48, -16, 8, -3 },
- { -4, 10, -24, 100, 59, -19, 9, -3 }, { -4, 11, -24, 90, 70, -21, 10, -4 },
- { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -21, 70, 90, -24, 11, -4 },
- { -3, 9, -19, 59, 100, -24, 10, -4 }, { -3, 8, -16, 48, 108, -23, 10, -4 },
- { -2, 6, -13, 37, 115, -20, 9, -4 }, { -2, 5, -10, 27, 121, -17, 7, -3 },
- { -1, 3, -6, 17, 125, -13, 5, -2 }, { 0, 1, -3, 8, 127, -7, 3, -1 }
-#endif
};
#endif
@@ -184,7 +149,6 @@ DECLARE_ALIGNED(256, static const InterpKernel,
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
-#if CONFIG_FILTER_7BIT
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
@@ -193,22 +157,10 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
-#else
- // freqmultiplier = 0.8
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, -5, 13, 102, 24, -7, 1, 0 },
- { 0, -4, 8, 100, 31, -8, 1, 0 }, { 0, -3, 4, 97, 37, -8, 1, 0 },
- { 0, -2, 0, 94, 44, -9, 1, 0 }, { 0, -2, -3, 90, 51, -9, 1, 0 },
- { 0, -1, -5, 84, 59, -9, 0, 0 }, { 0, 0, -7, 79, 65, -9, 0, 0 },
- { 0, 0, -8, 72, 72, -8, 0, 0 }, { 0, 0, -9, 65, 79, -7, 0, 0 },
- { 0, 0, -9, 59, 84, -5, -1, 0 }, { 0, 1, -9, 51, 90, -3, -2, 0 },
- { 0, 1, -9, 44, 94, 0, -2, 0 }, { 0, 1, -8, 37, 97, 4, -3, 0 },
- { 0, 1, -8, 31, 100, 8, -4, 0 }, { 0, 1, -7, 24, 102, 13, -5, 0 },
-#endif
};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_smooth_uv[SUBPEL_SHIFTS]) = {
-#if CONFIG_FILTER_7BIT
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
@@ -217,23 +169,11 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
-#else
- // freqmultiplier = 0.8
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, -5, 13, 102, 24, -7, 1, 0 },
- { 0, -4, 8, 100, 31, -8, 1, 0 }, { 0, -3, 4, 97, 37, -8, 1, 0 },
- { 0, -2, 0, 94, 44, -9, 1, 0 }, { 0, -2, -3, 90, 51, -9, 1, 0 },
- { 0, -1, -5, 84, 59, -9, 0, 0 }, { 0, 0, -7, 79, 65, -9, 0, 0 },
- { 0, 0, -8, 72, 72, -8, 0, 0 }, { 0, 0, -9, 65, 79, -7, 0, 0 },
- { 0, 0, -9, 59, 84, -5, -1, 0 }, { 0, 1, -9, 51, 90, -3, -2, 0 },
- { 0, 1, -9, 44, 94, 0, -2, 0 }, { 0, 1, -8, 37, 97, 4, -3, 0 },
- { 0, 1, -8, 31, 100, 8, -4, 0 }, { 0, 1, -7, 24, 102, 13, -5, 0 },
-#endif
};
-#else // USE_EXTRA_FILTER
+#else // USE_EXTRA_FILTER
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8[SUBPEL_SHIFTS]) = {
-#if CONFIG_FILTER_7BIT
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
{ 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
{ 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
@@ -242,21 +182,10 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
{ 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
{ 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
-#else
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -5, 126, 8, -3, 1, 0 },
- { -1, 3, -10, 122, 18, -6, 2, 0 }, { -1, 4, -13, 118, 27, -9, 3, -1 },
- { -1, 4, -16, 112, 37, -11, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 },
- { -1, 5, -19, 97, 58, -16, 5, -1 }, { -1, 6, -19, 88, 68, -18, 5, -1 },
- { -1, 6, -19, 78, 78, -19, 6, -1 }, { -1, 5, -18, 68, 88, -19, 6, -1 },
- { -1, 5, -16, 58, 97, -19, 5, -1 }, { -1, 4, -14, 48, 105, -18, 5, -1 },
- { -1, 4, -11, 37, 112, -16, 4, -1 }, { -1, 3, -9, 27, 118, -13, 4, -1 },
- { 0, 2, -6, 18, 122, -10, 3, -1 }, { 0, 1, -3, 8, 126, -5, 1, 0 }
-#endif
};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
-#if CONFIG_FILTER_7BIT
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 },
{ -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 },
{ -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 },
@@ -265,21 +194,10 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 },
{ -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 },
{ -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 }
-#else
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 3, -7, 127, 8, -3, 1, 0 },
- { -2, 5, -13, 125, 17, -6, 3, -1 }, { -3, 7, -17, 121, 27, -10, 5, -2 },
- { -4, 9, -20, 115, 37, -13, 6, -2 }, { -4, 10, -23, 108, 48, -16, 8, -3 },
- { -4, 10, -24, 100, 59, -19, 9, -3 }, { -4, 11, -24, 90, 70, -21, 10, -4 },
- { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -21, 70, 90, -24, 11, -4 },
- { -3, 9, -19, 59, 100, -24, 10, -4 }, { -3, 8, -16, 48, 108, -23, 10, -4 },
- { -2, 6, -13, 37, 115, -20, 9, -4 }, { -2, 5, -10, 27, 121, -17, 7, -3 },
- { -1, 3, -6, 17, 125, -13, 5, -2 }, { 0, 1, -3, 8, 127, -7, 3, -1 }
-#endif
};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
-#if CONFIG_FILTER_7BIT
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
@@ -288,16 +206,6 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
-#else
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { -3, -1, 32, 64, 38, 1, -3, 0 },
- { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 26, 63, 43, 4, -4, 0 },
- { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 21, 60, 49, 7, -4, 0 },
- { -1, -4, 18, 59, 51, 9, -4, 0 }, { -1, -4, 16, 57, 53, 12, -4, -1 },
- { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 12, 53, 57, 16, -4, -1 },
- { 0, -4, 9, 51, 59, 18, -4, -1 }, { 0, -4, 7, 49, 60, 21, -3, -2 },
- { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 4, 43, 63, 26, -2, -2 },
- { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 }
-#endif
};
#endif // USE_EXTRA_FILTER
diff --git a/third_party/aom/av1/common/filter.h b/third_party/aom/av1/common/filter.h
index d791af173..343e87560 100644
--- a/third_party/aom/av1/common/filter.h
+++ b/third_party/aom/av1/common/filter.h
@@ -12,6 +12,8 @@
#ifndef AV1_COMMON_FILTER_H_
#define AV1_COMMON_FILTER_H_
+#include <assert.h>
+
#include "./aom_config.h"
#include "aom/aom_integer.h"
#include "aom_dsp/aom_filter.h"
@@ -30,10 +32,10 @@ extern "C" {
typedef enum {
EIGHTTAP_REGULAR,
EIGHTTAP_SMOOTH,
- MULTITAP_SHARP,
#if USE_EXTRA_FILTER
EIGHTTAP_SMOOTH2,
#endif // USE_EXTRA_FILTER
+ MULTITAP_SHARP,
BILINEAR,
#if USE_EXTRA_FILTER
EIGHTTAP_SHARP,
@@ -51,6 +53,49 @@ typedef enum {
#endif
} InterpFilter;
+// With CONFIG_DUAL_FILTER, pack two InterpFilter's into a uint32_t: since
+// there are at most 10 filters, we can use 16 bits for each and have more than
+// enough space. This reduces argument passing and unifies the operation of
+// setting a (pair of) filters.
+//
+// Without CONFIG_DUAL_FILTER, InterpFilters is simply a single InterpFilter.
+#if CONFIG_DUAL_FILTER
+typedef uint32_t InterpFilters;
+static INLINE InterpFilter av1_extract_interp_filter(InterpFilters filters,
+ int x_filter) {
+ return (InterpFilter)((filters >> (x_filter ? 16 : 0)) & 0xffff);
+}
+
+static INLINE InterpFilters av1_make_interp_filters(InterpFilter y_filter,
+ InterpFilter x_filter) {
+ uint16_t y16 = y_filter & 0xffff;
+ uint16_t x16 = x_filter & 0xffff;
+ return y16 | ((uint32_t)x16 << 16);
+}
+
+static INLINE InterpFilters av1_broadcast_interp_filter(InterpFilter filter) {
+ return av1_make_interp_filters(filter, filter);
+}
+#else
+typedef InterpFilter InterpFilters;
+static INLINE InterpFilter av1_extract_interp_filter(InterpFilters filters,
+ int x_filter) {
+#ifdef NDEBUG
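+  // assert() compiles to nothing under NDEBUG, so mark the parameter used.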
+ (void)x_filter;
+#endif
+ assert(!x_filter);
+ return filters;
+}
+
+static INLINE InterpFilters av1_broadcast_interp_filter(InterpFilter filter) {
+ return filter;
+}
+#endif
+
+static INLINE InterpFilter av1_unswitchable_filter(InterpFilter filter) {
+ return filter == SWITCHABLE ? EIGHTTAP_REGULAR : filter;
+}
+
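
The packing described in the comment above round-trips cleanly. A self-contained sketch of the same bit layout (a standalone re-implementation for illustration, assuming the y filter sits in the low 16 bits and the x filter in the high 16 bits, as in the helpers above):

#include <assert.h>
#include <stdint.h>

/* Standalone sketch of the CONFIG_DUAL_FILTER packing: y (vertical) filter in
 * the low 16 bits, x (horizontal) filter in the high 16 bits. */
typedef uint32_t InterpFiltersSketch;

static InterpFiltersSketch make_filters(int y_filter, int x_filter) {
  return (uint16_t)y_filter | ((uint32_t)(uint16_t)x_filter << 16);
}

static int extract_filter(InterpFiltersSketch fs, int x_filter) {
  return (int)((fs >> (x_filter ? 16 : 0)) & 0xffff);
}

int main(void) {
  /* 0 = EIGHTTAP_REGULAR, 1 = EIGHTTAP_SMOOTH in the enum above. */
  InterpFiltersSketch fs = make_filters(/*y=*/0, /*x=*/1);
  assert(extract_filter(fs, 0) == 0); /* y filter comes back out */
  assert(extract_filter(fs, 1) == 1); /* x filter comes back out */
  return 0;
}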
#if USE_EXTRA_FILTER
#define LOG_SWITCHABLE_FILTERS \
3 /* (1 << LOG_SWITCHABLE_FILTERS) > SWITCHABLE_FILTERS */
diff --git a/third_party/aom/av1/common/idct.c b/third_party/aom/av1/common/idct.c
index 7915b7034..53c2ba1f0 100644
--- a/third_party/aom/av1/common/idct.c
+++ b/third_party/aom/av1/common/idct.c
@@ -19,15 +19,14 @@
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
+#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
+ CONFIG_DAALA_DCT32 || CONFIG_DAALA_DCT64
+#include "av1/common/daala_tx.h"
+#endif
int av1_get_tx_scale(const TX_SIZE tx_size) {
- if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
-#if CONFIG_TX64X64
- else if (txsize_sqr_up_map[tx_size] == TX_64X64)
- return 2;
-#endif // CONFIG_TX64X64
- else
- return 0;
+ const int pels = tx_size_2d[tx_size];
+ return (pels > 256) + (pels > 1024) + (pels > 4096);
}
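
The new closed form reproduces the old square-only branch (TX_32X32 -> 1, TX_64X64 -> 2, else 0) while giving rectangles a well-defined answer, since pels is just width times height. A quick standalone check:

#include <assert.h>

/* Sketch: scale = (pels > 256) + (pels > 1024) + (pels > 4096). */
static int tx_scale_from_pels(int pels) {
  return (pels > 256) + (pels > 1024) + (pels > 4096);
}

int main(void) {
  assert(tx_scale_from_pels(16 * 16) == 0); /* TX_16X16 and smaller */
  assert(tx_scale_from_pels(32 * 32) == 1); /* TX_32X32, as before */
  assert(tx_scale_from_pels(64 * 64) == 2); /* TX_64X64, as before */
  assert(tx_scale_from_pels(32 * 16) == 1); /* rectangles fall out naturally */
  return 0;
}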
// NOTE: The implementation of all inverses need to be aware of the fact
@@ -37,41 +36,37 @@ int av1_get_tx_scale(const TX_SIZE tx_size) {
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 4; ++i) {
-#if CONFIG_DAALA_DCT4
- output[i] = input[i];
-#else
output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
-#endif
}
}
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 8; ++i) {
-#if CONFIG_DAALA_DCT8
- output[i] = input[i];
-#else
output[i] = input[i] * 2;
-#endif
}
}
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
int i;
- for (i = 0; i < 16; ++i)
+ for (i = 0; i < 16; ++i) {
output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
+ }
}
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
int i;
- for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
+ for (i = 0; i < 32; ++i) {
+ output[i] = input[i] * 4;
+ }
}
-#if CONFIG_TX64X64
+#if CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
int i;
- for (i = 0; i < 64; ++i)
+ for (i = 0; i < 64; ++i) {
output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
+ }
}
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
@@ -91,7 +86,7 @@ static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
// Note overall scaling factor is 4 times orthogonal
}
-#if CONFIG_TX64X64
+#if CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
int32_t in[64], out[64];
int i;
@@ -127,15 +122,16 @@ static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
// Inverse identity transform and add.
#if CONFIG_EXT_TX
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int bs, int tx_type) {
+ int bsx, int bsy, TX_TYPE tx_type) {
int r, c;
- const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
+ const int pels = bsx * bsy;
+ const int shift = 3 - ((pels > 256) + (pels > 1024));
if (tx_type == IDTX) {
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c)
+ for (r = 0; r < bsy; ++r) {
+ for (c = 0; c < bsx; ++c)
dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
dest += stride;
- input += bs;
+ input += bsx;
}
}
}
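
The shift here uses the same pel-count trick: 3 - ((pels > 256) + (pels > 1024)) equals the old bs < 32 ? 3 : (bs < 64 ? 2 : 1) for square blocks. A quick standalone check:

#include <assert.h>

/* Sketch: shift = 3 - ((pels > 256) + (pels > 1024)). */
static int idtx_shift(int pels) {
  return 3 - ((pels > 256) + (pels > 1024));
}

int main(void) {
  assert(idtx_shift(16 * 16) == 3); /* bs < 32 */
  assert(idtx_shift(32 * 32) == 2); /* 32 <= bs < 64 */
  assert(idtx_shift(64 * 64) == 1); /* bs >= 64 */
  assert(idtx_shift(8 * 32) == 3);  /* rectangles get a well-defined shift */
  return 0;
}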
@@ -149,7 +145,7 @@ static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
#if CONFIG_EXT_TX
static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
- int *sstride, int tx_type, int sizey,
+ int *sstride, TX_TYPE tx_type, int sizey,
int sizex) {
// Note that the transpose of src will be added to dst. In order to LR
// flip the addends (in dst coordinates), we UD flip the src. To UD flip
@@ -190,32 +186,41 @@ static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
#if CONFIG_HIGHBITDEPTH
#if CONFIG_EXT_TX && CONFIG_TX64X64
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
- int stride, int bs, int tx_type, int bd) {
+ int stride, int bsx, int bsy, TX_TYPE tx_type,
+ int bd) {
int r, c;
- const int shift = bs < 32 ? 3 : 2;
+ const int pels = bsx * bsy;
+ const int shift = 3 - ((pels > 256) + (pels > 1024));
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
if (tx_type == IDTX) {
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c)
+ for (r = 0; r < bsy; ++r) {
+ for (c = 0; c < bsx; ++c)
dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
dest += stride;
- input += bs;
+ input += bsx;
}
}
}
#endif // CONFIG_EXT_TX && CONFIG_TX64X64
#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_LGT
+#if CONFIG_LGT || CONFIG_LGT_FROM_PRED
void ilgt4(const tran_low_t *input, tran_low_t *output,
const tran_high_t *lgtmtx) {
- if (!(input[0] | input[1] | input[2] | input[3])) {
- output[0] = output[1] = output[2] = output[3] = 0;
+ if (!lgtmtx) assert(0);
+#if CONFIG_LGT_FROM_PRED
+ // For DCT/ADST, use butterfly implementations
+ if (lgtmtx[0] == DCT4) {
+ aom_idct4_c(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST4) {
+ aom_iadst4_c(input, output);
return;
}
+#endif // CONFIG_LGT_FROM_PRED
- // evaluate s[j] = sum of all lgtmtx[i][j]*input[i] over i=1,...,4
+  // evaluate s[j] = sum of lgtmtx[i * 4 + j] * input[i] over i = 0, ..., 3
tran_high_t s[4] = { 0 };
for (int i = 0; i < 4; ++i)
for (int j = 0; j < 4; ++j) s[j] += lgtmtx[i * 4 + j] * input[i];
@@ -225,41 +230,485 @@ void ilgt4(const tran_low_t *input, tran_low_t *output,
void ilgt8(const tran_low_t *input, tran_low_t *output,
const tran_high_t *lgtmtx) {
- // evaluate s[j] = sum of all lgtmtx[i][j]*input[i] over i=1,...,8
+ if (!lgtmtx) assert(0);
+#if CONFIG_LGT_FROM_PRED
+ // For DCT/ADST, use butterfly implementations
+ if (lgtmtx[0] == DCT8) {
+ aom_idct8_c(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST8) {
+ aom_iadst8_c(input, output);
+ return;
+ }
+#endif // CONFIG_LGT_FROM_PRED
+
+  // evaluate s[j] = sum of lgtmtx[i * 8 + j] * input[i] over i = 0, ..., 7
tran_high_t s[8] = { 0 };
for (int i = 0; i < 8; ++i)
for (int j = 0; j < 8; ++j) s[j] += lgtmtx[i * 8 + j] * input[i];
for (int i = 0; i < 8; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
}
+#endif // CONFIG_LGT || CONFIG_LGT_FROM_PRED
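
Both helpers implement a plain matrix-vector product with the matrix stored row-major and read transposed, followed by the usual 14-bit rounding shift. A toy 2-point version under the same convention (the matrix entries are made up, chosen near 2^14 / sqrt(2), and round_shift_14 stands in for dct_const_round_shift):

#include <assert.h>
#include <stdint.h>

typedef int32_t tran_low_t;
typedef int64_t tran_high_t;

/* Stand-in for dct_const_round_shift: round, then shift by DCT_CONST_BITS. */
static tran_low_t round_shift_14(tran_high_t x) {
  return (tran_low_t)((x + (1 << 13)) >> 14);
}

int main(void) {
  /* Made-up 2x2 matrix, entries ~ 2^14 / sqrt(2); stored row-major, so
   * m[i * 2 + j] is row i, column j, exactly as in ilgt4/ilgt8. */
  const tran_high_t m[4] = { 11585, 11585, 11585, -11585 };
  const tran_low_t in[2] = { 4, 2 };
  tran_high_t s[2] = { 0, 0 };
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 2; ++j) s[j] += m[i * 2 + j] * in[i];
  /* (4 + 2) / sqrt(2) ~ 4.24 and (4 - 2) / sqrt(2) ~ 1.41 after rounding. */
  assert(round_shift_14(s[0]) == 4);
  assert(round_shift_14(s[1]) == 1);
  return 0;
}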
-// The get_inv_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
-int get_inv_lgt4(transform_1d tx_orig, const TxfmParam *txfm_param,
- const tran_high_t *lgtmtx[], int ntx) {
- // inter/intra split
- if (tx_orig == &aom_iadst4_c) {
- for (int i = 0; i < ntx; ++i)
- lgtmtx[i] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
+#if CONFIG_LGT
+// get_lgt4 and get_lgt8 return 1 and pick an LGT matrix if LGT is chosen to
+// apply; otherwise they return 0.
+int get_lgt4(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx) {
+ if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D ||
+ vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
+ lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
+ return 1;
+ } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D ||
+ htx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
+ lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
return 1;
}
+ lgtmtx[0] = NULL;
return 0;
}
-int get_inv_lgt8(transform_1d tx_orig, const TxfmParam *txfm_param,
- const tran_high_t *lgtmtx[], int ntx) {
- // inter/intra split
- if (tx_orig == &aom_iadst8_c) {
- for (int i = 0; i < ntx; ++i)
- lgtmtx[i] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
+int get_lgt8(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx) {
+ if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D ||
+ vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
+ lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
+ return 1;
+ } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D ||
+ htx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
+ lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
return 1;
}
+ lgtmtx[0] = NULL;
return 0;
}
#endif // CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
+void ilgt16up(const tran_low_t *input, tran_low_t *output,
+ const tran_high_t *lgtmtx) {
+ if (lgtmtx[0] == DCT16) {
+ aom_idct16_c(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST16) {
+ aom_iadst16_c(input, output);
+ return;
+ } else if (lgtmtx[0] == DCT32) {
+ aom_idct32_c(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST32) {
+ ihalfright32_c(input, output);
+ return;
+ } else {
+ assert(0);
+ }
+}
+
+void get_discontinuity_1d(uint8_t *arr, int n, int *idx_max_diff) {
+ *idx_max_diff = -1;
+
+ int temp = 0, max_diff = 0, min_diff = INT_MAX;
+ for (int i = 1; i < n; ++i) {
+ temp = abs(arr[i] - arr[i - 1]);
+ if (temp > max_diff) {
+ max_diff = temp;
+ *idx_max_diff = i;
+ }
+ if (temp < min_diff) min_diff = temp;
+ }
+}
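
get_discontinuity_1d records where the largest jump between neighboring boundary pixels sits; that index becomes the break point bp used by the callers below (min_diff is computed but never read). A standalone illustration of the same search:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

/* Standalone sketch: index of the largest absolute neighbor difference,
 * or -1 when the array is flat (no candidate break point). */
static void find_break_point(const uint8_t *arr, int n, int *idx) {
  *idx = -1;
  int max_diff = 0;
  for (int i = 1; i < n; ++i) {
    const int d = abs(arr[i] - arr[i - 1]);
    if (d > max_diff) {
      max_diff = d;
      *idx = i;
    }
  }
}

int main(void) {
  const uint8_t boundary[4] = { 10, 12, 60, 61 };
  int bp;
  find_break_point(boundary, 4, &bp);
  assert(bp == 2); /* the 12 -> 60 jump dominates */
  return 0;
}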
+
+void get_discontinuity_2d(uint8_t *dst, int stride, int n, int is_col,
+ int *idx_max_diff, int ntx) {
+ *idx_max_diff = -1;
+
+ int diff = 0, temp = 0, max_diff = 0, min_diff = INT_MAX;
+ for (int i = 1; i < n; ++i) {
+ temp = 0;
+ for (int j = 0; j < ntx; ++j) {
+ if (is_col) // vertical diff
+ diff = dst[i * stride + j] - dst[(i - 1) * stride + j];
+ else // horizontal diff
+ diff = dst[j * stride + i] - dst[j * stride + i - 1];
+ temp += diff * diff;
+ }
+    // temp / ntx is the average squared difference at position i
+ if (temp > max_diff) {
+ max_diff = temp;
+ *idx_max_diff = i;
+ }
+ if (temp < min_diff) min_diff = temp;
+ }
+}
+
+int idx_selfloop_wrt_mode(PREDICTION_MODE mode, int is_col) {
+ // 0: no self-loop
+ // 1: small self-loop
+ // 2: medium self-loop
+ // 3: large self-loop
+ switch (mode) {
+ case DC_PRED:
+ case SMOOTH_PRED:
+      // prediction is good for both directions: large SLs for row and col
+ return 3;
+ case TM_PRED: return 0;
+#if CONFIG_SMOOTH_HV
+ case SMOOTH_H_PRED:
+#endif
+ case H_PRED:
+ // prediction is good for H direction: large SL for row only
+ return is_col ? 0 : 3;
+#if CONFIG_SMOOTH_HV
+ case SMOOTH_V_PRED:
+#endif
+ case V_PRED:
+ // prediction is good for V direction: large SL for col only
+ return is_col ? 3 : 0;
+#if LGT_SL_INTRA
+ // directional mode: choose SL based on the direction
+ case D45_PRED: return is_col ? 2 : 0;
+ case D63_PRED: return is_col ? 3 : 0;
+ case D117_PRED: return is_col ? 3 : 1;
+ case D135_PRED: return 2;
+ case D153_PRED: return is_col ? 1 : 3;
+ case D207_PRED: return is_col ? 0 : 3;
+#else
+ case D45_PRED:
+ case D63_PRED:
+ case D117_PRED: return is_col ? 3 : 0;
+ case D135_PRED:
+ case D153_PRED:
+ case D207_PRED: return is_col ? 0 : 3;
+#endif
+ // inter: no SL
+ default: return 0;
+ }
+}
+
+void get_lgt4_from_pred(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx, int ntx) {
+ PREDICTION_MODE mode = txfm_param->mode;
+ int stride = txfm_param->stride;
+ uint8_t *dst = txfm_param->dst;
+ int bp = -1;
+ uint8_t arr[4];
+
+ // Each lgt4mtx_arr[k][i] corresponds to a line graph with a self-loop on
+ // the first node, and possibly a weak edge within the line graph. i is
+ // the index of the weak edge (between the i-th and (i+1)-th pixels, i=0
+  // means no weak edge). k corresponds to the first self-loop's weight.
+ const tran_high_t *lgt4mtx_arr[4][4] = {
+ { &lgt4_000[0][0], &lgt4_000w1[0][0], &lgt4_000w2[0][0],
+ &lgt4_000w3[0][0] },
+ { &lgt4_060[0][0], &lgt4_060_000w1[0][0], &lgt4_060_000w2[0][0],
+ &lgt4_060_000w3[0][0] },
+ { &lgt4_100[0][0], &lgt4_100_000w1[0][0], &lgt4_100_000w2[0][0],
+ &lgt4_100_000w3[0][0] },
+ { &lgt4_150[0][0], &lgt4_150_000w1[0][0], &lgt4_150_000w2[0][0],
+ &lgt4_150_000w3[0][0] },
+ };
+
+ // initialize to DCT or some LGTs, and then change later if necessary
+ int idx_sl = idx_selfloop_wrt_mode(mode, is_col);
+ lgtmtx[0] = lgt4mtx_arr[idx_sl][0];
+
+ // find the break point and replace the line graph by the one with a
+ // break point
+ if (mode == DC_PRED || mode == SMOOTH_PRED) {
+ // Do not use break point, since 1) is_left_available and is_top_available
+ // in DC_PRED are not known by txfm_param for now, so accessing
+    // both boundaries anyway may cause a mismatch, and 2) DC prediction
+    // typically yields very smooth residuals, so having the break point
+ // does not usually improve the RD result.
+ return;
+ } else if (mode == TM_PRED) {
+ // TM_PRED: use both 1D top boundary and 1D left boundary
+ if (is_col)
+ for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride];
+ else
+ for (int i = 0; i < 4; ++i) arr[i] = dst[i];
+ get_discontinuity_1d(&arr[0], 4, &bp);
+ } else if (mode == V_PRED) {
+ // V_PRED: use 1D top boundary only
+ if (is_col) return;
+ for (int i = 0; i < 4; ++i) arr[i] = dst[i];
+ get_discontinuity_1d(&arr[0], 4, &bp);
+ } else if (mode == H_PRED) {
+ // H_PRED: use 1D left boundary only
+ if (!is_col) return;
+ for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride];
+ get_discontinuity_1d(&arr[0], 4, &bp);
+#if CONFIG_SMOOTH_HV
+ } else if (mode == SMOOTH_V_PRED) {
+ if (is_col) return;
+ for (int i = 0; i < 4; ++i) arr[i] = dst[-stride + i];
+ get_discontinuity_1d(&arr[0], 4, &bp);
+ } else if (mode == SMOOTH_H_PRED) {
+ if (!is_col) return;
+ for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride - 1];
+ get_discontinuity_1d(&arr[0], 4, &bp);
+#endif
+ } else if (mode == D45_PRED || mode == D63_PRED || mode == D117_PRED) {
+ // directional modes closer to vertical (maybe include D135 later)
+ if (!is_col) get_discontinuity_2d(dst, stride, 4, 0, &bp, ntx);
+ } else if (mode == D135_PRED || mode == D153_PRED || mode == D207_PRED) {
+ // directional modes closer to horizontal
+ if (is_col) get_discontinuity_2d(dst, stride, 4, 1, &bp, ntx);
+ } else if (mode > TM_PRED) {
+ // inter
+ get_discontinuity_2d(dst, stride, 4, is_col, &bp, ntx);
+ }
+
+#if LGT_SL_INTRA
+ if (bp != -1) lgtmtx[0] = lgt4mtx_arr[idx_sl][bp];
+#else
+ if (bp != -1) lgtmtx[0] = lgt4mtx_arr[0][bp];
+#endif
+}
+
+void get_lgt8_from_pred(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx, int ntx) {
+ PREDICTION_MODE mode = txfm_param->mode;
+ int stride = txfm_param->stride;
+ uint8_t *dst = txfm_param->dst;
+ int bp = -1;
+ uint8_t arr[8];
+
+ const tran_high_t *lgt8mtx_arr[4][8] = {
+ { &lgt8_000[0][0], &lgt8_000w1[0][0], &lgt8_000w2[0][0], &lgt8_000w3[0][0],
+ &lgt8_000w4[0][0], &lgt8_000w5[0][0], &lgt8_000w6[0][0],
+ &lgt8_000w7[0][0] },
+ { &lgt8_060[0][0], &lgt8_060_000w1[0][0], &lgt8_060_000w2[0][0],
+ &lgt8_060_000w3[0][0], &lgt8_060_000w4[0][0], &lgt8_060_000w5[0][0],
+ &lgt8_060_000w6[0][0], &lgt8_060_000w7[0][0] },
+ { &lgt8_100[0][0], &lgt8_100_000w1[0][0], &lgt8_100_000w2[0][0],
+ &lgt8_100_000w3[0][0], &lgt8_100_000w4[0][0], &lgt8_100_000w5[0][0],
+ &lgt8_100_000w6[0][0], &lgt8_100_000w7[0][0] },
+ { &lgt8_150[0][0], &lgt8_150_000w1[0][0], &lgt8_150_000w2[0][0],
+ &lgt8_150_000w3[0][0], &lgt8_150_000w4[0][0], &lgt8_150_000w5[0][0],
+ &lgt8_150_000w6[0][0], &lgt8_150_000w7[0][0] },
+ };
+
+ int idx_sl = idx_selfloop_wrt_mode(mode, is_col);
+ lgtmtx[0] = lgt8mtx_arr[idx_sl][0];
+
+ if (mode == DC_PRED || mode == SMOOTH_PRED) {
+ return;
+ } else if (mode == TM_PRED) {
+ if (is_col)
+ for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride];
+ else
+ for (int i = 0; i < 8; ++i) arr[i] = dst[i];
+ get_discontinuity_1d(&arr[0], 8, &bp);
+ } else if (mode == V_PRED) {
+ if (is_col) return;
+ for (int i = 0; i < 8; ++i) arr[i] = dst[i];
+ get_discontinuity_1d(&arr[0], 8, &bp);
+ } else if (mode == H_PRED) {
+ if (!is_col) return;
+ for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride];
+ get_discontinuity_1d(&arr[0], 8, &bp);
+#if CONFIG_SMOOTH_HV
+ } else if (mode == SMOOTH_V_PRED) {
+ if (is_col) return;
+ for (int i = 0; i < 8; ++i) arr[i] = dst[-stride + i];
+ get_discontinuity_1d(&arr[0], 8, &bp);
+ } else if (mode == SMOOTH_H_PRED) {
+ if (!is_col) return;
+ for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride - 1];
+ get_discontinuity_1d(&arr[0], 8, &bp);
+#endif
+ } else if (mode == D45_PRED || mode == D63_PRED || mode == D117_PRED) {
+ if (!is_col) get_discontinuity_2d(dst, stride, 8, 0, &bp, ntx);
+ } else if (mode == D135_PRED || mode == D153_PRED || mode == D207_PRED) {
+ if (is_col) get_discontinuity_2d(dst, stride, 8, 1, &bp, ntx);
+ } else if (mode > TM_PRED) {
+ get_discontinuity_2d(dst, stride, 8, is_col, &bp, ntx);
+ }
+
+#if LGT_SL_INTRA
+ if (bp != -1) lgtmtx[0] = lgt8mtx_arr[idx_sl][bp];
+#else
+ if (bp != -1) lgtmtx[0] = lgt8mtx_arr[0][bp];
+#endif
+}
+
+// Since LGTs with length > 8 are not implemented yet, the following function
+// simply falls back to DCT or ADST.
+void get_lgt16up_from_pred(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx, int ntx) {
+ int tx_length = is_col ? tx_size_high[txfm_param->tx_size]
+ : tx_size_wide[txfm_param->tx_size];
+ assert(tx_length == 16 || tx_length == 32);
+ PREDICTION_MODE mode = txfm_param->mode;
+
+ (void)ntx;
+ const tran_high_t *dctmtx =
+ tx_length == 16 ? &lgt16_000[0][0] : &lgt32_000[0][0];
+ const tran_high_t *adstmtx =
+ tx_length == 16 ? &lgt16_200[0][0] : &lgt32_200[0][0];
+
+ switch (mode) {
+ case DC_PRED:
+ case TM_PRED:
+ case SMOOTH_PRED:
+ // prediction from both top and left -> ADST
+ lgtmtx[0] = adstmtx;
+ break;
+ case V_PRED:
+ case D45_PRED:
+ case D63_PRED:
+ case D117_PRED:
+#if CONFIG_SMOOTH_HV
+ case SMOOTH_V_PRED:
+#endif
+ // prediction from the top more than from the left -> ADST
+ lgtmtx[0] = is_col ? adstmtx : dctmtx;
+ break;
+ case H_PRED:
+ case D135_PRED:
+ case D153_PRED:
+ case D207_PRED:
+#if CONFIG_SMOOTH_HV
+ case SMOOTH_H_PRED:
+#endif
+ // prediction from the left more than from the top -> DCT
+ lgtmtx[0] = is_col ? dctmtx : adstmtx;
+ break;
+ default: lgtmtx[0] = dctmtx; break;
+ }
+}
+
+typedef void (*IlgtFunc)(const tran_low_t *input, tran_low_t *output,
+ const tran_high_t *lgtmtx);
+
+static IlgtFunc ilgt_func[4] = { ilgt4, ilgt8, ilgt16up, ilgt16up };
+
+typedef void (*GetLgtFunc)(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx, int ntx);
+
+static GetLgtFunc get_lgt_func[4] = { get_lgt4_from_pred, get_lgt8_from_pred,
+ get_lgt16up_from_pred,
+ get_lgt16up_from_pred };
+
+// This inline function corresponds to the upscaling before the transpose
+// operation in the av1_iht* functions.
+static INLINE tran_low_t inv_upscale_wrt_txsize(const tran_high_t val,
+ const TX_SIZE tx_size) {
+ switch (tx_size) {
+ case TX_4X4:
+ case TX_8X8:
+ case TX_4X16:
+ case TX_16X4:
+ case TX_8X32:
+ case TX_32X8: return (tran_low_t)val;
+ case TX_4X8:
+ case TX_8X4:
+ case TX_8X16:
+ case TX_16X8: return (tran_low_t)dct_const_round_shift(val * Sqrt2);
+ default: assert(0); break;
+ }
+ return 0;
+}
+
+// This inline function corresponds to the bit shift before summing with the
+// destination in the av1_iht* functions
+static INLINE tran_low_t inv_downscale_wrt_txsize(const tran_low_t val,
+ const TX_SIZE tx_size) {
+ switch (tx_size) {
+ case TX_4X4: return ROUND_POWER_OF_TWO(val, 4);
+ case TX_4X8:
+ case TX_8X4:
+ case TX_8X8:
+ case TX_4X16:
+ case TX_16X4: return ROUND_POWER_OF_TWO(val, 5);
+ case TX_8X16:
+ case TX_16X8:
+ case TX_8X32:
+ case TX_32X8: return ROUND_POWER_OF_TWO(val, 6);
+ default: assert(0); break;
+ }
+ return 0;
+}
+
+void ilgt2d_from_pred_add(const tran_low_t *input, uint8_t *dest, int stride,
+ const TxfmParam *txfm_param) {
+ const TX_SIZE tx_size = txfm_param->tx_size;
+ const int w = tx_size_wide[tx_size];
+ const int h = tx_size_high[tx_size];
+ const int wlog2 = tx_size_wide_log2[tx_size];
+ const int hlog2 = tx_size_high_log2[tx_size];
+ assert(w <= 8 || h <= 8);
+
+ int i, j;
+ // largest 1D size allowed for LGT: 32
+ // largest 2D size allowed for LGT: 8x32=256
+ tran_low_t tmp[256], out[256], temp1d[32];
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ get_lgt_func[hlog2 - 2](txfm_param, 1, lgtmtx_col, w);
+ get_lgt_func[wlog2 - 2](txfm_param, 0, lgtmtx_row, h);
+
+// For the inverse transform, to be consistent with the av1_iht functions, we
+// always apply row transforms first and column transforms second, but both
+// row-first and column-first versions are implemented here for future
+// tests (use a different lgtmtx_col[i], and choose the row or column
+// transform first depending on the transforms involved).
+#if 1
+ // inverse column transforms
+ for (i = 0; i < w; ++i) {
+ // transpose
+ for (j = 0; j < h; ++j) tmp[i * h + j] = input[j * w + i];
+ ilgt_func[hlog2 - 2](&tmp[i * h], temp1d, lgtmtx_col[0]);
+ // upscale, and store in place
+ for (j = 0; j < h; ++j)
+ tmp[i * h + j] = inv_upscale_wrt_txsize(temp1d[j], tx_size);
+ }
+ // inverse row transforms
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; ++j) temp1d[j] = tmp[j * h + i];
+ ilgt_func[wlog2 - 2](temp1d, &out[i * w], lgtmtx_row[0]);
+ }
+ // downscale + sum with the destination
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; ++j) {
+ int d = i * stride + j;
+ int s = i * w + j;
+ dest[d] =
+ clip_pixel_add(dest[d], inv_downscale_wrt_txsize(out[s], tx_size));
+ }
+ }
+#else
+ // inverse row transforms
+ for (i = 0; i < h; ++i) {
+ ilgt_func[wlog2 - 2](input, temp1d, lgtmtx_row[0]);
+ // upscale and transpose (tmp[j*h+i] <--> tmp[j][i])
+ for (j = 0; j < w; ++j)
+ tmp[j * h + i] = inv_upscale_wrt_txsize(temp1d[j], tx_size);
+ input += w;
+ }
+ // inverse column transforms
+ for (i = 0; i < w; ++i)
+ ilgt_func[hlog2 - 2](&tmp[i * h], &out[i * h], lgtmtx_col[0]);
+ // here, out[] is the transpose of 2D block of transform coefficients
+
+ // downscale + transform + sum with dest
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; ++j) {
+ int d = i * stride + j;
+ int s = j * h + i;
+ dest[d] =
+ clip_pixel_add(dest[d], inv_downscale_wrt_txsize(out[s], tx_size));
+ }
+ }
+#endif
+}
+#endif // CONFIG_LGT_FROM_PRED
+
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -270,6 +719,26 @@ void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
#endif
static const transform_2d IHT_4[] = {
+#if CONFIG_DAALA_DCT4
+ { daala_idct4, daala_idct4 }, // DCT_DCT = 0
+ { daala_idst4, daala_idct4 }, // ADST_DCT = 1
+ { daala_idct4, daala_idst4 }, // DCT_ADST = 2
+ { daala_idst4, daala_idst4 }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { daala_idst4, daala_idct4 }, // FLIPADST_DCT
+ { daala_idct4, daala_idst4 }, // DCT_FLIPADST
+ { daala_idst4, daala_idst4 }, // FLIPADST_FLIPADST
+ { daala_idst4, daala_idst4 }, // ADST_FLIPADST
+ { daala_idst4, daala_idst4 }, // FLIPADST_ADST
+ { daala_idtx4, daala_idtx4 }, // IDTX
+ { daala_idct4, daala_idtx4 }, // V_DCT
+ { daala_idtx4, daala_idct4 }, // H_DCT
+ { daala_idst4, daala_idtx4 }, // V_ADST
+ { daala_idtx4, daala_idst4 }, // H_ADST
+ { daala_idst4, daala_idtx4 }, // V_FLIPADST
+ { daala_idtx4, daala_idst4 }, // H_FLIPADST
+#endif
+#else
{ aom_idct4_c, aom_idct4_c }, // DCT_DCT = 0
{ aom_iadst4_c, aom_idct4_c }, // ADST_DCT = 1
{ aom_idct4_c, aom_iadst4_c }, // DCT_ADST = 2
@@ -288,6 +757,7 @@ void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ aom_iadst4_c, iidtx4_c }, // V_FLIPADST
{ iidtx4_c, aom_iadst4_c }, // H_FLIPADST
#endif
+#endif
};
int i, j;
@@ -301,24 +771,22 @@ void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[4];
- const tran_high_t *lgtmtx_row[4];
- int use_lgt_col =
- get_inv_lgt4(IHT_4[tx_type].cols, txfm_param, lgtmtx_col, 4);
- int use_lgt_row =
- get_inv_lgt4(IHT_4[tx_type].rows, txfm_param, lgtmtx_row, 4);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif
// inverse transform row vectors
for (i = 0; i < 4; ++i) {
#if CONFIG_DAALA_DCT4
tran_low_t temp_in[4];
- for (j = 0; j < 4; j++) temp_in[j] = input[j] << 1;
+ for (j = 0; j < 4; j++) temp_in[j] = input[j] * 2;
IHT_4[tx_type].rows(temp_in, out[i]);
#else
#if CONFIG_LGT
if (use_lgt_row)
- ilgt4(input, out[i], lgtmtx_row[i]);
+ ilgt4(input, out[i], lgtmtx_row[0]);
else
#endif
IHT_4[tx_type].rows(input, out[i]);
@@ -337,7 +805,7 @@ void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (i = 0; i < 4; ++i) {
#if CONFIG_LGT
if (use_lgt_col)
- ilgt4(tmp[i], out[i], lgtmtx_col[i]);
+ ilgt4(tmp[i], out[i], lgtmtx_col[0]);
else
#endif
IHT_4[tx_type].cols(tmp[i], out[i]);
@@ -363,7 +831,7 @@ void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -399,19 +867,17 @@ void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int outstride = n2;
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[4];
- const tran_high_t *lgtmtx_row[8];
- int use_lgt_col =
- get_inv_lgt8(IHT_4x8[tx_type].cols, txfm_param, lgtmtx_col, 4);
- int use_lgt_row =
- get_inv_lgt4(IHT_4x8[tx_type].rows, txfm_param, lgtmtx_row, 8);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif
// inverse transform row vectors and transpose
for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
if (use_lgt_row)
- ilgt4(input, outtmp, lgtmtx_row[i]);
+ ilgt4(input, outtmp, lgtmtx_row[0]);
else
#endif
IHT_4x8[tx_type].rows(input, outtmp);
@@ -424,7 +890,7 @@ void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (i = 0; i < n; ++i) {
#if CONFIG_LGT
if (use_lgt_col)
- ilgt8(tmp[i], out[i], lgtmtx_col[i]);
+ ilgt8(tmp[i], out[i], lgtmtx_col[0]);
else
#endif
IHT_4x8[tx_type].cols(tmp[i], out[i]);
@@ -446,7 +912,7 @@ void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -483,19 +949,17 @@ void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int outstride = n;
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[8];
- const tran_high_t *lgtmtx_row[4];
- int use_lgt_col =
- get_inv_lgt4(IHT_8x4[tx_type].cols, txfm_param, lgtmtx_col, 8);
- int use_lgt_row =
- get_inv_lgt8(IHT_8x4[tx_type].rows, txfm_param, lgtmtx_row, 4);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// inverse transform row vectors and transpose
for (i = 0; i < n; ++i) {
#if CONFIG_LGT
if (use_lgt_row)
- ilgt8(input, outtmp, lgtmtx_row[i]);
+ ilgt8(input, outtmp, lgtmtx_row[0]);
else
#endif
IHT_8x4[tx_type].rows(input, outtmp);
@@ -508,7 +972,7 @@ void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
if (use_lgt_col)
- ilgt4(tmp[i], out[i], lgtmtx_col[i]);
+ ilgt4(tmp[i], out[i], lgtmtx_col[0]);
else
#endif
IHT_8x4[tx_type].cols(tmp[i], out[i]);
@@ -530,7 +994,7 @@ void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -566,16 +1030,15 @@ void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int outstride = n4;
#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[16];
- int use_lgt_row =
- get_inv_lgt4(IHT_4x16[tx_type].rows, txfm_param, lgtmtx_row, 16);
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif
// inverse transform row vectors and transpose
for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
if (use_lgt_row)
- ilgt4(input, outtmp, lgtmtx_row[i]);
+ ilgt4(input, outtmp, lgtmtx_row[0]);
else
#endif
IHT_4x16[tx_type].rows(input, outtmp);
@@ -604,7 +1067,7 @@ void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -641,9 +1104,8 @@ void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int outstride = n;
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[16];
- int use_lgt_col =
- get_inv_lgt4(IHT_16x4[tx_type].cols, txfm_param, lgtmtx_col, 16);
+ const tran_high_t *lgtmtx_col[1];
+ int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
#endif
// inverse transform row vectors and transpose
@@ -657,7 +1119,7 @@ void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
if (use_lgt_col)
- ilgt4(tmp[i], out[i], lgtmtx_col[i]);
+ ilgt4(tmp[i], out[i], lgtmtx_col[0]);
else
#endif
IHT_16x4[tx_type].cols(tmp[i], out[i]);
@@ -679,7 +1141,7 @@ void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -715,16 +1177,15 @@ void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int outstride = n2;
#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[16];
- int use_lgt_row =
- get_inv_lgt8(IHT_8x16[tx_type].rows, txfm_param, lgtmtx_row, 16);
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// inverse transform row vectors and transpose
for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
if (use_lgt_row)
- ilgt8(input, outtmp, lgtmtx_row[i]);
+ ilgt8(input, outtmp, lgtmtx_row[0]);
else
#endif
IHT_8x16[tx_type].rows(input, outtmp);
@@ -754,7 +1215,7 @@ void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -791,9 +1252,8 @@ void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int outstride = n;
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[16];
- int use_lgt_col =
- get_inv_lgt8(IHT_16x8[tx_type].cols, txfm_param, lgtmtx_col, 16);
+ const tran_high_t *lgtmtx_col[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
#endif
// inverse transform row vectors and transpose
@@ -808,7 +1268,7 @@ void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
if (use_lgt_col)
- ilgt8(tmp[i], out[i], lgtmtx_col[i]);
+ ilgt8(tmp[i], out[i], lgtmtx_col[0]);
else
#endif
IHT_16x8[tx_type].cols(tmp[i], out[i]);
@@ -830,7 +1290,7 @@ void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -866,16 +1326,15 @@ void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int outstride = n4;
#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[32];
- int use_lgt_row =
- get_inv_lgt8(IHT_8x32[tx_type].rows, txfm_param, lgtmtx_row, 32);
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// inverse transform row vectors and transpose
for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
if (use_lgt_row)
- ilgt8(input, outtmp, lgtmtx_row[i]);
+ ilgt8(input, outtmp, lgtmtx_row[0]);
else
#endif
IHT_8x32[tx_type].rows(input, outtmp);
@@ -904,7 +1363,7 @@ void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -941,9 +1400,8 @@ void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int outstride = n;
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[32];
- int use_lgt_col =
- get_inv_lgt4(IHT_32x8[tx_type].cols, txfm_param, lgtmtx_col, 32);
+ const tran_high_t *lgtmtx_col[1];
+ int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
#endif
// inverse transform row vectors and transpose
@@ -957,7 +1415,7 @@ void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
if (use_lgt_col)
- ilgt8(tmp[i], out[i], lgtmtx_col[i]);
+ ilgt8(tmp[i], out[i], lgtmtx_col[0]);
else
#endif
IHT_32x8[tx_type].cols(tmp[i], out[i]);
@@ -979,7 +1437,7 @@ void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1041,7 +1499,7 @@ void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1103,7 +1561,7 @@ void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1111,6 +1569,26 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_8[] = {
+#if CONFIG_DAALA_DCT8
+ { daala_idct8, daala_idct8 }, // DCT_DCT = 0
+ { daala_idst8, daala_idct8 }, // ADST_DCT = 1
+ { daala_idct8, daala_idst8 }, // DCT_ADST = 2
+ { daala_idst8, daala_idst8 }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { daala_idst8, daala_idct8 }, // FLIPADST_DCT
+ { daala_idct8, daala_idst8 }, // DCT_FLIPADST
+ { daala_idst8, daala_idst8 }, // FLIPADST_FLIPADST
+ { daala_idst8, daala_idst8 }, // ADST_FLIPADST
+ { daala_idst8, daala_idst8 }, // FLIPADST_ADST
+ { daala_idtx8, daala_idtx8 }, // IDTX
+ { daala_idct8, daala_idtx8 }, // V_DCT
+ { daala_idtx8, daala_idct8 }, // H_DCT
+ { daala_idst8, daala_idtx8 }, // V_ADST
+ { daala_idtx8, daala_idst8 }, // H_ADST
+ { daala_idst8, daala_idtx8 }, // V_FLIPADST
+ { daala_idtx8, daala_idst8 }, // H_FLIPADST
+#endif
+#else
{ aom_idct8_c, aom_idct8_c }, // DCT_DCT = 0
{ aom_iadst8_c, aom_idct8_c }, // ADST_DCT = 1
{ aom_idct8_c, aom_iadst8_c }, // DCT_ADST = 2
@@ -1129,6 +1607,7 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ aom_iadst8_c, iidtx8_c }, // V_FLIPADST
{ iidtx8_c, aom_iadst8_c }, // H_FLIPADST
#endif
+#endif
};
int i, j;
@@ -1138,12 +1617,10 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int outstride = 8;
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[8];
- const tran_high_t *lgtmtx_row[8];
- int use_lgt_col =
- get_inv_lgt8(IHT_8[tx_type].cols, txfm_param, lgtmtx_col, 8);
- int use_lgt_row =
- get_inv_lgt8(IHT_8[tx_type].rows, txfm_param, lgtmtx_row, 8);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// inverse transform row vectors
@@ -1155,7 +1632,7 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
#else
#if CONFIG_LGT
if (use_lgt_row)
- ilgt8(input, out[i], lgtmtx_row[i]);
+ ilgt8(input, out[i], lgtmtx_row[0]);
else
#endif
IHT_8[tx_type].rows(input, out[i]);
@@ -1174,7 +1651,7 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (i = 0; i < 8; ++i) {
#if CONFIG_LGT
if (use_lgt_col)
- ilgt8(tmp[i], out[i], lgtmtx_col[i]);
+ ilgt8(tmp[i], out[i], lgtmtx_col[0]);
else
#endif
IHT_8[tx_type].cols(tmp[i], out[i]);
@@ -1200,7 +1677,7 @@ void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1208,6 +1685,26 @@ void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_16[] = {
+#if CONFIG_DAALA_DCT16
+ { daala_idct16, daala_idct16 }, // DCT_DCT = 0
+ { daala_idst16, daala_idct16 }, // ADST_DCT = 1
+ { daala_idct16, daala_idst16 }, // DCT_ADST = 2
+ { daala_idst16, daala_idst16 }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { daala_idst16, daala_idct16 }, // FLIPADST_DCT
+ { daala_idct16, daala_idst16 }, // DCT_FLIPADST
+ { daala_idst16, daala_idst16 }, // FLIPADST_FLIPADST
+ { daala_idst16, daala_idst16 }, // ADST_FLIPADST
+ { daala_idst16, daala_idst16 }, // FLIPADST_ADST
+ { daala_idtx16, daala_idtx16 }, // IDTX
+ { daala_idct16, daala_idtx16 }, // V_DCT
+ { daala_idtx16, daala_idct16 }, // H_DCT
+ { daala_idst16, daala_idtx16 }, // V_ADST
+ { daala_idtx16, daala_idst16 }, // H_ADST
+ { daala_idst16, daala_idtx16 }, // V_FLIPADST
+ { daala_idtx16, daala_idst16 }, // H_FLIPADST
+#endif
+#else
{ aom_idct16_c, aom_idct16_c }, // DCT_DCT = 0
{ aom_iadst16_c, aom_idct16_c }, // ADST_DCT = 1
{ aom_idct16_c, aom_iadst16_c }, // DCT_ADST = 2
@@ -1226,6 +1723,7 @@ void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ aom_iadst16_c, iidtx16_c }, // V_FLIPADST
{ iidtx16_c, aom_iadst16_c }, // H_FLIPADST
#endif
+#endif
};
int i, j;
@@ -1236,7 +1734,13 @@ void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
// inverse transform row vectors
for (i = 0; i < 16; ++i) {
+#if CONFIG_DAALA_DCT16
+ tran_low_t temp_in[16];
+ for (j = 0; j < 16; j++) temp_in[j] = input[j] * 2;
+ IHT_16[tx_type].rows(temp_in, out[i]);
+#else
IHT_16[tx_type].rows(input, out[i]);
+#endif
input += 16;
}
@@ -1259,20 +1763,45 @@ void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < 16; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
+#if CONFIG_DAALA_DCT16
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
+#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+#endif
}
}
}
-#if CONFIG_EXT_TX
+#if CONFIG_EXT_TX || CONFIG_DAALA_DCT32
void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_DCT_ONLY
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_32[] = {
+#if CONFIG_DAALA_DCT32
+ { daala_idct32, daala_idct32 }, // DCT_DCT
+#if CONFIG_EXT_TX
+ { daala_idst32, daala_idct32 }, // ADST_DCT
+ { daala_idct32, daala_idst32 }, // DCT_ADST
+ { daala_idst32, daala_idst32 }, // ADST_ADST
+ { daala_idst32, daala_idct32 }, // FLIPADST_DCT
+ { daala_idct32, daala_idst32 }, // DCT_FLIPADST
+ { daala_idst32, daala_idst32 }, // FLIPADST_FLIPADST
+ { daala_idst32, daala_idst32 }, // ADST_FLIPADST
+ { daala_idst32, daala_idst32 }, // FLIPADST_ADST
+ { daala_idtx32, daala_idtx32 }, // IDTX
+ { daala_idct32, daala_idtx32 }, // V_DCT
+ { daala_idtx32, daala_idct32 }, // H_DCT
+ { daala_idst32, daala_idtx32 }, // V_ADST
+ { daala_idtx32, daala_idst32 }, // H_ADST
+ { daala_idst32, daala_idtx32 }, // V_FLIPADST
+ { daala_idtx32, daala_idst32 }, // H_FLIPADST
+#endif
+#else
{ aom_idct32_c, aom_idct32_c }, // DCT_DCT
+#if CONFIG_EXT_TX
{ ihalfright32_c, aom_idct32_c }, // ADST_DCT
{ aom_idct32_c, ihalfright32_c }, // DCT_ADST
{ ihalfright32_c, ihalfright32_c }, // ADST_ADST
@@ -1288,6 +1817,8 @@ void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx32_c, ihalfright32_c }, // H_ADST
{ ihalfright32_c, iidtx32_c }, // V_FLIPADST
{ iidtx32_c, ihalfright32_c }, // H_FLIPADST
+#endif
+#endif
};
int i, j;
@@ -1298,14 +1829,24 @@ void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
// inverse transform row vectors
for (i = 0; i < 32; ++i) {
+#if CONFIG_DAALA_DCT32
+ tran_low_t temp_in[32];
+ for (j = 0; j < 32; j++) temp_in[j] = input[j] * 2;
+ IHT_32[tx_type].rows(temp_in, out[i]);
+#else
IHT_32[tx_type].rows(input, out[i]);
+#endif
input += 32;
}
// transpose
for (i = 0; i < 32; i++) {
for (j = 0; j < 32; j++) {
+#if CONFIG_DAALA_DCT32
+ tmp[j][i] = out[i][j] * 4;
+#else
tmp[j][i] = out[i][j];
+#endif
}
}
@@ -1319,16 +1860,20 @@ void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < 32; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
+#if CONFIG_DAALA_DCT32
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+#endif
}
}
}
-#endif // CONFIG_EXT_TX
+#endif // CONFIG_EXT_TX || CONFIG_DAALA_DCT32
#if CONFIG_TX64X64
void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1336,6 +1881,26 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_64[] = {
+#if CONFIG_DAALA_DCT64
+ { daala_idct64, daala_idct64 }, // DCT_DCT
+ { daala_idst64, daala_idct64 }, // ADST_DCT
+ { daala_idct64, daala_idst64 }, // DCT_ADST
+ { daala_idst64, daala_idst64 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { daala_idst64, daala_idct64 }, // FLIPADST_DCT
+ { daala_idct64, daala_idst64 }, // DCT_FLIPADST
+ { daala_idst64, daala_idst64 }, // FLIPADST_FLIPADST
+ { daala_idst64, daala_idst64 }, // ADST_FLIPADST
+ { daala_idst64, daala_idst64 }, // FLIPADST_ADST
+ { daala_idtx64, daala_idtx64 }, // IDTX
+ { daala_idct64, daala_idtx64 }, // V_DCT
+ { daala_idtx64, daala_idct64 }, // H_DCT
+ { daala_idst64, daala_idtx64 }, // V_ADST
+ { daala_idtx64, daala_idst64 }, // H_ADST
+ { daala_idst64, daala_idtx64 }, // V_FLIPADST
+ { daala_idtx64, daala_idst64 }, // H_FLIPADST
+#endif
+#else
{ idct64_col_c, idct64_row_c }, // DCT_DCT
{ ihalfright64_c, idct64_row_c }, // ADST_DCT
{ idct64_col_c, ihalfright64_c }, // DCT_ADST
@@ -1354,6 +1919,7 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ ihalfright64_c, iidtx64_c }, // V_FLIPADST
{ iidtx64_c, ihalfright64_c }, // H_FLIPADST
#endif
+#endif
};
int i, j;
@@ -1364,8 +1930,15 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
// inverse transform row vectors
for (i = 0; i < 64; ++i) {
+#if CONFIG_DAALA_DCT64
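+ // As with the smaller Daala sizes, pre-scale the row input by 2; the
+ // final rounding shift below drops from 5 to 2.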
+ tran_low_t temp_in[64];
+ for (j = 0; j < 64; j++) temp_in[j] = input[j] * 2;
+ IHT_64[tx_type].rows(temp_in, out[i]);
+// Do not rescale the intermediate for Daala.
+#else
IHT_64[tx_type].rows(input, out[i]);
for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
+#endif
input += 64;
}
@@ -1388,10 +1961,139 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
for (j = 0; j < 64; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
+#if CONFIG_DAALA_DCT64
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 2));
+#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+#endif
}
}
}
+
+void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+ const TxfmParam *txfm_param) {
+ const TX_TYPE tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
+ static const transform_2d IHT_64x32[] = {
+ { aom_idct32_c, idct64_row_c }, // DCT_DCT
+ { ihalfright32_c, idct64_row_c }, // ADST_DCT
+ { aom_idct32_c, ihalfright64_c }, // DCT_ADST
+ { ihalfright32_c, ihalfright64_c }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { ihalfright32_c, idct64_row_c }, // FLIPADST_DCT
+ { aom_idct32_c, ihalfright64_c }, // DCT_FLIPADST
+ { ihalfright32_c, ihalfright64_c }, // FLIPADST_FLIPADST
+ { ihalfright32_c, ihalfright64_c }, // ADST_FLIPADST
+ { ihalfright32_c, ihalfright64_c }, // FLIPADST_ADST
+ { iidtx32_c, iidtx64_c }, // IDTX
+ { aom_idct32_c, iidtx64_c }, // V_DCT
+ { iidtx32_c, idct64_row_c }, // H_DCT
+ { ihalfright32_c, iidtx64_c }, // V_ADST
+ { iidtx32_c, ihalfright64_c }, // H_ADST
+ { ihalfright32_c, iidtx64_c }, // V_FLIPADST
+ { iidtx32_c, ihalfright64_c }, // H_FLIPADST
+#endif
+ };
+ const int n = 32;
+ const int n2 = 64;
+
+ int i, j;
+ tran_low_t out[64][32], tmp[64][32], outtmp[64];
+ tran_low_t *outp = &out[0][0];
+ int outstride = n;
+
+ // inverse transform row vectors and transpose
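+ // Scale each row by 1/sqrt(2) so that the overall gain of the rectangular
+ // 2-D transform matches that of the square sizes.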
+ for (i = 0; i < n; ++i) {
+ IHT_64x32[tx_type].rows(input, outtmp);
+ for (j = 0; j < n2; ++j)
+ tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
+ input += n2;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < n2; ++i) IHT_64x32[tx_type].cols(tmp[i], out[i]);
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < n2; ++j) {
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+ }
+ }
+}
+
+void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+ const TxfmParam *txfm_param) {
+ const TX_TYPE tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
+ static const transform_2d IHT_32x64[] = {
+ { idct64_col_c, aom_idct32_c }, // DCT_DCT
+ { ihalfright64_c, aom_idct32_c }, // ADST_DCT
+ { idct64_col_c, ihalfright32_c }, // DCT_ADST
+ { ihalfright64_c, ihalfright32_c }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { ihalfright64_c, aom_idct32_c }, // FLIPADST_DCT
+ { idct64_col_c, ihalfright32_c }, // DCT_FLIPADST
+ { ihalfright64_c, ihalfright32_c }, // FLIPADST_FLIPADST
+ { ihalfright64_c, ihalfright32_c }, // ADST_FLIPADST
+ { ihalfright64_c, ihalfright32_c }, // FLIPADST_ADST
+ { iidtx64_c, iidtx32_c }, // IDTX
+ { idct64_col_c, iidtx32_c }, // V_DCT
+ { iidtx64_c, aom_idct32_c }, // H_DCT
+ { ihalfright64_c, iidtx32_c }, // V_ADST
+ { iidtx64_c, ihalfright32_c }, // H_ADST
+ { ihalfright64_c, iidtx32_c }, // V_FLIPADST
+ { iidtx64_c, ihalfright32_c }, // H_FLIPADST
+#endif
+ };
+
+ const int n = 32;
+ const int n2 = 64;
+ int i, j;
+ tran_low_t out[32][64], tmp[32][64], outtmp[32];
+ tran_low_t *outp = &out[0][0];
+ int outstride = n2;
+
+ // inverse transform row vectors and transpose
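+ // As in the 64x32 case, rows are scaled by 1/sqrt(2) to keep the 2-D gain
+ // consistent with the square sizes.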
+ for (i = 0; i < n2; ++i) {
+ IHT_32x64[tx_type].rows(input, outtmp);
+ for (j = 0; j < n; ++j)
+ tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
+ input += n;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < n; ++i) IHT_32x64[tx_type].cols(tmp[i], out[i]);
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+#endif
+
+ // Sum with the destination
+ for (i = 0; i < n2; ++i) {
+ for (j = 0; j < n; ++j) {
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+ }
+ }
+}
+
#endif // CONFIG_TX64X64
// idct
@@ -1440,6 +2142,7 @@ static void idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
}
#endif
+#if !CONFIG_DAALA_DCT16
static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
// The calculation can be simplified if there are not many non-zero dct
@@ -1462,6 +2165,7 @@ static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
else
aom_idct16x16_256_add(input, dest, stride);
}
+#endif
#if CONFIG_MRC_TX
static void imrc32x32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
@@ -1475,11 +2179,23 @@ static void imrc32x32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
#endif
const int eob = txfm_param->eob;
+ int n_masked_vals = 0;
+ uint8_t *mask;
+ uint8_t mask_tmp[32 * 32];
if (eob == 1) {
aom_idct32x32_1_add_c(input, dest, stride);
} else {
- tran_low_t mask[32 * 32];
- get_mrc_mask(txfm_param->dst, txfm_param->stride, mask, 32, 32, 32);
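+ // Use the mask signalled in the bitstream when present; otherwise derive
+ // one from the prediction and sanity-check it.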
+ if ((txfm_param->is_inter && SIGNAL_MRC_MASK_INTER) ||
+ (!txfm_param->is_inter && SIGNAL_MRC_MASK_INTRA)) {
+ mask = txfm_param->mask;
+ } else {
+ n_masked_vals =
+ get_mrc_pred_mask(txfm_param->dst, txfm_param->stride, mask_tmp, 32,
+ 32, 32, txfm_param->is_inter);
+ if (!is_valid_mrc_mask(n_masked_vals, 32, 32))
+ assert(0 && "Invalid MRC mask");
+ mask = mask_tmp;
+ }
if (eob <= quarter)
// non-zero coeff only in upper-left 8x8
aom_imrc32x32_34_add_c(input, dest, stride, mask);
@@ -1492,6 +2208,7 @@ static void imrc32x32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
#endif // CONFIG_MRC_TX
+#if !CONFIG_DAALA_DCT32
static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
#if CONFIG_ADAPT_SCAN
@@ -1514,14 +2231,15 @@ static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
else
aom_idct32x32_1024_add(input, dest, stride);
}
+#endif
-#if CONFIG_TX64X64
+#if CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
static void idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
(void)txfm_param;
- av1_iht64x64_4096_add(input, dest, stride, DCT_DCT);
+ av1_iht64x64_4096_add(input, dest, stride, txfm_param);
}
-#endif // CONFIG_TX64X64
+#endif // CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
#if CONFIG_CHROMA_2X2
static void inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest, int stride,
@@ -1568,7 +2286,7 @@ static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
-#if CONFIG_LGT
+#if CONFIG_LGT || CONFIG_DAALA_DCT4
// LGT only exists in C version
av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
break;
@@ -1582,7 +2300,7 @@ static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
-#if CONFIG_LGT
+#if CONFIG_LGT || CONFIG_DAALA_DCT4
av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
break;
#else
@@ -1598,7 +2316,7 @@ static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
// Use C version since DST only exists in C code
av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
break;
- case IDTX: inv_idtx_add_c(input, dest, stride, 4, tx_type); break;
+ case IDTX: inv_idtx_add_c(input, dest, stride, 4, 4, tx_type); break;
#endif // CONFIG_EXT_TX
default: assert(0); break;
}
@@ -1689,6 +2407,18 @@ static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
av1_iht32x16_512_add(input, dest, stride, txfm_param);
}
+#if CONFIG_TX64X64
+static void inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ av1_iht32x64_2048_add(input, dest, stride, txfm_param);
+}
+
+static void inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ av1_iht64x32_2048_add(input, dest, stride, txfm_param);
+}
+#endif // CONFIG_TX64X64
+
static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
const TX_TYPE tx_type = txfm_param->tx_type;
@@ -1701,7 +2431,7 @@ static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
-#if CONFIG_LGT
+#if CONFIG_LGT || CONFIG_DAALA_DCT8
av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
break;
#else
@@ -1714,7 +2444,7 @@ static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
-#if CONFIG_LGT
+#if CONFIG_LGT || CONFIG_DAALA_DCT8
av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
break;
#else
@@ -1730,7 +2460,7 @@ static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
// Use C version since DST only exists in C code
av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
break;
- case IDTX: inv_idtx_add_c(input, dest, stride, 8, tx_type); break;
+ case IDTX: inv_idtx_add_c(input, dest, stride, 8, 8, tx_type); break;
#endif // CONFIG_EXT_TX
default: assert(0); break;
}
@@ -1740,11 +2470,19 @@ static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const TX_TYPE tx_type = txfm_param->tx_type;
switch (tx_type) {
+#if !CONFIG_DAALA_DCT16
case DCT_DCT: idct16x16_add(input, dest, stride, txfm_param); break;
+#else
+ case DCT_DCT:
+#endif
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
+#if CONFIG_DAALA_DCT16
+ av1_iht16x16_256_add_c(input, dest, stride, txfm_param);
+#else
av1_iht16x16_256_add(input, dest, stride, txfm_param);
+#endif // CONFIG_DAALA_DCT16
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@@ -1758,9 +2496,13 @@ static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
case H_ADST:
case V_FLIPADST:
case H_FLIPADST:
+#if CONFIG_DAALA_DCT16
+ av1_iht16x16_256_add_c(input, dest, stride, txfm_param);
+#else
av1_iht16x16_256_add(input, dest, stride, txfm_param);
+#endif // CONFIG_DAALA_DCT16
break;
- case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break;
+ case IDTX: inv_idtx_add_c(input, dest, stride, 16, 16, tx_type); break;
#endif // CONFIG_EXT_TX
#if CONFIG_MRC_TX
case MRC_DCT: assert(0 && "Invalid tx type for tx size");
@@ -1773,7 +2515,13 @@ static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const TX_TYPE tx_type = txfm_param->tx_type;
switch (tx_type) {
+#if !CONFIG_DAALA_DCT32
case DCT_DCT: idct32x32_add(input, dest, stride, txfm_param); break;
+#else
+ case DCT_DCT:
+ av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
+ break;
+#endif
#if CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
@@ -1791,7 +2539,7 @@ static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
case H_FLIPADST:
av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
break;
- case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break;
+ case IDTX: inv_idtx_add_c(input, dest, stride, 32, 32, tx_type); break;
#endif // CONFIG_EXT_TX
#if CONFIG_MRC_TX
case MRC_DCT: imrc32x32_add_c(input, dest, stride, txfm_param); break;
@@ -1804,8 +2552,13 @@ static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const TX_TYPE tx_type = txfm_param->tx_type;
+ assert(tx_type == DCT_DCT);
switch (tx_type) {
+#if !CONFIG_DAALA_DCT64
case DCT_DCT: idct64x64_add(input, dest, stride, txfm_param); break;
+#else
+ case DCT_DCT:
+#endif
#if CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
@@ -1823,7 +2576,7 @@ static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
case H_FLIPADST:
av1_iht64x64_4096_add_c(input, dest, stride, txfm_param);
break;
- case IDTX: inv_idtx_add_c(input, dest, stride, 64, tx_type); break;
+ case IDTX: inv_idtx_add_c(input, dest, stride, 64, 64, tx_type); break;
#endif // CONFIG_EXT_TX
#if CONFIG_MRC_TX
case MRC_DCT: assert(0 && "Invalid tx type for tx size");
@@ -1847,7 +2600,7 @@ static void highbd_inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest,
int eob = txfm_param->eob;
int bd = txfm_param->bd;
int lossless = txfm_param->lossless;
- TX_TYPE tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;
tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
@@ -1876,13 +2629,18 @@ static void highbd_inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest,
}
#endif
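+// tran_low_t is 32 bits wide in the builds that reach this code; make that
+// assumption explicit with an assert instead of a bare inline cast.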
+static const int32_t *cast_to_int32(const tran_low_t *input) {
+ assert(sizeof(int32_t) == sizeof(tran_low_t));
+ return (const int32_t *)input;
+}
+
void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
int eob = txfm_param->eob;
int bd = txfm_param->bd;
int lossless = txfm_param->lossless;
- const int32_t *src = (const int32_t *)input;
- TX_TYPE tx_type = txfm_param->tx_type;
+ const int32_t *src = cast_to_int32(input);
+ const TX_TYPE tx_type = txfm_param->tx_type;
if (lossless) {
assert(tx_type == DCT_DCT);
av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
@@ -1923,51 +2681,67 @@ void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
- const int32_t *src = (const int32_t *)input;
+ const int32_t *src = cast_to_int32(input);
av1_inv_txfm2d_add_4x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
txfm_param->tx_type, txfm_param->bd);
}
void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
- const int32_t *src = (const int32_t *)input;
+ const int32_t *src = cast_to_int32(input);
av1_inv_txfm2d_add_8x4_c(src, CONVERT_TO_SHORTPTR(dest), stride,
txfm_param->tx_type, txfm_param->bd);
}
static void highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
- const int32_t *src = (const int32_t *)input;
+ const int32_t *src = cast_to_int32(input);
av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
txfm_param->tx_type, txfm_param->bd);
}
static void highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
- const int32_t *src = (const int32_t *)input;
+ const int32_t *src = cast_to_int32(input);
av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
txfm_param->tx_type, txfm_param->bd);
}
static void highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
- const int32_t *src = (const int32_t *)input;
+ const int32_t *src = cast_to_int32(input);
av1_inv_txfm2d_add_16x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
txfm_param->tx_type, txfm_param->bd);
}
static void highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
- const int32_t *src = (const int32_t *)input;
+ const int32_t *src = cast_to_int32(input);
av1_inv_txfm2d_add_32x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
txfm_param->tx_type, txfm_param->bd);
}
+#if CONFIG_TX64X64
+static void highbd_inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ const int32_t *src = cast_to_int32(input);
+ av1_inv_txfm2d_add_32x64_c(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
+}
+
+static void highbd_inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
+ int stride, const TxfmParam *txfm_param) {
+ const int32_t *src = cast_to_int32(input);
+ av1_inv_txfm2d_add_64x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
+ txfm_param->tx_type, txfm_param->bd);
+}
+#endif // CONFIG_TX64X64
+
static void highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
int bd = txfm_param->bd;
- TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = (const int32_t *)input;
+ const TX_TYPE tx_type = txfm_param->tx_type;
+ const int32_t *src = cast_to_int32(input);
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
@@ -2004,8 +2778,8 @@ static void highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
static void highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
int bd = txfm_param->bd;
- TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = (const int32_t *)input;
+ const TX_TYPE tx_type = txfm_param->tx_type;
+ const int32_t *src = cast_to_int32(input);
switch (tx_type) {
case DCT_DCT:
case ADST_DCT:
@@ -2042,37 +2816,37 @@ static void highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
static void highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
int bd = txfm_param->bd;
- TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = (const int32_t *)input;
+ const TX_TYPE tx_type = txfm_param->tx_type;
+ const int32_t *src = cast_to_int32(input);
switch (tx_type) {
case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
av1_inv_txfm2d_add_32x32(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
bd);
break;
+
+ // The optimised version only supports DCT_DCT, so force use of
+ // the C version for all other transform types.
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
#if CONFIG_EXT_TX
case FLIPADST_DCT:
case DCT_FLIPADST:
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- av1_inv_txfm2d_add_32x32(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
- bd);
- break;
- // use the c version for anything including identity for now
+ case IDTX:
case V_DCT:
case H_DCT:
case V_ADST:
case H_ADST:
case V_FLIPADST:
case H_FLIPADST:
- case IDTX:
+#endif // CONFIG_EXT_TX
av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
tx_type, bd);
break;
-#endif // CONFIG_EXT_TX
+
default: assert(0);
}
}
@@ -2081,8 +2855,8 @@ static void highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
static void highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
int bd = txfm_param->bd;
- TX_TYPE tx_type = txfm_param->tx_type;
- const int32_t *src = (const int32_t *)input;
+ const TX_TYPE tx_type = txfm_param->tx_type;
+ const int32_t *src = cast_to_int32(input);
switch (tx_type) {
case DCT_DCT:
av1_inv_txfm2d_add_64x64(src, CONVERT_TO_SHORTPTR(dest), stride, DCT_DCT,
@@ -2113,7 +2887,7 @@ static void highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
DCT_DCT, bd);
break;
case IDTX:
- highbd_inv_idtx_add_c(input, dest, stride, 64, tx_type, bd);
+ highbd_inv_idtx_add_c(input, dest, stride, 64, 64, tx_type, bd);
break;
#endif // CONFIG_EXT_TX
default: assert(0); break;
@@ -2124,6 +2898,13 @@ static void highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
TxfmParam *txfm_param) {
const TX_SIZE tx_size = txfm_param->tx_size;
+#if CONFIG_LGT_FROM_PRED
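+ // Prediction-based LGT has its own 2-D inverse path, so handle it before
+ // the per-size switch below.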
+ if (txfm_param->use_lgt) {
+ assert(is_lgt_allowed(txfm_param->mode, tx_size));
+ ilgt2d_from_pred_add(input, dest, stride, txfm_param);
+ return;
+ }
+#endif // CONFIG_LGT_FROM_PRED
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64: inv_txfm_add_64x64(input, dest, stride, txfm_param); break;
@@ -2137,6 +2918,10 @@ void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
case TX_16X8: inv_txfm_add_16x8(input, dest, stride, txfm_param); break;
case TX_16X32: inv_txfm_add_16x32(input, dest, stride, txfm_param); break;
case TX_32X16: inv_txfm_add_32x16(input, dest, stride, txfm_param); break;
+#if CONFIG_TX64X64
+ case TX_64X32: inv_txfm_add_64x32(input, dest, stride, txfm_param); break;
+ case TX_32X64: inv_txfm_add_32x64(input, dest, stride, txfm_param); break;
+#endif // CONFIG_TX64X64
case TX_4X4:
// this is like av1_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
@@ -2162,32 +2947,35 @@ static void init_txfm_param(const MACROBLOCKD *xd, TX_SIZE tx_size,
txfm_param->tx_size = tx_size;
txfm_param->eob = eob;
txfm_param->lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
-#if CONFIG_HIGHBITDEPTH
txfm_param->bd = xd->bd;
-#endif
#if CONFIG_LGT
txfm_param->is_inter = is_inter_block(&xd->mi[0]->mbmi);
#endif
+#if CONFIG_LGT_FROM_PRED
+ txfm_param->use_lgt = xd->mi[0]->mbmi.use_lgt;
+#endif
#if CONFIG_ADAPT_SCAN
txfm_param->eob_threshold =
(const int16_t *)&xd->eob_threshold_md[tx_size][tx_type][0];
#endif
}
+#if !CONFIG_TXMG
typedef void (*InvTxfmFunc)(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
TxfmParam *txfm_param);
static InvTxfmFunc inv_txfm_func[2] = { av1_inv_txfm_add,
av1_highbd_inv_txfm_add };
+#endif
-// TODO(kslu) Change input arguments to TxfmParam, which contains mode,
-// tx_type, tx_size, dst, stride, eob. Thus, the additional argument when LGT
-// is on will no longer be needed.
void av1_inverse_transform_block(const MACROBLOCKD *xd,
const tran_low_t *dqcoeff,
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
PREDICTION_MODE mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ uint8_t *mrc_mask,
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
int stride, int eob) {
if (!eob) return;
@@ -2195,38 +2983,67 @@ void av1_inverse_transform_block(const MACROBLOCKD *xd,
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
const int txb_width = block_size_wide[tx_bsize];
const int txb_height = block_size_high[tx_bsize];
- int r, c;
-#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (r = 0; r < txb_height; r++)
- for (c = 0; c < txb_width; c++)
+ for (int r = 0; r < txb_height; r++)
+ for (int c = 0; c < txb_width; c++)
CONVERT_TO_SHORTPTR(dst)[r * stride + c] = 0;
} else {
-#endif // CONFIG_HIGHBITDEPTH
- for (r = 0; r < txb_height; r++)
- for (c = 0; c < txb_width; c++) dst[r * stride + c] = 0;
-#if CONFIG_HIGHBITDEPTH
+ for (int r = 0; r < txb_height; r++)
+ for (int c = 0; c < txb_width; c++) dst[r * stride + c] = 0;
}
-#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_PVQ
TxfmParam txfm_param;
init_txfm_param(xd, tx_size, tx_type, eob, &txfm_param);
#if CONFIG_LGT || CONFIG_MRC_TX
+ txfm_param.is_inter = is_inter_block(&xd->mi[0]->mbmi);
+#endif // CONFIG_LGT || CONFIG_MRC_TX
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ txfm_param.mask = mrc_mask;
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+#if CONFIG_LGT_FROM_PRED || CONFIG_MRC_TX
txfm_param.dst = dst;
txfm_param.stride = stride;
-#endif // CONFIG_LGT || CONFIG_MRC_TX
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
txfm_param.mode = mode;
-#endif
+#endif // CONFIG_LGT_FROM_PRED
+#endif // CONFIG_LGT_FROM_PRED || CONFIG_MRC_TX
const int is_hbd = get_bitdepth_data_path_index(xd);
+#if CONFIG_TXMG
+ if (is_hbd) {
+ av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+ } else {
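+ // Low-bitdepth path: widen the 8-bit destination into a temporary 16-bit
+ // buffer, run the high-bitdepth inverse transform, then narrow the result
+ // back into place.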
+ DECLARE_ALIGNED(16, uint16_t, tmp[MAX_TX_SQUARE]);
+ int tmp_stride = MAX_TX_SIZE;
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ for (int r = 0; r < h; ++r) {
+ for (int c = 0; c < w; ++c) {
+ tmp[r * tmp_stride + c] = dst[r * stride + c];
+ }
+ }
+
+ av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
+ &txfm_param);
+
+ for (int r = 0; r < h; ++r) {
+ for (int c = 0; c < w; ++c) {
+ dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c];
+ }
+ }
+ }
+#else // CONFIG_TXMG
inv_txfm_func[is_hbd](dqcoeff, dst, stride, &txfm_param);
+#endif // CONFIG_TXMG
}
void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
int blk_row, int blk_col, int eob) {
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
const PLANE_TYPE plane_type = get_plane_type(plane);
const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
const TX_TYPE tx_type =
@@ -2234,14 +3051,14 @@ void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
const int dst_stride = pd->dst.stride;
uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
-#if CONFIG_LGT
- PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
- av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, dst,
- dst_stride, eob);
-#else
- av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst, dst_stride,
- eob);
-#endif // CONFIG_LGT
+ av1_inverse_transform_block(xd, dqcoeff,
+#if CONFIG_LGT_FROM_PRED
+ xd->mi[0]->mbmi.mode,
+#endif // CONFIG_LGT_FROM_PRED
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ mrc_mask,
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ tx_type, tx_size, dst, dst_stride, eob);
}
void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
@@ -2280,6 +3097,14 @@ void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
case TX_32X16:
highbd_inv_txfm_add_32x16(input, dest, stride, txfm_param);
break;
+#if CONFIG_TX64X64
+ case TX_64X32:
+ highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param);
+ break;
+ case TX_32X64:
+ highbd_inv_txfm_add_32x64(input, dest, stride, txfm_param);
+ break;
+#endif // CONFIG_TX64X64
case TX_4X4:
// this is like av1_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
@@ -2294,193 +3119,3 @@ void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
default: assert(0 && "Invalid transform size"); break;
}
}
-
-#if CONFIG_DPCM_INTRA
-void av1_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, uint8_t *dest) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d IHT[] = { aom_idct4_c, aom_iadst4_c, aom_iadst4_c,
- iidtx4_c };
- const transform_1d inv_tx = IHT[tx_type];
- tran_low_t out[4];
- inv_tx(input, out);
- for (int i = 0; i < 4; ++i) {
- out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
- dest[i * stride] =
- clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
- }
-}
-
-void av1_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, uint8_t *dest) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d IHT[] = { aom_idct8_c, aom_iadst8_c, aom_iadst8_c,
- iidtx8_c };
- const transform_1d inv_tx = IHT[tx_type];
- tran_low_t out[8];
- inv_tx(input, out);
- for (int i = 0; i < 8; ++i) {
- dest[i * stride] =
- clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
- }
-}
-
-void av1_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, uint8_t *dest) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d IHT[] = { aom_idct16_c, aom_iadst16_c,
- aom_iadst16_c, iidtx16_c };
- const transform_1d inv_tx = IHT[tx_type];
- tran_low_t out[16];
- inv_tx(input, out);
- for (int i = 0; i < 16; ++i) {
- out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
- dest[i * stride] =
- clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 5));
- }
-}
-
-void av1_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, uint8_t *dest) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d IHT[] = { aom_idct32_c, ihalfright32_c,
- ihalfright32_c, iidtx32_c };
- const transform_1d inv_tx = IHT[tx_type];
- tran_low_t out[32];
- inv_tx(input, out);
- for (int i = 0; i < 32; ++i) {
- dest[i * stride] =
- clip_pixel_add(dest[i * stride], ROUND_POWER_OF_TWO(out[i], 4));
- }
-}
-
-dpcm_inv_txfm_add_func av1_get_dpcm_inv_txfm_add_func(int tx_length) {
- switch (tx_length) {
- case 4: return av1_dpcm_inv_txfm_add_4_c;
- case 8: return av1_dpcm_inv_txfm_add_8_c;
- case 16: return av1_dpcm_inv_txfm_add_16_c;
- case 32:
- return av1_dpcm_inv_txfm_add_32_c;
- // TODO(huisu): add support for TX_64X64.
- default: assert(0); return NULL;
- }
-}
-
-#if CONFIG_HIGHBITDEPTH
-// TODO(sarahparker) I am adding a quick workaround for these functions
-// to remove the old hbd transforms. This will be cleaned up in a followup.
-void av1_hbd_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, int bd, uint16_t *dest,
- int dir) {
- assert(tx_type < TX_TYPES_1D);
- static const TxfmFunc IHT[] = { av1_idct4_new, av1_iadst4_new, av1_iadst4_new,
- av1_iidentity4_c };
- // In order { horizontal, vertical }
- static const TXFM_1D_CFG *inv_txfm_cfg_ls[TX_TYPES_1D][2] = {
- { &inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_col_cfg_dct_4 },
- { &inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_4 },
- { &inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_4 },
- { &inv_txfm_1d_cfg_identity_4, &inv_txfm_1d_cfg_identity_4 }
- };
-
- const TXFM_1D_CFG *inv_txfm_cfg = inv_txfm_cfg_ls[tx_type][dir];
- const TxfmFunc inv_tx = IHT[tx_type];
-
- tran_low_t out[4];
- inv_tx(input, out, inv_txfm_cfg->cos_bit, inv_txfm_cfg->stage_range);
- for (int i = 0; i < 4; ++i) {
- out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
- dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
- ROUND_POWER_OF_TWO(out[i], 4), bd);
- }
-}
-
-void av1_hbd_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, int bd, uint16_t *dest,
- int dir) {
- assert(tx_type < TX_TYPES_1D);
- static const TxfmFunc IHT[] = { av1_idct4_new, av1_iadst4_new, av1_iadst4_new,
- av1_iidentity4_c };
- // In order { horizontal, vertical }
- static const TXFM_1D_CFG *inv_txfm_cfg_ls[TX_TYPES_1D][2] = {
- { &inv_txfm_1d_row_cfg_dct_8, &inv_txfm_1d_col_cfg_dct_8 },
- { &inv_txfm_1d_row_cfg_adst_8, &inv_txfm_1d_col_cfg_adst_8 },
- { &inv_txfm_1d_row_cfg_adst_8, &inv_txfm_1d_col_cfg_adst_8 },
- { &inv_txfm_1d_cfg_identity_8, &inv_txfm_1d_cfg_identity_8 }
- };
-
- const TXFM_1D_CFG *inv_txfm_cfg = inv_txfm_cfg_ls[tx_type][dir];
- const TxfmFunc inv_tx = IHT[tx_type];
-
- tran_low_t out[8];
- inv_tx(input, out, inv_txfm_cfg->cos_bit, inv_txfm_cfg->stage_range);
- for (int i = 0; i < 8; ++i) {
- dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
- ROUND_POWER_OF_TWO(out[i], 4), bd);
- }
-}
-
-void av1_hbd_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, int bd, uint16_t *dest,
- int dir) {
- assert(tx_type < TX_TYPES_1D);
- static const TxfmFunc IHT[] = { av1_idct4_new, av1_iadst4_new, av1_iadst4_new,
- av1_iidentity4_c };
- // In order { horizontal, vertical }
- static const TXFM_1D_CFG *inv_txfm_cfg_ls[TX_TYPES_1D][2] = {
- { &inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_16 },
- { &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_16 },
- { &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_16 },
- { &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_16 }
- };
-
- const TXFM_1D_CFG *inv_txfm_cfg = inv_txfm_cfg_ls[tx_type][dir];
- const TxfmFunc inv_tx = IHT[tx_type];
-
- tran_low_t out[16];
- inv_tx(input, out, inv_txfm_cfg->cos_bit, inv_txfm_cfg->stage_range);
- for (int i = 0; i < 16; ++i) {
- out[i] = (tran_low_t)dct_const_round_shift(out[i] * Sqrt2);
- dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
- ROUND_POWER_OF_TWO(out[i], 5), bd);
- }
-}
-
-void av1_hbd_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, int bd, uint16_t *dest,
- int dir) {
- assert(tx_type < TX_TYPES_1D);
- static const TxfmFunc IHT[] = { av1_idct4_new, av1_iadst4_new, av1_iadst4_new,
- av1_iidentity4_c };
- // In order { horizontal, vertical }
- static const TXFM_1D_CFG *inv_txfm_cfg_ls[TX_TYPES_1D][2] = {
- { &inv_txfm_1d_row_cfg_dct_32, &inv_txfm_1d_col_cfg_dct_32 },
- { &inv_txfm_1d_row_cfg_adst_32, &inv_txfm_1d_col_cfg_adst_32 },
- { &inv_txfm_1d_row_cfg_adst_32, &inv_txfm_1d_col_cfg_adst_32 },
- { &inv_txfm_1d_cfg_identity_32, &inv_txfm_1d_cfg_identity_32 }
- };
-
- const TXFM_1D_CFG *inv_txfm_cfg = inv_txfm_cfg_ls[tx_type][dir];
- const TxfmFunc inv_tx = IHT[tx_type];
-
- tran_low_t out[32];
- inv_tx(input, out, inv_txfm_cfg->cos_bit, inv_txfm_cfg->stage_range);
- for (int i = 0; i < 32; ++i) {
- dest[i * stride] = highbd_clip_pixel_add(dest[i * stride],
- ROUND_POWER_OF_TWO(out[i], 4), bd);
- }
-}
-
-hbd_dpcm_inv_txfm_add_func av1_get_hbd_dpcm_inv_txfm_add_func(int tx_length) {
- switch (tx_length) {
- case 4: return av1_hbd_dpcm_inv_txfm_add_4_c;
- case 8: return av1_hbd_dpcm_inv_txfm_add_8_c;
- case 16: return av1_hbd_dpcm_inv_txfm_add_16_c;
- case 32:
- return av1_hbd_dpcm_inv_txfm_add_32_c;
- // TODO(huisu): add support for TX_64X64.
- default: assert(0); return NULL;
- }
-}
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_DPCM_INTRA
diff --git a/third_party/aom/av1/common/idct.h b/third_party/aom/av1/common/idct.h
index c2ca69b54..e4e4ad671 100644
--- a/third_party/aom/av1/common/idct.h
+++ b/third_party/aom/av1/common/idct.h
@@ -26,13 +26,28 @@
extern "C" {
#endif
-// TODO(kslu) move the common stuff in idct.h to av1_txfm.h or txfm_common.h
typedef void (*transform_1d)(const tran_low_t *, tran_low_t *);
typedef struct {
transform_1d cols, rows; // vertical and horizontal
} transform_2d;
+#if CONFIG_LGT
+int get_lgt4(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx);
+int get_lgt8(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx);
+#endif // CONFIG_LGT
+
+#if CONFIG_LGT_FROM_PRED
+void get_lgt4_from_pred(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx, int ntx);
+void get_lgt8_from_pred(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx, int ntx);
+void get_lgt16up_from_pred(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t **lgtmtx, int ntx);
+#endif // CONFIG_LGT_FROM_PRED
+
#if CONFIG_HIGHBITDEPTH
typedef void (*highbd_transform_1d)(const tran_low_t *, tran_low_t *, int bd);
@@ -53,9 +68,12 @@ void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
TxfmParam *txfm_param);
void av1_inverse_transform_block(const MACROBLOCKD *xd,
const tran_low_t *dqcoeff,
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
PREDICTION_MODE mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ uint8_t *mrc_mask,
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
int stride, int eob);
void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
@@ -72,37 +90,6 @@ void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
TxfmParam *txfm_param);
-#if CONFIG_DPCM_INTRA
-void av1_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, uint8_t *dest);
-void av1_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, uint8_t *dest);
-void av1_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, uint8_t *dest);
-void av1_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, uint8_t *dest);
-typedef void (*dpcm_inv_txfm_add_func)(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, uint8_t *dest);
-dpcm_inv_txfm_add_func av1_get_dpcm_inv_txfm_add_func(int tx_length);
-#if CONFIG_HIGHBITDEPTH
-void av1_hbd_dpcm_inv_txfm_add_4_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, int bd, uint16_t *dest,
- int dir);
-void av1_hbd_dpcm_inv_txfm_add_8_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, int bd, uint16_t *dest,
- int dir);
-void av1_hbd_dpcm_inv_txfm_add_16_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, int bd, uint16_t *dest,
- int dir);
-void av1_hbd_dpcm_inv_txfm_add_32_c(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, int bd, uint16_t *dest,
- int dir);
-typedef void (*hbd_dpcm_inv_txfm_add_func)(const tran_low_t *input, int stride,
- TX_TYPE_1D tx_type, int bd,
- uint16_t *dest, int dir);
-hbd_dpcm_inv_txfm_add_func av1_get_hbd_dpcm_inv_txfm_add_func(int tx_length);
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_DPCM_INTRA
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/mips/dspr2/av1_itrans16_dspr2.c b/third_party/aom/av1/common/mips/dspr2/av1_itrans16_dspr2.c
deleted file mode 100644
index 1b3343155..000000000
--- a/third_party/aom/av1/common/mips/dspr2/av1_itrans16_dspr2.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "av1/common/common.h"
-#include "av1/common/blockd.h"
-#include "aom_dsp/mips/inv_txfm_dspr2.h"
-#include "aom_dsp/txfm_common.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-void av1_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, int pitch,
- TxfmParam *txfm_param) {
- int i, j;
- DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
- int16_t *outptr = out;
- int16_t temp_out[16];
- uint32_t pos = 45;
- int tx_type = txfm_param->tx_type;
-
- /* bit positon for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos));
-
- switch (tx_type) {
- case DCT_DCT: // DCT in both horizontal and vertical
- idct16_rows_dspr2(input, outptr, 16);
- idct16_cols_add_blk_dspr2(out, dest, pitch);
- break;
- case ADST_DCT: // ADST in vertical, DCT in horizontal
- idct16_rows_dspr2(input, outptr, 16);
-
- outptr = out;
-
- for (i = 0; i < 16; ++i) {
- iadst16_dspr2(outptr, temp_out);
-
- for (j = 0; j < 16; ++j)
- dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) +
- dest[j * pitch + i]);
- outptr += 16;
- }
- break;
- case DCT_ADST: // DCT in vertical, ADST in horizontal
- {
- int16_t temp_in[16 * 16];
-
- for (i = 0; i < 16; ++i) {
- /* prefetch row */
- prefetch_load((const uint8_t *)(input + 16));
-
- iadst16_dspr2(input, outptr);
- input += 16;
- outptr += 16;
- }
-
- for (i = 0; i < 16; ++i)
- for (j = 0; j < 16; ++j) temp_in[j * 16 + i] = out[i * 16 + j];
-
- idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
- } break;
- case ADST_ADST: // ADST in both directions
- {
- int16_t temp_in[16];
-
- for (i = 0; i < 16; ++i) {
- /* prefetch row */
- prefetch_load((const uint8_t *)(input + 16));
-
- iadst16_dspr2(input, outptr);
- input += 16;
- outptr += 16;
- }
-
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
- iadst16_dspr2(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) +
- dest[j * pitch + i]);
- }
- } break;
- default: printf("av1_short_iht16x16_add_dspr2 : Invalid tx_type\n"); break;
- }
-}
-#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/av1/common/mips/dspr2/av1_itrans4_dspr2.c b/third_party/aom/av1/common/mips/dspr2/av1_itrans4_dspr2.c
deleted file mode 100644
index d9da3a1e7..000000000
--- a/third_party/aom/av1/common/mips/dspr2/av1_itrans4_dspr2.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "av1/common/common.h"
-#include "av1/common/blockd.h"
-#include "aom_dsp/mips/inv_txfm_dspr2.h"
-#include "aom_dsp/txfm_common.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-void av1_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
- int dest_stride, TxfmParam *txfm_param) {
- int i, j;
- DECLARE_ALIGNED(32, int16_t, out[4 * 4]);
- int16_t *outptr = out;
- int16_t temp_in[4 * 4], temp_out[4];
- uint32_t pos = 45;
- int tx_type = txfm_param->tx_type;
-
- /* bit positon for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
- :
- : [pos] "r"(pos));
-
- switch (tx_type) {
- case DCT_DCT: // DCT in both horizontal and vertical
- aom_idct4_rows_dspr2(input, outptr);
- aom_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
- break;
- case ADST_DCT: // ADST in vertical, DCT in horizontal
- aom_idct4_rows_dspr2(input, outptr);
-
- outptr = out;
-
- for (i = 0; i < 4; ++i) {
- iadst4_dspr2(outptr, temp_out);
-
- for (j = 0; j < 4; ++j)
- dest[j * dest_stride + i] = clip_pixel(
- ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]);
-
- outptr += 4;
- }
- break;
- case DCT_ADST: // DCT in vertical, ADST in horizontal
- for (i = 0; i < 4; ++i) {
- iadst4_dspr2(input, outptr);
- input += 4;
- outptr += 4;
- }
-
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j) {
- temp_in[i * 4 + j] = out[j * 4 + i];
- }
- }
- aom_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
- break;
- case ADST_ADST: // ADST in both directions
- for (i = 0; i < 4; ++i) {
- iadst4_dspr2(input, outptr);
- input += 4;
- outptr += 4;
- }
-
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
- iadst4_dspr2(temp_in, temp_out);
-
- for (j = 0; j < 4; ++j)
- dest[j * dest_stride + i] = clip_pixel(
- ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]);
- }
- break;
- default: printf("av1_short_iht4x4_add_dspr2 : Invalid tx_type\n"); break;
- }
-}
-#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/av1/common/mips/dspr2/av1_itrans8_dspr2.c b/third_party/aom/av1/common/mips/dspr2/av1_itrans8_dspr2.c
deleted file mode 100644
index f62d5faef..000000000
--- a/third_party/aom/av1/common/mips/dspr2/av1_itrans8_dspr2.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "./aom_config.h"
-#include "./av1_rtcd.h"
-#include "av1/common/common.h"
-#include "av1/common/blockd.h"
-#include "aom_dsp/mips/inv_txfm_dspr2.h"
-#include "aom_dsp/txfm_common.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-void av1_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
- int dest_stride, TxfmParam *txfm_param) {
- int i, j;
- DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
- int16_t *outptr = out;
- int16_t temp_in[8 * 8], temp_out[8];
- uint32_t pos = 45;
- int tx_type = txfm_param->tx_type;
-
- /* bit positon for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos));
-
- switch (tx_type) {
- case DCT_DCT: // DCT in both horizontal and vertical
- idct8_rows_dspr2(input, outptr, 8);
- idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
- break;
- case ADST_DCT: // ADST in vertical, DCT in horizontal
- idct8_rows_dspr2(input, outptr, 8);
-
- for (i = 0; i < 8; ++i) {
- iadst8_dspr2(&out[i * 8], temp_out);
-
- for (j = 0; j < 8; ++j)
- dest[j * dest_stride + i] = clip_pixel(
- ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]);
- }
- break;
- case DCT_ADST: // DCT in vertical, ADST in horizontal
- for (i = 0; i < 8; ++i) {
- iadst8_dspr2(input, outptr);
- input += 8;
- outptr += 8;
- }
-
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j) {
- temp_in[i * 8 + j] = out[j * 8 + i];
- }
- }
- idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
- break;
- case ADST_ADST: // ADST in both directions
- for (i = 0; i < 8; ++i) {
- iadst8_dspr2(input, outptr);
- input += 8;
- outptr += 8;
- }
-
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
-
- iadst8_dspr2(temp_in, temp_out);
-
- for (j = 0; j < 8; ++j)
- dest[j * dest_stride + i] = clip_pixel(
- ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]);
- }
- break;
- default: printf("av1_short_iht8x8_add_dspr2 : Invalid tx_type\n"); break;
- }
-}
-#endif // #if HAVE_DSPR2
diff --git a/third_party/aom/av1/common/mips/msa/av1_idct16x16_msa.c b/third_party/aom/av1/common/mips/msa/av1_idct16x16_msa.c
index 522cce0f4..ff461b914 100644
--- a/third_party/aom/av1/common/mips/msa/av1_idct16x16_msa.c
+++ b/third_party/aom/av1/common/mips/msa/av1_idct16x16_msa.c
@@ -19,7 +19,7 @@ void av1_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst,
int32_t i;
DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
int16_t *out_ptr = &out[0];
- int32_t tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
switch (tx_type) {
case DCT_DCT:
diff --git a/third_party/aom/av1/common/mips/msa/av1_idct4x4_msa.c b/third_party/aom/av1/common/mips/msa/av1_idct4x4_msa.c
index 7a68dbbe6..37f7fd77b 100644
--- a/third_party/aom/av1/common/mips/msa/av1_idct4x4_msa.c
+++ b/third_party/aom/av1/common/mips/msa/av1_idct4x4_msa.c
@@ -17,7 +17,7 @@
void av1_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride, TxfmParam *txfm_param) {
v8i16 in0, in1, in2, in3;
- int32_t tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
/* load vector elements of 4x4 block */
LD4x4_SH(input, in0, in1, in2, in3);
diff --git a/third_party/aom/av1/common/mips/msa/av1_idct8x8_msa.c b/third_party/aom/av1/common/mips/msa/av1_idct8x8_msa.c
index c6ef61e1a..7410f7b98 100644
--- a/third_party/aom/av1/common/mips/msa/av1_idct8x8_msa.c
+++ b/third_party/aom/av1/common/mips/msa/av1_idct8x8_msa.c
@@ -17,7 +17,7 @@
void av1_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
int32_t dst_stride, TxfmParam *txfm_param) {
v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
- int32_t tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
/* load vector elements of 8x8 block */
LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
diff --git a/third_party/aom/av1/common/mv.h b/third_party/aom/av1/common/mv.h
index dabfc0ead..65f0f7eda 100644
--- a/third_party/aom/av1/common/mv.h
+++ b/third_party/aom/av1/common/mv.h
@@ -20,6 +20,8 @@
extern "C" {
#endif
+#define INVALID_MV 0x80008000
+
typedef struct mv {
int16_t row;
int16_t col;
@@ -88,10 +90,12 @@ typedef enum {
// GLOBAL_TRANS_TYPES 7 - up to full homography
#define GLOBAL_TRANS_TYPES 4
+#if GLOBAL_TRANS_TYPES > 4
// First bit indicates whether using identity or not
// GLOBAL_TYPE_BITS=ceiling(log2(GLOBAL_TRANS_TYPES-1)) is the
// number of bits needed to cover the remaining possibilities
#define GLOBAL_TYPE_BITS (get_msb(2 * GLOBAL_TRANS_TYPES - 3))
+#endif // GLOBAL_TRANS_TYPES > 4
typedef struct {
#if CONFIG_GLOBAL_MOTION
@@ -116,14 +120,14 @@ typedef struct {
int16_t alpha, beta, gamma, delta;
} WarpedMotionParams;
-static INLINE void set_default_warp_params(WarpedMotionParams *wm) {
- static const int32_t default_wm_mat[8] = {
- 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0, 0
- };
- memset(wm, 0, sizeof(*wm));
- memcpy(wm->wmmat, default_wm_mat, sizeof(wm->wmmat));
- wm->wmtype = IDENTITY;
-}
+/* clang-format off */
+static const WarpedMotionParams default_warp_params = {
+ IDENTITY,
+ { 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0,
+ 0 },
+ 0, 0, 0, 0
+};
+/* clang-format on */
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#if CONFIG_GLOBAL_MOTION
@@ -202,21 +206,70 @@ static INLINE int convert_to_trans_prec(int allow_hp, int coor) {
else
return ROUND_POWER_OF_TWO_SIGNED(coor, WARPEDMODEL_PREC_BITS - 2) * 2;
}
+#if CONFIG_AMVR
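+// Round each MV component to the nearest multiple of 8, i.e. to full-pel
+// precision given 1/8-pel units; ties round towards zero.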
+static INLINE void integer_mv_precision(MV *mv) {
+ int mod = (mv->row % 8);
+ if (mod != 0) {
+ mv->row -= mod;
+ if (abs(mod) > 4) {
+ if (mod > 0) {
+ mv->row += 8;
+ } else {
+ mv->row -= 8;
+ }
+ }
+ }
-// Convert a global motion translation vector (which may have more bits than a
-// regular motion vector) into a motion vector
+ mod = (mv->col % 8);
+ if (mod != 0) {
+ mv->col -= mod;
+ if (abs(mod) > 4) {
+ if (mod > 0) {
+ mv->col += 8;
+ } else {
+ mv->col -= 8;
+ }
+ }
+ }
+}
+#endif
+// Convert a global motion vector into a motion vector at the centre of the
+// given block.
+//
+// The resulting motion vector will have three fractional bits of precision. If
+// allow_hp is zero, the bottom bit will always be zero. If CONFIG_AMVR and
+// is_integer is true, the bottom three bits will be zero (so the motion vector
+// represents an integer).
static INLINE int_mv gm_get_motion_vector(const WarpedMotionParams *gm,
int allow_hp, BLOCK_SIZE bsize,
- int mi_col, int mi_row,
- int block_idx) {
+ int mi_col, int mi_row, int block_idx
+#if CONFIG_AMVR
+ ,
+ int is_integer
+#endif
+ ) {
const int unify_bsize = CONFIG_CB4X4;
int_mv res;
const int32_t *mat = gm->wmmat;
int x, y, tx, ty;
if (gm->wmtype == TRANSLATION) {
+ // All global motion vectors are stored with WARPEDMODEL_PREC_BITS (16)
+ // bits of fractional precision. The offset for a translation is stored in
+ // entries 0 and 1. For translations, all but the top three (two if
+ // cm->allow_high_precision_mv is false) fractional bits are always zero.
+ //
+ // After the right shifts, there are 3 fractional bits of precision. If
+ // allow_hp is false, the bottom bit is always zero (so we don't need a
+ // call to convert_to_trans_prec here)
res.as_mv.row = gm->wmmat[0] >> GM_TRANS_ONLY_PREC_DIFF;
res.as_mv.col = gm->wmmat[1] >> GM_TRANS_ONLY_PREC_DIFF;
+ assert(IMPLIES(1 & (res.as_mv.row | res.as_mv.col), allow_hp));
+#if CONFIG_AMVR
+ if (is_integer) {
+ integer_mv_precision(&res.as_mv);
+ }
+#endif
return res;
}
@@ -256,6 +309,12 @@ static INLINE int_mv gm_get_motion_vector(const WarpedMotionParams *gm,
res.as_mv.row = ty;
res.as_mv.col = tx;
+
+#if CONFIG_AMVR
+ if (is_integer) {
+ integer_mv_precision(&res.as_mv);
+ }
+#endif
return res;
}
diff --git a/third_party/aom/av1/common/mvref_common.c b/third_party/aom/av1/common/mvref_common.c
index fdbcebcd1..891396e9b 100644
--- a/third_party/aom/av1/common/mvref_common.c
+++ b/third_party/aom/av1/common/mvref_common.c
@@ -14,10 +14,56 @@
#include "av1/common/warped_motion.h"
#endif // CONFIG_WARPED_MOTION
+#if CONFIG_GLOBAL_MOTION
+#define USE_CUR_GM_REFMV 1
+#endif // CONFIG_GLOBAL_MOTION
+
+void av1_copy_frame_mvs(const AV1_COMMON *const cm, MODE_INFO *mi, int mi_row,
+ int mi_col, int x_mis, int y_mis) {
+#if CONFIG_TMV
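+ // The temporal MV buffer is kept at half mi resolution: snap the position
+ // down to even mi units and halve the extents, rounding up.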
+ const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
+ MV_REF *frame_mvs = cm->cur_frame->mvs +
+ ((mi_row & 0xfffe) >> 1) * frame_mvs_stride +
+ ((mi_col & 0xfffe) >> 1);
+ x_mis = ROUND_POWER_OF_TWO(x_mis, 1);
+ y_mis = ROUND_POWER_OF_TWO(y_mis, 1);
+#else
+ const int frame_mvs_stride = cm->mi_cols;
+ MV_REF *frame_mvs = cm->cur_frame->mvs +
+ (mi_row & 0xfffe) * frame_mvs_stride + (mi_col & 0xfffe);
+ x_mis = AOMMAX(x_mis, 2);
+ y_mis = AOMMAX(y_mis, 2);
+#endif // CONFIG_TMV
+ int w, h;
+
+ for (h = 0; h < y_mis; h++) {
+ MV_REF *const frame_mv = frame_mvs + h * frame_mvs_stride;
+ for (w = 0; w < x_mis; w++) {
+ MV_REF *const mv = frame_mv + w;
+ mv->ref_frame[0] = mi->mbmi.ref_frame[0];
+ mv->ref_frame[1] = mi->mbmi.ref_frame[1];
+ mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
+ mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
+ // TODO(yunqing): The following 2 lines won't be used and can be removed.
+ mv->pred_mv[0].as_int = mi->mbmi.pred_mv[0].as_int;
+ mv->pred_mv[1].as_int = mi->mbmi.pred_mv[1].as_int;
+ }
+ }
+}
+
static uint8_t add_ref_mv_candidate(
const MODE_INFO *const candidate_mi, const MB_MODE_INFO *const candidate,
const MV_REFERENCE_FRAME rf[2], uint8_t *refmv_count,
- CANDIDATE_MV *ref_mv_stack, const int use_hp, int len, int block, int col) {
+ CANDIDATE_MV *ref_mv_stack, const int use_hp, int len, int block,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ int_mv *gm_mv_candidates, const WarpedMotionParams *gm_params,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ int col, int weight
+#if CONFIG_AMVR
+ ,
+ int is_integer
+#endif
+ ) {
int index = 0, ref;
int newmv_count = 0;
#if CONFIG_CB4X4
@@ -25,25 +71,36 @@ static uint8_t add_ref_mv_candidate(
#else
const int unify_bsize = 0;
#endif
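+ // The weight is now supplied by the caller (previously a fixed 2 * len)
+ // and must be even.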
+ assert(weight % 2 == 0);
if (rf[1] == NONE_FRAME) {
// single reference frame
for (ref = 0; ref < 2; ++ref) {
if (candidate->ref_frame[ref] == rf[0]) {
- int_mv this_refmv = get_sub_block_mv(candidate_mi, ref, col, block);
+ int_mv this_refmv;
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ if (is_global_mv_block(candidate_mi, block, gm_params[rf[0]].wmtype))
+ this_refmv = gm_mv_candidates[0];
+ else
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ this_refmv = get_sub_block_mv(candidate_mi, ref, col, block);
+#if CONFIG_AMVR
+ lower_mv_precision(&this_refmv.as_mv, use_hp, is_integer);
+#else
lower_mv_precision(&this_refmv.as_mv, use_hp);
+#endif // CONFIG_AMVR
for (index = 0; index < *refmv_count; ++index)
if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break;
- if (index < *refmv_count) ref_mv_stack[index].weight += 2 * len;
+ if (index < *refmv_count) ref_mv_stack[index].weight += weight * len;
// Add a new item to the list.
if (index == *refmv_count) {
ref_mv_stack[index].this_mv = this_refmv;
ref_mv_stack[index].pred_diff[0] = av1_get_pred_diff_ctx(
get_sub_block_pred_mv(candidate_mi, ref, col, block), this_refmv);
- ref_mv_stack[index].weight = 2 * len;
+ ref_mv_stack[index].weight = weight * len;
++(*refmv_count);
if (candidate->mode == NEWMV) ++newmv_count;
@@ -53,8 +110,11 @@ static uint8_t add_ref_mv_candidate(
!unify_bsize) {
int alt_block = 3 - block;
this_refmv = get_sub_block_mv(candidate_mi, ref, col, alt_block);
+#if CONFIG_AMVR
+ lower_mv_precision(&this_refmv.as_mv, use_hp, is_integer);
+#else
lower_mv_precision(&this_refmv.as_mv, use_hp);
-
+#endif
for (index = 0; index < *refmv_count; ++index)
if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break;
@@ -80,8 +140,17 @@ static uint8_t add_ref_mv_candidate(
int_mv this_refmv[2];
for (ref = 0; ref < 2; ++ref) {
- this_refmv[ref] = get_sub_block_mv(candidate_mi, ref, col, block);
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ if (is_global_mv_block(candidate_mi, block, gm_params[rf[ref]].wmtype))
+ this_refmv[ref] = gm_mv_candidates[ref];
+ else
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ this_refmv[ref] = get_sub_block_mv(candidate_mi, ref, col, block);
+#if CONFIG_AMVR
+ lower_mv_precision(&this_refmv[ref].as_mv, use_hp, is_integer);
+#else
lower_mv_precision(&this_refmv[ref].as_mv, use_hp);
+#endif
}
for (index = 0; index < *refmv_count; ++index)
@@ -89,7 +158,7 @@ static uint8_t add_ref_mv_candidate(
(ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int))
break;
- if (index < *refmv_count) ref_mv_stack[index].weight += 2 * len;
+ if (index < *refmv_count) ref_mv_stack[index].weight += weight * len;
// Add a new item to the list.
if (index == *refmv_count) {
@@ -99,15 +168,10 @@ static uint8_t add_ref_mv_candidate(
get_sub_block_pred_mv(candidate_mi, 0, col, block), this_refmv[0]);
ref_mv_stack[index].pred_diff[1] = av1_get_pred_diff_ctx(
get_sub_block_pred_mv(candidate_mi, 1, col, block), this_refmv[1]);
- ref_mv_stack[index].weight = 2 * len;
+ ref_mv_stack[index].weight = weight * len;
++(*refmv_count);
-#if CONFIG_EXT_INTER
- if (candidate->mode == NEW_NEWMV)
-#else
- if (candidate->mode == NEWMV)
-#endif // CONFIG_EXT_INTER
- ++newmv_count;
+ if (candidate->mode == NEW_NEWMV) ++newmv_count;
}
if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0 &&
@@ -116,9 +180,13 @@ static uint8_t add_ref_mv_candidate(
this_refmv[0] = get_sub_block_mv(candidate_mi, 0, col, alt_block);
this_refmv[1] = get_sub_block_mv(candidate_mi, 1, col, alt_block);
- for (ref = 0; ref < 2; ++ref)
+ for (ref = 0; ref < 2; ++ref) {
+#if CONFIG_AMVR
+ lower_mv_precision(&this_refmv[ref].as_mv, use_hp, is_integer);
+#else
lower_mv_precision(&this_refmv[ref].as_mv, use_hp);
-
+#endif
+ }
for (index = 0; index < *refmv_count; ++index)
if (ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int &&
ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int)
@@ -139,12 +207,7 @@ static uint8_t add_ref_mv_candidate(
ref_mv_stack[index].weight = len;
++(*refmv_count);
-#if CONFIG_EXT_INTER
- if (candidate->mode == NEW_NEWMV)
-#else
- if (candidate->mode == NEWMV)
-#endif // CONFIG_EXT_INTER
- ++newmv_count;
+ if (candidate->mode == NEW_NEWMV) ++newmv_count;
}
}
}
@@ -153,95 +216,144 @@ static uint8_t add_ref_mv_candidate(
}
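Every insertion path in add_ref_mv_candidate follows the same pattern: search the stack for the motion vector, bump the weight of an existing entry, and otherwise append a new one with weight * len. A stripped-down sketch of that pattern (the types and names here are illustrative, not the libaom API):

/* Stand-in for CANDIDATE_MV: an MV packed as int plus its accumulated weight. */
typedef struct { int mv_as_int; int weight; } Cand;

/* Returns the stack index the candidate landed at; equals max_size when the
 * stack was full and nothing was written. */
static int push_or_bump(Cand *stack, int *count, int max_size,
                        int mv_as_int, int weight) {
  int i;
  for (i = 0; i < *count; ++i)                  /* duplicate search */
    if (stack[i].mv_as_int == mv_as_int) break;
  if (i < *count) {
    stack[i].weight += weight;                  /* merge: bump the weight */
  } else if (*count < max_size) {
    stack[i].mv_as_int = mv_as_int;             /* append a new entry */
    stack[i].weight = weight;
    ++(*count);
  }
  return i;
}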
static uint8_t scan_row_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- const int mi_row, const int mi_col, int block,
+ const int mi_col, int block,
const MV_REFERENCE_FRAME rf[2], int row_offset,
- CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count) {
- const TileInfo *const tile = &xd->tile;
+ CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ int_mv *gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ int max_row_offset, int *processed_rows) {
+ const int end_mi = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
+ const int n8_w_8 = mi_size_wide[BLOCK_8X8];
+ const int n8_w_16 = mi_size_wide[BLOCK_16X16];
int i;
uint8_t newmv_count = 0;
+ int col_offset = 0;
#if CONFIG_CB4X4
- const int bsize = xd->mi[0]->mbmi.sb_type;
- const int mi_offset =
- bsize < BLOCK_8X8 ? mi_size_wide[BLOCK_4X4] : mi_size_wide[BLOCK_8X8];
+ const int shift = 0;
// TODO(jingning): Revisit this part after cb4x4 is stable.
- if (bsize >= BLOCK_8X8) row_offset *= 2;
+ if (abs(row_offset) > 1) {
+ col_offset = 1;
+ if (mi_col & 0x01 && xd->n8_w < n8_w_8) --col_offset;
+ }
+ const int use_step_16 = (xd->n8_w >= 16);
#else
- const int mi_offset = mi_size_wide[BLOCK_8X8];
+ const int shift = 1;
+ const int use_step_16 = (xd->n8_w >= 8);
#endif
+ MODE_INFO **const candidate_mi0 = xd->mi + row_offset * xd->mi_stride;
- for (i = 0; i < xd->n8_w && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
- POSITION mi_pos;
-#if CONFIG_CB4X4
- const int use_step_16 = (xd->n8_w >= 16);
+ for (i = 0; i < end_mi && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
+ const MODE_INFO *const candidate_mi = candidate_mi0[col_offset + i];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ const int candidate_bsize = candidate->sb_type;
+ const int n8_w = mi_size_wide[candidate_bsize];
+ int len = AOMMIN(xd->n8_w, n8_w);
+ if (use_step_16)
+ len = AOMMAX(n8_w_16, len);
+ else if (abs(row_offset) > 1)
+ len = AOMMAX(len, n8_w_8);
+
+ int weight = 2;
+ if (xd->n8_w >= n8_w_8 && xd->n8_w <= n8_w) {
+ int inc = AOMMIN(-max_row_offset + row_offset + 1,
+ mi_size_high[candidate_bsize]);
+ // Obtain range used in weight calculation.
+ weight = AOMMAX(weight, (inc << shift));
+ // Update processed rows.
+ *processed_rows = inc - row_offset - 1;
+ }
+
+#if CONFIG_AMVR
+ newmv_count += add_ref_mv_candidate(
+ candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv, len, block,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates, cm->global_motion,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ col_offset + i, weight, cm->cur_frame_mv_precision_level);
#else
- const int use_step_16 = (xd->n8_w >= 8);
+ newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
+ refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv, len, block,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates, cm->global_motion,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ col_offset + i, weight);
#endif
- mi_pos.row = row_offset;
- mi_pos.col = i;
- if (is_inside(tile, mi_col, mi_row, cm->mi_rows, cm, &mi_pos)) {
- const MODE_INFO *const candidate_mi =
- xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
- const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
- int len = AOMMIN(xd->n8_w, mi_size_wide[candidate->sb_type]);
- if (use_step_16) len = AOMMAX(mi_size_wide[BLOCK_16X16], len);
- newmv_count += add_ref_mv_candidate(
- candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
- cm->allow_high_precision_mv, len, block, mi_pos.col);
- i += len;
- } else {
- if (use_step_16)
- i += (mi_offset << 1);
- else
- i += mi_offset;
- }
+ i += len;
}
return newmv_count;
}
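The weighting above folds the scanned candidate's height into its weight and records how many outer rows are already covered, so the later per-offset scans can skip them. A worked example, assuming max_row_offset = -3 and a candidate 4 mi units tall met at row_offset = -1 (shift = 1, the non-CB4X4 case):

#include <stdio.h>

int main(void) {
  const int max_row_offset = -3, row_offset = -1, shift = 1;
  const int candidate_height = 4;                /* mi_size_high[candidate_bsize] */
  int inc = -max_row_offset + row_offset + 1;    /* rows from this offset to the limit */
  if (candidate_height < inc) inc = candidate_height;
  int weight = 2;
  if ((inc << shift) > weight) weight = inc << shift;
  const int processed_rows = inc - row_offset - 1;
  printf("inc=%d weight=%d processed_rows=%d\n", inc, weight, processed_rows);
  /* inc=3, weight=6, processed_rows=3: row offsets -2 and -3 are skipped later. */
  return 0;
}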
static uint8_t scan_col_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
- const int mi_row, const int mi_col, int block,
+ const int mi_row, int block,
const MV_REFERENCE_FRAME rf[2], int col_offset,
- CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count) {
- const TileInfo *const tile = &xd->tile;
+ CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ int_mv *gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ int max_col_offset, int *processed_cols) {
+ const int end_mi = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
+ const int n8_h_8 = mi_size_high[BLOCK_8X8];
+ const int n8_h_16 = mi_size_high[BLOCK_16X16];
int i;
uint8_t newmv_count = 0;
+ int row_offset = 0;
#if CONFIG_CB4X4
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- const int mi_offset =
- (bsize < BLOCK_8X8) ? mi_size_high[BLOCK_4X4] : mi_size_high[BLOCK_8X8];
- if (bsize >= BLOCK_8X8) col_offset *= 2;
+ const int shift = 0;
+ if (abs(col_offset) > 1) {
+ row_offset = 1;
+ if (mi_row & 0x01 && xd->n8_h < n8_h_8) --row_offset;
+ }
+ const int use_step_16 = (xd->n8_h >= 16);
#else
- const int mi_offset = mi_size_wide[BLOCK_8X8];
+ const int shift = 1;
+ const int use_step_16 = (xd->n8_h >= 8);
#endif
- for (i = 0; i < xd->n8_h && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
- POSITION mi_pos;
-#if CONFIG_CB4X4
- const int use_step_16 = (xd->n8_h >= 16);
+ for (i = 0; i < end_mi && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
+ const MODE_INFO *const candidate_mi =
+ xd->mi[(row_offset + i) * xd->mi_stride + col_offset];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ const int candidate_bsize = candidate->sb_type;
+ const int n8_h = mi_size_high[candidate_bsize];
+ int len = AOMMIN(xd->n8_h, n8_h);
+ if (use_step_16)
+ len = AOMMAX(n8_h_16, len);
+ else if (abs(col_offset) > 1)
+ len = AOMMAX(len, n8_h_8);
+
+ int weight = 2;
+ if (xd->n8_h >= n8_h_8 && xd->n8_h <= n8_h) {
+ int inc = AOMMIN(-max_col_offset + col_offset + 1,
+ mi_size_wide[candidate_bsize]);
+ // Obtain range used in weight calculation.
+ weight = AOMMAX(weight, (inc << shift));
+ // Update processed cols.
+ *processed_cols = inc - col_offset - 1;
+ }
+
+#if CONFIG_AMVR
+ newmv_count += add_ref_mv_candidate(
+ candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv, len, block,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates, cm->global_motion,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ col_offset, weight, cm->cur_frame_mv_precision_level);
#else
- const int use_step_16 = (xd->n8_h >= 8);
+ newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
+ refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv, len, block,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates, cm->global_motion,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ col_offset, weight);
#endif
-
- mi_pos.row = i;
- mi_pos.col = col_offset;
- if (is_inside(tile, mi_col, mi_row, cm->mi_rows, cm, &mi_pos)) {
- const MODE_INFO *const candidate_mi =
- xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
- const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
- int len = AOMMIN(xd->n8_h, mi_size_high[candidate->sb_type]);
- if (use_step_16) len = AOMMAX(mi_size_high[BLOCK_16X16], len);
- newmv_count += add_ref_mv_candidate(
- candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
- cm->allow_high_precision_mv, len, block, mi_pos.col);
- i += len;
- } else {
- if (use_step_16)
- i += (mi_offset << 1);
- else
- i += mi_offset;
- }
+ i += len;
}
return newmv_count;
@@ -251,6 +363,9 @@ static uint8_t scan_blk_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
const int mi_row, const int mi_col, int block,
const MV_REFERENCE_FRAME rf[2], int row_offset,
int col_offset, CANDIDATE_MV *ref_mv_stack,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ int_mv *gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
uint8_t *refmv_count) {
const TileInfo *const tile = &xd->tile;
POSITION mi_pos;
@@ -266,18 +381,33 @@ static uint8_t scan_blk_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
const int len = mi_size_wide[BLOCK_8X8];
+#if CONFIG_AMVR
newmv_count += add_ref_mv_candidate(
candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
- cm->allow_high_precision_mv, len, block, mi_pos.col);
+ cm->allow_high_precision_mv, len, block,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates, cm->global_motion,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ mi_pos.col, 2, cm->cur_frame_mv_precision_level);
+#else
+ newmv_count += add_ref_mv_candidate(candidate_mi, candidate, rf,
+ refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv, len, block,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates, cm->global_motion,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ mi_pos.col, 2);
+#endif
} // Analyze a single 8x8 block's motion information.
return newmv_count;
}
-static int has_top_right(const MACROBLOCKD *xd, int mi_row, int mi_col,
- int bs) {
- const int mask_row = mi_row & MAX_MIB_MASK;
- const int mask_col = mi_col & MAX_MIB_MASK;
+static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+ int mi_row, int mi_col, int bs) {
+ const int sb_mi_size = mi_size_wide[cm->sb_size];
+ const int mask_row = mi_row & (sb_mi_size - 1);
+ const int mask_col = mi_col & (sb_mi_size - 1);
// In a split partition, all blocks apart from the bottom right have a top right.
int has_tr = !((mask_row & bs) && (mask_col & bs));
@@ -288,7 +418,7 @@ static int has_top_right(const MACROBLOCKD *xd, int mi_row, int mi_col,
// For each 4x4 group of blocks, when the bottom right is decoded the blocks
// to its right have not yet been decoded, so the bottom right does not have
// a top right.
- while (bs < MAX_MIB_SIZE) {
+ while (bs < sb_mi_size) {
if (mask_col & bs) {
if ((mask_col & (2 * bs)) && (mask_row & (2 * bs))) {
has_tr = 0;
@@ -310,9 +440,10 @@ static int has_top_right(const MACROBLOCKD *xd, int mi_row, int mi_col,
if (xd->n8_w > xd->n8_h)
if (xd->is_sec_rect) has_tr = 0;
-#if CONFIG_EXT_PARTITION_TYPES
- // The bottom left square of a Vertical A does not have a top right as it is
- // decoded before the right hand rectangle of the partition
+#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
+ // The bottom left square of a Vertical A (in the old format) does
+ // not have a top right as it is decoded before the right hand
+ // rectangle of the partition
if (xd->mi[0]->mbmi.partition == PARTITION_VERT_A)
if ((mask_row & bs) && !(mask_col & bs)) has_tr = 0;
#endif // CONFIG_EXT_PARTITION_TYPES
@@ -320,19 +451,156 @@ static int has_top_right(const MACROBLOCKD *xd, int mi_row, int mi_col,
return has_tr;
}
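The mask test walks up the partition levels: within a superblock, the bottom-right quadrant at any level is decoded last and so has no decoded top-right neighbour. A self-contained restatement of the per-level test, evaluated at two sample positions (sb_mi_size = 16, positions and bs in mi units):

#include <stdio.h>

static int has_tr_sketch(int mask_row, int mask_col, int bs, int sb_mi_size) {
  int has_tr = !((mask_row & bs) && (mask_col & bs));
  while (bs < sb_mi_size) {
    if (!(mask_col & bs)) break;        /* left half at this level: keep has_tr */
    if ((mask_col & (2 * bs)) && (mask_row & (2 * bs))) {
      has_tr = 0;                       /* bottom-right quadrant of the parent */
      break;
    }
    bs <<= 1;
  }
  return has_tr;
}

int main(void) {
  printf("%d\n", has_tr_sketch(8, 8, 8, 16)); /* bottom-right quadrant: 0 */
  printf("%d\n", has_tr_sketch(0, 8, 8, 16)); /* top-right quadrant:    1 */
  return 0;
}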
-static int add_col_ref_mv(const AV1_COMMON *cm,
+#if CONFIG_MFMV
+static int check_sb_border(const AV1_COMMON *cm, const int mi_row,
+ const int mi_col, const int row_offset,
+ const int col_offset) {
+ const int sb_mi_size = mi_size_wide[cm->sb_size];
+ const int row = mi_row & (sb_mi_size - 1);
+ const int col = mi_col & (sb_mi_size - 1);
+
+ if (row + row_offset < 0 || row + row_offset >= sb_mi_size ||
+ col + col_offset < 0 || col + col_offset >= sb_mi_size)
+ return 0;
+
+ return 1;
+}
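check_sb_border rejects sample offsets that would leave the current superblock. A worked example, assuming sb_mi_size = 16:

#include <stdio.h>

int main(void) {
  const int sb_mi_size = 16;   /* superblock size in mi units */
  const int mi_row = 18, mi_col = 30, row_offset = 4, col_offset = 4;
  const int row = mi_row & (sb_mi_size - 1);  /* 2 */
  const int col = mi_col & (sb_mi_size - 1);  /* 14 */
  const int inside = !(row + row_offset < 0 || row + row_offset >= sb_mi_size ||
                       col + col_offset < 0 || col + col_offset >= sb_mi_size);
  printf("sample allowed: %d\n", inside);     /* 14 + 4 >= 16, so 0 */
  return 0;
}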
+
+static int add_tpl_ref_mv(const AV1_COMMON *cm,
const MV_REF *prev_frame_mvs_base,
const MACROBLOCKD *xd, int mi_row, int mi_col,
MV_REFERENCE_FRAME ref_frame, int blk_row,
int blk_col, uint8_t *refmv_count,
CANDIDATE_MV *ref_mv_stack, int16_t *mode_context) {
+ (void)prev_frame_mvs_base;
+ POSITION mi_pos;
+ int idx;
+ int coll_blk_count = 0;
+ const int weight_unit = 1; // mi_size_wide[BLOCK_8X8];
+
+#if CONFIG_MV_COMPRESS
+ mi_pos.row = (mi_row & 0x01) ? blk_row : blk_row + 1;
+ mi_pos.col = (mi_col & 0x01) ? blk_col : blk_col + 1;
+#else
+ mi_pos.row = blk_row;
+ mi_pos.col = blk_col;
+#endif
+
+ if (!is_inside(&xd->tile, mi_col, mi_row, cm->mi_rows, cm, &mi_pos))
+ return coll_blk_count;
+
+ const TPL_MV_REF *prev_frame_mvs = cm->cur_frame->tpl_mvs +
+ (mi_row + mi_pos.row) * cm->mi_stride +
+ (mi_col + mi_pos.col);
+
+ MV_REFERENCE_FRAME rf[2];
+ av1_set_ref_frame(rf, ref_frame);
+
+ if (rf[1] == NONE_FRAME) {
+ for (int i = 0; i < MFMV_STACK_SIZE; ++i) {
+ if (prev_frame_mvs->mfmv[ref_frame - LAST_FRAME][i].as_int !=
+ INVALID_MV) {
+ int_mv this_refmv = prev_frame_mvs->mfmv[ref_frame - LAST_FRAME][i];
+ lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv);
+
+ if (blk_row == 0 && blk_col == 0)
+ if (abs(this_refmv.as_mv.row) >= 16 ||
+ abs(this_refmv.as_mv.col) >= 16)
+ mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+
+ for (idx = 0; idx < *refmv_count; ++idx)
+ if (abs(this_refmv.as_mv.row - ref_mv_stack[idx].this_mv.as_mv.row) <
+ 4 &&
+ abs(this_refmv.as_mv.col - ref_mv_stack[idx].this_mv.as_mv.col) <
+ 4)
+ break;
+
+ if (idx < *refmv_count) ref_mv_stack[idx].weight += 2 * weight_unit;
+
+ if (idx == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
+ // TODO(jingning): Hard-coded context number; this needs a more
+ // sensible value.
+ ref_mv_stack[idx].pred_diff[0] = 1;
+ ref_mv_stack[idx].weight = 2 * weight_unit;
+ ++(*refmv_count);
+ }
+
+ ++coll_blk_count;
+ }
+ }
+ } else {
+ // Process compound inter mode
+ for (int i = 0; i < MFMV_STACK_SIZE; ++i) {
+ if (prev_frame_mvs->mfmv[rf[0] - LAST_FRAME][i].as_int != INVALID_MV &&
+ prev_frame_mvs->mfmv[rf[1] - LAST_FRAME][i].as_int != INVALID_MV) {
+ int_mv this_refmv = prev_frame_mvs->mfmv[rf[0] - LAST_FRAME][i];
+ int_mv comp_refmv = prev_frame_mvs->mfmv[rf[1] - LAST_FRAME][i];
+ lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv);
+ lower_mv_precision(&comp_refmv.as_mv, cm->allow_high_precision_mv);
+
+ if (blk_row == 0 && blk_col == 0)
+ if (abs(this_refmv.as_mv.row) >= 16 ||
+ abs(this_refmv.as_mv.col) >= 16 ||
+ abs(comp_refmv.as_mv.row) >= 16 ||
+ abs(comp_refmv.as_mv.col) >= 16)
+ mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+
+ for (idx = 0; idx < *refmv_count; ++idx)
+ if (abs(this_refmv.as_mv.row - ref_mv_stack[idx].this_mv.as_mv.row) <
+ 4 &&
+ abs(this_refmv.as_mv.col - ref_mv_stack[idx].this_mv.as_mv.col) <
+ 4 &&
+ abs(comp_refmv.as_mv.row - ref_mv_stack[idx].comp_mv.as_mv.row) <
+ 4 &&
+ abs(comp_refmv.as_mv.col - ref_mv_stack[idx].comp_mv.as_mv.col) <
+ 4)
+ break;
+
+ if (idx < *refmv_count) ref_mv_stack[idx].weight += 2 * weight_unit;
+
+ if (idx == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
+ ref_mv_stack[idx].comp_mv.as_int = comp_refmv.as_int;
+ // TODO(jingning): Hard-coded context number; this needs a more
+ // sensible value.
+ ref_mv_stack[idx].pred_diff[0] = 1;
+ ref_mv_stack[idx].pred_diff[1] = 1;
+ ref_mv_stack[idx].weight = 2 * weight_unit;
+ ++(*refmv_count);
+ }
+
+ ++coll_blk_count;
+ }
+ }
+ }
+
+ return coll_blk_count;
+}
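Note that the temporal candidates above are merged on a fuzzy match rather than exact equality: each component must differ by less than 4 units, i.e. under half a pel at 1/8-pel precision. Isolated, the comparison looks like the sketch below; so (10, 6) merges with (13, 4), but not with (14, 6).

#include <stdlib.h>

/* Two MVs in 1/8-pel units count as the same temporal (MFMV) candidate when
 * both components differ by less than half a pel. */
static int is_near_duplicate(int row_a, int col_a, int row_b, int col_b) {
  return abs(row_a - row_b) < 4 && abs(col_a - col_b) < 4;
}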
+#else
+static int add_col_ref_mv(const AV1_COMMON *cm,
+ const MV_REF *prev_frame_mvs_base,
+ int prev_frame_mvs_stride, const MACROBLOCKD *xd,
+ int mi_row, int mi_col, MV_REFERENCE_FRAME ref_frame,
+ int blk_row, int blk_col, uint8_t *refmv_count,
+ CANDIDATE_MV *ref_mv_stack, int16_t *mode_context) {
+#if CONFIG_TMV
+ const MV_REF *prev_frame_mvs = prev_frame_mvs_base +
+ ((blk_row + 1) >> 1) * prev_frame_mvs_stride +
+ ((blk_col + 1) >> 1);
+#else
const MV_REF *prev_frame_mvs =
- prev_frame_mvs_base + blk_row * cm->mi_cols + blk_col;
+ prev_frame_mvs_base + blk_row * prev_frame_mvs_stride + blk_col;
+#endif
POSITION mi_pos;
int ref, idx;
int coll_blk_count = 0;
const int weight_unit = mi_size_wide[BLOCK_8X8];
+#if CONFIG_TMV
+ mi_pos.row = blk_row;
+ mi_pos.col = blk_col;
+#else
#if CONFIG_MV_COMPRESS
mi_pos.row = (mi_row & 0x01) ? blk_row : blk_row + 1;
mi_pos.col = (mi_col & 0x01) ? blk_col : blk_col + 1;
@@ -340,16 +608,27 @@ static int add_col_ref_mv(const AV1_COMMON *cm,
mi_pos.row = blk_row;
mi_pos.col = blk_col;
#endif
+#endif // CONFIG_TMV
if (!is_inside(&xd->tile, mi_col, mi_row, cm->mi_rows, cm, &mi_pos))
return coll_blk_count;
for (ref = 0; ref < 2; ++ref) {
if (prev_frame_mvs->ref_frame[ref] == ref_frame) {
int_mv this_refmv = prev_frame_mvs->mv[ref];
+#if CONFIG_AMVR
+ lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv,
+ cm->cur_frame_mv_precision_level);
+#else
lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv);
+#endif
- if (abs(this_refmv.as_mv.row) >= 16 || abs(this_refmv.as_mv.col) >= 16)
- mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+#if CONFIG_OPT_REF_MV
+ if (blk_row == 0 && blk_col == 0)
+#endif
+ {
+ if (abs(this_refmv.as_mv.row) >= 16 || abs(this_refmv.as_mv.col) >= 16)
+ mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+ }
for (idx = 0; idx < *refmv_count; ++idx)
if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break;
@@ -370,60 +649,161 @@ static int add_col_ref_mv(const AV1_COMMON *cm,
return coll_blk_count;
}
+#endif
static void setup_ref_mv_list(const AV1_COMMON *cm, const MACROBLOCKD *xd,
MV_REFERENCE_FRAME ref_frame,
uint8_t *refmv_count, CANDIDATE_MV *ref_mv_stack,
- int_mv *mv_ref_list, int block, int mi_row,
- int mi_col, int16_t *mode_context) {
+ int_mv *mv_ref_list,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ int_mv *gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ int block, int mi_row, int mi_col,
+ int16_t *mode_context) {
int idx, nearest_refmv_count = 0;
uint8_t newmv_count = 0;
CANDIDATE_MV tmp_mv;
int len, nr_len;
+#if CONFIG_TMV
+ const int prev_frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
+ const int tmi_row = mi_row & 0xfffe;
+ const int tmi_col = mi_col & 0xfffe;
+ const MV_REF *const prev_frame_mvs_base =
+ cm->use_prev_frame_mvs
+ ? cm->prev_frame->mvs + (tmi_row >> 1) * prev_frame_mvs_stride +
+ (tmi_col >> 1)
+ : NULL;
+#else
+ const int prev_frame_mvs_stride = cm->mi_cols;
#if CONFIG_MV_COMPRESS
const MV_REF *const prev_frame_mvs_base =
cm->use_prev_frame_mvs
- ? cm->prev_frame->mvs + (((mi_row >> 1) << 1) + 1) * cm->mi_cols +
+ ? cm->prev_frame->mvs +
+ (((mi_row >> 1) << 1) + 1) * prev_frame_mvs_stride +
((mi_col >> 1) << 1) + 1
: NULL;
#else
const MV_REF *const prev_frame_mvs_base =
cm->use_prev_frame_mvs
- ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col
+ ? cm->prev_frame->mvs + mi_row * prev_frame_mvs_stride + mi_col
: NULL;
#endif
+#endif // CONFIG_TMV
const int bs = AOMMAX(xd->n8_w, xd->n8_h);
- const int has_tr = has_top_right(xd, mi_row, mi_col, bs);
+ const int has_tr = has_top_right(cm, xd, mi_row, mi_col, bs);
MV_REFERENCE_FRAME rf[2];
+ const TileInfo *const tile = &xd->tile;
+ int max_row_offset = 0, max_col_offset = 0;
+#if CONFIG_CB4X4
+ const int row_adj = (xd->n8_h < mi_size_high[BLOCK_8X8]) && (mi_row & 0x01);
+ const int col_adj = (xd->n8_w < mi_size_wide[BLOCK_8X8]) && (mi_col & 0x01);
+#endif
+ int processed_rows = 0;
+ int processed_cols = 0;
+ int row_offset, col_offset;
+
av1_set_ref_frame(rf, ref_frame);
mode_context[ref_frame] = 0;
*refmv_count = 0;
- // Scan the first above row mode info.
- newmv_count += scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -1,
- ref_mv_stack, refmv_count);
- // Scan the first left column mode info.
- newmv_count += scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -1,
- ref_mv_stack, refmv_count);
+ // Find valid maximum row/col offset.
+ if (xd->up_available) {
+#if CONFIG_CB4X4
+ max_row_offset = -(MVREF_ROWS << 1) + row_adj;
+#else
+ max_row_offset = -MVREF_ROWS;
+#endif
+ max_row_offset =
+ find_valid_row_offset(tile, mi_row, cm->mi_rows, cm, max_row_offset);
+ }
+ if (xd->left_available) {
+#if CONFIG_CB4X4
+ max_col_offset = -(MVREF_COLS << 1) + col_adj;
+#else
+ max_col_offset = -MVREF_COLS;
+#endif
+ max_col_offset = find_valid_col_offset(tile, mi_col, max_col_offset);
+ }
+
+ // Scan the first above row of mode info (row_offset = -1).
+ if (abs(max_row_offset) >= 1)
+ newmv_count +=
+ scan_row_mbmi(cm, xd, mi_col, block, rf, -1, ref_mv_stack, refmv_count,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ max_row_offset, &processed_rows);
+ // Scan the first left column of mode info (col_offset = -1).
+ if (abs(max_col_offset) >= 1)
+ newmv_count +=
+ scan_col_mbmi(cm, xd, mi_row, block, rf, -1, ref_mv_stack, refmv_count,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ max_col_offset, &processed_cols);
// Check top-right boundary
if (has_tr)
newmv_count += scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1,
- xd->n8_w, ref_mv_stack, refmv_count);
+ xd->n8_w, ref_mv_stack,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ refmv_count);
nearest_refmv_count = *refmv_count;
for (idx = 0; idx < nearest_refmv_count; ++idx)
ref_mv_stack[idx].weight += REF_CAT_LEVEL;
+
+#if CONFIG_MFMV
+ int blk_row, blk_col;
+ int coll_blk_count = 0;
+ int voffset = AOMMAX(mi_size_high[BLOCK_8X8], xd->n8_h);
+ int hoffset = AOMMAX(mi_size_wide[BLOCK_8X8], xd->n8_w);
+
+ int tpl_sample_pos[9][2] = {
+ { -2, hoffset }, { 0, hoffset }, { voffset, hoffset },
+ { voffset, 0 }, { voffset, -2 }, { voffset, -4 },
+ { -4, hoffset }, { voffset, 4 }, { 2, hoffset + 4 },
+ };
+ int i;
+
+ for (blk_row = 0; blk_row < xd->n8_h; blk_row += mi_size_high[BLOCK_8X8]) {
+ for (blk_col = 0; blk_col < xd->n8_w; blk_col += mi_size_wide[BLOCK_8X8]) {
+ // TODO(yunqing): prev_frame_mvs_base is not used here; tpl_mvs is used
+ // instead. It can be modified the same way.
+ int is_available = add_tpl_ref_mv(
+ cm, prev_frame_mvs_base, xd, mi_row, mi_col, ref_frame, blk_row,
+ blk_col, refmv_count, ref_mv_stack, mode_context);
+ if (blk_row == 0 && blk_col == 0) coll_blk_count = is_available;
+ }
+ }
+
+ if (coll_blk_count == 0) mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+
+ for (i = 0; i < 9; ++i) {
+ blk_row = tpl_sample_pos[i][0];
+ blk_col = tpl_sample_pos[i][1];
+
+ if (!check_sb_border(cm, mi_row, mi_col, blk_row, blk_col)) continue;
+ // TODO(yunqing): prev_frame_mvs_base is not used here; tpl_mvs is used
+ // instead. It can be modified the same way.
+ coll_blk_count += add_tpl_ref_mv(cm, prev_frame_mvs_base, xd, mi_row,
+ mi_col, ref_frame, blk_row, blk_col,
+ refmv_count, ref_mv_stack, mode_context);
+ }
+#else
#if CONFIG_TEMPMV_SIGNALING
- if (cm->use_prev_frame_mvs && rf[1] == NONE_FRAME) {
+ if (cm->use_prev_frame_mvs && rf[1] == NONE_FRAME)
#else
if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame &&
- rf[1] == NONE_FRAME) {
+ rf[1] == NONE_FRAME)
#endif
+ {
int blk_row, blk_col;
int coll_blk_count = 0;
#if CONFIG_CB4X4
@@ -435,6 +815,8 @@ static void setup_ref_mv_list(const AV1_COMMON *cm, const MACROBLOCKD *xd,
#endif
#if CONFIG_TPL_MV
+ // Sample positions are modified to be consistent with the frame_mvs
+ // spatial resolution.
int tpl_sample_pos[5][2] = { { -1, xd->n8_w },
{ 0, xd->n8_w },
{ xd->n8_h, xd->n8_w },
@@ -445,9 +827,22 @@ static void setup_ref_mv_list(const AV1_COMMON *cm, const MACROBLOCKD *xd,
for (blk_row = 0; blk_row < xd->n8_h; blk_row += mi_step) {
for (blk_col = 0; blk_col < xd->n8_w; blk_col += mi_step) {
- coll_blk_count += add_col_ref_mv(
- cm, prev_frame_mvs_base, xd, mi_row, mi_col, ref_frame, blk_row,
- blk_col, refmv_count, ref_mv_stack, mode_context);
+#if CONFIG_TMV
+ int is_available =
+ add_col_ref_mv(cm, prev_frame_mvs_base, prev_frame_mvs_stride, xd,
+ tmi_row, tmi_col, ref_frame, blk_row, blk_col,
+ refmv_count, ref_mv_stack, mode_context);
+#else
+ int is_available =
+ add_col_ref_mv(cm, prev_frame_mvs_base, prev_frame_mvs_stride, xd,
+ mi_row, mi_col, ref_frame, blk_row, blk_col,
+ refmv_count, ref_mv_stack, mode_context);
+#endif // CONFIG_TMV
+#if CONFIG_OPT_REF_MV
+ if (blk_row == 0 && blk_col == 0) coll_blk_count = is_available;
+#else
+ coll_blk_count += is_available;
+#endif
}
}
@@ -455,9 +850,15 @@ static void setup_ref_mv_list(const AV1_COMMON *cm, const MACROBLOCKD *xd,
for (i = 0; i < 5; ++i) {
blk_row = tpl_sample_pos[i][0];
blk_col = tpl_sample_pos[i][1];
- coll_blk_count += add_col_ref_mv(cm, prev_frame_mvs_base, xd, mi_row,
- mi_col, ref_frame, blk_row, blk_col,
- refmv_count, ref_mv_stack, mode_context);
+#if CONFIG_TMV
+ coll_blk_count += add_col_ref_mv(
+ cm, prev_frame_mvs_base, prev_frame_mvs_stride, xd, tmi_row, tmi_col,
+ ref_frame, blk_row, blk_col, refmv_count, ref_mv_stack, mode_context);
+#else
+ coll_blk_count += add_col_ref_mv(
+ cm, prev_frame_mvs_base, prev_frame_mvs_stride, xd, mi_row, mi_col,
+ ref_frame, blk_row, blk_col, refmv_count, ref_mv_stack, mode_context);
+#endif // CONFIG_TMV
}
#endif
@@ -465,36 +866,75 @@ static void setup_ref_mv_list(const AV1_COMMON *cm, const MACROBLOCKD *xd,
} else {
mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
}
+#endif
// Scan the second outer area.
scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, -1, ref_mv_stack,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
refmv_count);
- for (idx = 2; idx <= 3; ++idx) {
- scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -idx, ref_mv_stack,
- refmv_count);
- scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -idx, ref_mv_stack,
- refmv_count);
+ for (idx = 2; idx <= MVREF_ROWS; ++idx) {
+#if CONFIG_CB4X4
+ row_offset = -(idx << 1) + 1 + row_adj;
+ col_offset = -(idx << 1) + 1 + col_adj;
+#else
+ row_offset = -idx;
+ col_offset = -idx;
+#endif
+
+ if (abs(row_offset) <= abs(max_row_offset) &&
+ abs(row_offset) > processed_rows)
+ scan_row_mbmi(cm, xd, mi_col, block, rf, row_offset, ref_mv_stack,
+ refmv_count,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ max_row_offset, &processed_rows);
+
+ if (abs(col_offset) <= abs(max_col_offset) &&
+ abs(col_offset) > processed_cols)
+ scan_col_mbmi(cm, xd, mi_row, block, rf, col_offset, ref_mv_stack,
+ refmv_count,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ max_col_offset, &processed_cols);
}
- scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -4, ref_mv_stack,
- refmv_count);
+
+#if CONFIG_CB4X4
+ col_offset = -(MVREF_COLS << 1) + 1 + col_adj;
+#else
+ col_offset = -MVREF_COLS;
+#endif
+ if (abs(col_offset) <= abs(max_col_offset) &&
+ abs(col_offset) > processed_cols)
+ scan_col_mbmi(cm, xd, mi_row, block, rf, col_offset, ref_mv_stack,
+ refmv_count,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ gm_mv_candidates,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ max_col_offset, &processed_cols);
switch (nearest_refmv_count) {
- case 0:
- mode_context[ref_frame] |= 0;
+ case 0: mode_context[ref_frame] |= 0;
+#if !CONFIG_OPT_REF_MV
if (*refmv_count >= 1) mode_context[ref_frame] |= 1;
-
if (*refmv_count == 1)
mode_context[ref_frame] |= (1 << REFMV_OFFSET);
else if (*refmv_count >= 2)
mode_context[ref_frame] |= (2 << REFMV_OFFSET);
+#endif
break;
- case 1:
- mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3;
-
+ case 1: mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3;
+#if CONFIG_OPT_REF_MV
+ mode_context[ref_frame] |= (3 << REFMV_OFFSET);
+#else
if (*refmv_count == 1)
mode_context[ref_frame] |= (3 << REFMV_OFFSET);
else if (*refmv_count >= 2)
mode_context[ref_frame] |= (4 << REFMV_OFFSET);
+#endif
break;
case 2:
@@ -564,9 +1004,22 @@ static void find_mv_refs_idx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
void *const data, int16_t *mode_context,
int_mv zeromv) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
+ const int sb_mi_size = mi_size_wide[cm->sb_size];
int i, refmv_count = 0;
int different_ref_found = 0;
int context_counter = 0;
+
+#if CONFIG_TMV
+ int tmi_row = mi_row & 0xfffe;
+ int tmi_col = mi_col & 0xfffe;
+ POSITION mi_pos = { 0, 0 };
+ int inside = is_inside(&xd->tile, tmi_col, tmi_row, cm->mi_rows, cm, &mi_pos);
+ const MV_REF *const prev_frame_mvs =
+ cm->use_prev_frame_mvs && inside
+ ? cm->prev_frame->mvs + (tmi_row >> 1) * ((cm->mi_cols + 1) >> 1) +
+ (tmi_col >> 1)
+ : NULL;
+#else
#if CONFIG_MV_COMPRESS
const TileInfo *const tile_ = &xd->tile;
int mi_row_end = tile_->mi_row_end;
@@ -586,6 +1039,8 @@ static void find_mv_refs_idx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col
: NULL;
#endif
+#endif // CONFIG_TMV
+
#if CONFIG_INTRABC
assert(IMPLIES(ref_frame == INTRA_FRAME, cm->use_prev_frame_mvs == 0));
#endif
@@ -669,8 +1124,8 @@ static void find_mv_refs_idx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
? NULL
: &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
if (candidate == NULL) continue;
- if ((mi_row % MAX_MIB_SIZE) + mv_ref->row >= MAX_MIB_SIZE ||
- (mi_col % MAX_MIB_SIZE) + mv_ref->col >= MAX_MIB_SIZE)
+ if ((mi_row & (sb_mi_size - 1)) + mv_ref->row >= sb_mi_size ||
+ (mi_col & (sb_mi_size - 1)) + mv_ref->col >= sb_mi_size)
continue;
different_ref_found = 1;
@@ -721,8 +1176,8 @@ static void find_mv_refs_idx(const AV1_COMMON *cm, const MACROBLOCKD *xd,
? NULL
: &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
if (candidate == NULL) continue;
- if ((mi_row % MAX_MIB_SIZE) + mv_ref->row >= MAX_MIB_SIZE ||
- (mi_col % MAX_MIB_SIZE) + mv_ref->col >= MAX_MIB_SIZE)
+ if ((mi_row & (sb_mi_size - 1)) + mv_ref->row >= sb_mi_size ||
+ (mi_col & (sb_mi_size - 1)) + mv_ref->col >= sb_mi_size)
continue;
// If the candidate is INTRA we don't want to consider its mv.
@@ -764,7 +1219,6 @@ Done:
mv_ref_list[i].as_int = zeromv.as_int;
}
-#if CONFIG_EXT_INTER
// This function keeps a mode count for a given MB/SB
void av1_update_mv_context(const AV1_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
@@ -814,17 +1268,13 @@ Done:
if (mode_context)
mode_context[ref_frame] = counter_to_context[context_counter];
}
-#endif // CONFIG_EXT_INTER
void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack,
-#if CONFIG_EXT_INTER
- int16_t *compound_mode_context,
-#endif // CONFIG_EXT_INTER
- int_mv *mv_ref_list, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data,
- int16_t *mode_context) {
+ int16_t *compound_mode_context, int_mv *mv_ref_list,
+ int mi_row, int mi_col, find_mv_refs_sync sync,
+ void *const data, int16_t *mode_context) {
int_mv zeromv[2];
#if CONFIG_GLOBAL_MOTION
BLOCK_SIZE bsize = mi->mbmi.sb_type;
@@ -834,24 +1284,33 @@ void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
MV_REFERENCE_FRAME rf[2];
#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_EXT_INTER
av1_update_mv_context(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col,
compound_mode_context);
-#endif // CONFIG_EXT_INTER
#if CONFIG_GLOBAL_MOTION
if (!CONFIG_INTRABC || ref_frame != INTRA_FRAME) {
av1_set_ref_frame(rf, ref_frame);
zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[rf[0]],
cm->allow_high_precision_mv, bsize,
- mi_col, mi_row, 0)
+ mi_col, mi_row, 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
- zeromv[1].as_int = (rf[1] != NONE_FRAME)
- ? gm_get_motion_vector(&cm->global_motion[rf[1]],
- cm->allow_high_precision_mv,
- bsize, mi_col, mi_row, 0)
- .as_int
- : 0;
+ zeromv[1].as_int =
+ (rf[1] != NONE_FRAME)
+ ? gm_get_motion_vector(&cm->global_motion[rf[1]],
+ cm->allow_high_precision_mv, bsize, mi_col,
+ mi_row, 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
+ .as_int
+ : 0;
} else {
zeromv[0].as_int = zeromv[1].as_int = 0;
}
@@ -864,6 +1323,9 @@ void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
sync, data, mode_context, zeromv[0]);
setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack, mv_ref_list,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ zeromv,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
-1, mi_row, mi_col, mode_context);
/* Note: If global motion is enabled, then we want to set the ALL_ZERO flag
iff all of the MVs we could generate with NEARMV/NEARESTMV are equivalent
@@ -880,26 +1342,38 @@ void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
zero, so sets the ALL_ZERO flag.
* This leads to an encode/decode mismatch.
*/
- if (*ref_mv_count >= 2) {
- for (idx = 0; idx < AOMMIN(3, *ref_mv_count); ++idx) {
- if (ref_mv_stack[idx].this_mv.as_int != zeromv[0].as_int) all_zero = 0;
- if (ref_frame > ALTREF_FRAME)
- if (ref_mv_stack[idx].comp_mv.as_int != zeromv[1].as_int) all_zero = 0;
- }
- } else if (ref_frame <= ALTREF_FRAME) {
+ for (idx = 0; idx < AOMMIN(3, *ref_mv_count); ++idx) {
+ if (ref_mv_stack[idx].this_mv.as_int != zeromv[0].as_int) all_zero = 0;
+ if (ref_frame > ALTREF_FRAME)
+ if (ref_mv_stack[idx].comp_mv.as_int != zeromv[1].as_int) all_zero = 0;
+ }
+ if (*ref_mv_count < 2 && ref_frame <= ALTREF_FRAME) {
for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx)
if (mv_ref_list[idx].as_int != zeromv[0].as_int) all_zero = 0;
}
+#if !CONFIG_OPT_REF_MV
if (all_zero) mode_context[ref_frame] |= (1 << ALL_ZERO_FLAG_OFFSET);
+#else
+ (void)all_zero;
+#endif
}
void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
- int_mv *near_mv) {
+ int_mv *near_mv
+#if CONFIG_AMVR
+ ,
+ int is_integer
+#endif
+ ) {
int i;
// Make sure all the candidates are properly clamped etc
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) {
+#if CONFIG_AMVR
+ lower_mv_precision(&mvlist[i].as_mv, allow_hp, is_integer);
+#else
lower_mv_precision(&mvlist[i].as_mv, allow_hp);
+#endif
}
*nearest_mv = mvlist[0];
*near_mv = mvlist[1];
@@ -908,14 +1382,8 @@ void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
int block, int ref, int mi_row, int mi_col,
CANDIDATE_MV *ref_mv_stack,
- uint8_t *ref_mv_count,
-#if CONFIG_EXT_INTER
- int_mv *mv_list,
-#endif // CONFIG_EXT_INTER
+ uint8_t *ref_mv_count, int_mv *mv_list,
int_mv *nearest_mv, int_mv *near_mv) {
-#if !CONFIG_EXT_INTER
- int_mv mv_list[MAX_MV_REF_CANDIDATES];
-#endif // !CONFIG_EXT_INTER
MODE_INFO *const mi = xd->mi[0];
b_mode_info *bmi = mi->bmi;
int n;
@@ -931,7 +1399,12 @@ void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
#if CONFIG_GLOBAL_MOTION
zeromv.as_int = gm_get_motion_vector(&cm->global_motion[rf[0]],
cm->allow_high_precision_mv,
- mi->mbmi.sb_type, mi_col, mi_row, block)
+ mi->mbmi.sb_type, mi_col, mi_row, block
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
#else
zeromv.as_int = 0;
@@ -940,10 +1413,16 @@ void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
mi_col, NULL, NULL, NULL, zeromv);
scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, 0, ref_mv_stack,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ &zeromv,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
ref_mv_count);
above_count = *ref_mv_count;
scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, 0, -1, ref_mv_stack,
+#if CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
+ &zeromv,
+#endif // CONFIG_GLOBAL_MOTION && USE_CUR_GM_REFMV
ref_mv_count);
left_count = *ref_mv_count - above_count;
@@ -994,6 +1473,526 @@ void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
}
}
+#if CONFIG_FRAME_MARKER
+void av1_setup_frame_buf_refs(AV1_COMMON *cm) {
+ cm->cur_frame->cur_frame_offset = cm->frame_offset;
+ int alt_buf_idx = cm->frame_refs[ALTREF_FRAME - LAST_FRAME].idx;
+ int lst_buf_idx = cm->frame_refs[LAST_FRAME - LAST_FRAME].idx;
+ int gld_buf_idx = cm->frame_refs[GOLDEN_FRAME - LAST_FRAME].idx;
+
+#if CONFIG_EXT_REFS
+ int lst2_buf_idx = cm->frame_refs[LAST2_FRAME - LAST_FRAME].idx;
+ int lst3_buf_idx = cm->frame_refs[LAST3_FRAME - LAST_FRAME].idx;
+ int bwd_buf_idx = cm->frame_refs[BWDREF_FRAME - LAST_FRAME].idx;
+ int alt2_buf_idx = cm->frame_refs[ALTREF2_FRAME - LAST_FRAME].idx;
+#endif
+
+ if (alt_buf_idx >= 0)
+ cm->cur_frame->alt_frame_offset =
+ cm->buffer_pool->frame_bufs[alt_buf_idx].cur_frame_offset;
+
+ if (lst_buf_idx >= 0)
+ cm->cur_frame->lst_frame_offset =
+ cm->buffer_pool->frame_bufs[lst_buf_idx].cur_frame_offset;
+
+ if (gld_buf_idx >= 0)
+ cm->cur_frame->gld_frame_offset =
+ cm->buffer_pool->frame_bufs[gld_buf_idx].cur_frame_offset;
+
+#if CONFIG_EXT_REFS
+ if (lst2_buf_idx >= 0)
+ cm->cur_frame->lst2_frame_offset =
+ cm->buffer_pool->frame_bufs[lst2_buf_idx].cur_frame_offset;
+
+ if (lst3_buf_idx >= 0)
+ cm->cur_frame->lst3_frame_offset =
+ cm->buffer_pool->frame_bufs[lst3_buf_idx].cur_frame_offset;
+
+ if (bwd_buf_idx >= 0)
+ cm->cur_frame->bwd_frame_offset =
+ cm->buffer_pool->frame_bufs[bwd_buf_idx].cur_frame_offset;
+
+ if (alt2_buf_idx >= 0)
+ cm->cur_frame->alt2_frame_offset =
+ cm->buffer_pool->frame_bufs[alt2_buf_idx].cur_frame_offset;
+#endif
+}
+
+#if CONFIG_FRAME_SIGN_BIAS
+void av1_setup_frame_sign_bias(AV1_COMMON *cm) {
+ MV_REFERENCE_FRAME ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx;
+ if (buf_idx != INVALID_IDX) {
+ const int ref_frame_offset =
+ cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
+ cm->ref_frame_sign_bias[ref_frame] =
+ (ref_frame_offset <= (int)cm->frame_offset) ? 0 : 1;
+ } else {
+ cm->ref_frame_sign_bias[ref_frame] = 0;
+ }
+ }
+}
+#endif // CONFIG_FRAME_SIGN_BIAS
+#endif // CONFIG_FRAME_MARKER
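The sign bias derived above is simply "does this reference lie in the future?": a reference whose offset is at or before the current frame gets bias 0, a later one gets bias 1. A worked example with an assumed current frame offset of 7:

#include <stdio.h>

int main(void) {
  const int cur_frame_offset = 7;
  const int ref_offsets[2] = { 5, 9 };  /* e.g. LAST at 5, ALTREF at 9 */
  for (int i = 0; i < 2; ++i)
    printf("ref %d: sign_bias=%d\n", i,
           (ref_offsets[i] <= cur_frame_offset) ? 0 : 1);
  /* Past references get bias 0; future (backward) references get bias 1. */
  return 0;
}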
+
+#if CONFIG_MFMV
+// Although we assign 32-bit integers, all the values are strictly under 14
+// bits.
+static int div_mult[32] = {
+ 0, 16384, 8192, 5461, 4096, 3276, 2730, 2340, 2048, 1820, 1638,
+ 1489, 1365, 1260, 1170, 1092, 1024, 963, 910, 862, 819, 780,
+ 744, 712, 682, 655, 630, 606, 585, 564, 546, 528,
+};
+
+// TODO(jingning): Consider using a lookup table for (num / den) altogether.
+static void get_mv_projection(MV *output, MV ref, int num, int den) {
+ output->row =
+ (int16_t)(ROUND_POWER_OF_TWO(ref.row * num * div_mult[den], 14));
+ output->col =
+ (int16_t)(ROUND_POWER_OF_TWO(ref.col * num * div_mult[den], 14));
+}
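div_mult[den] holds round(2^14 / den), so the multiply-and-shift above computes ref * num / den in fixed point without a division. A worked check of the arithmetic for a frame-distance ratio of num = 2, den = 4:

#include <stdint.h>
#include <stdio.h>

#define RPOT14(x) (((x) + (1 << 13)) >> 14) /* ROUND_POWER_OF_TWO(x, 14) */

int main(void) {
  const int num = 2;                  /* numerator of the frame-distance ratio */
  const int div_mult_4 = 4096;        /* round(2^14 / 4): denominator 4, per the table */
  const int16_t row = 16, col = -24;  /* reference MV in 1/8-pel units */
  printf("projected: (%d, %d)\n",
         RPOT14(row * num * div_mult_4),   /*  16 * 2 / 4 =   8 */
         RPOT14(col * num * div_mult_4));  /* -24 * 2 / 4 = -12 */
  return 0;
}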
+
+#define MAX_OFFSET_WIDTH 64
+#define MAX_OFFSET_HEIGHT 32
+
+static int get_block_position(AV1_COMMON *cm, int *mi_r, int *mi_c, int blk_row,
+ int blk_col, MV mv, int sign_bias) {
+ if ((abs(mv.row) >> 3) > MAX_OFFSET_HEIGHT ||
+ (abs(mv.col) >> 3) > MAX_OFFSET_WIDTH)
+ return 0;
+
+ int row = (sign_bias == 1) ? blk_row - (mv.row >> (3 + MI_SIZE_LOG2))
+ : blk_row + (mv.row >> (3 + MI_SIZE_LOG2));
+ int col = (sign_bias == 1) ? blk_col - (mv.col >> (3 + MI_SIZE_LOG2))
+ : blk_col + (mv.col >> (3 + MI_SIZE_LOG2));
+
+ if (row < 0 || row >= cm->mi_rows || col < 0 || col >= cm->mi_cols) return 0;
+
+ *mi_r = row;
+ *mi_c = col;
+
+ return 1;
+}
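get_block_position converts the 1/8-pel MV into an mi-unit displacement (the 3 + MI_SIZE_LOG2 shift, with MI_SIZE_LOG2 assumed to be 2 for 4-pel mi units) and follows it backwards when the reference's sign bias points the other way, rejecting anything that lands out of range. A worked example:

#include <stdio.h>

int main(void) {
  const int MI_SIZE_LOG2 = 2;     /* 4-pel mi units */
  const int mv_row = 40;          /* 1/8-pel units: 5 pels */
  const int blk_row = 10;         /* source mi row */
  const int sign_bias = 1;        /* 1: trace the trajectory backwards */
  const int shift = 3 + MI_SIZE_LOG2;
  const int row = sign_bias ? blk_row - (mv_row >> shift)
                            : blk_row + (mv_row >> shift);
  printf("target mi row: %d\n", row);  /* 40 >> 5 = 1 mi unit, so row 9 */
  return 0;
}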
+
+static uint32_t mv_sign_reverse(int_mv ref) {
+ int_mv this_mv;
+ this_mv.as_mv.row = -ref.as_mv.row;
+ this_mv.as_mv.col = -ref.as_mv.col;
+
+ return this_mv.as_int;
+}
+
+void av1_setup_motion_field(AV1_COMMON *cm) {
+ int cur_frame_index = cm->cur_frame->cur_frame_offset;
+ int lst_frame_index = 0, alt_frame_index = 0, gld_frame_index = 0;
+#if CONFIG_EXT_REFS
+ int lst2_frame_index = 0, lst3_frame_index = 0;
+ int bwd_frame_index = 0, alt2_frame_index = 0;
+#endif
+ TPL_MV_REF *tpl_mvs_base = cm->cur_frame->tpl_mvs;
+
+ for (int ref_frame = 0; ref_frame < INTER_REFS_PER_FRAME; ++ref_frame) {
+ int size = (cm->mi_rows + 16) * cm->mi_stride;
+ for (int idx = 0; idx < size; ++idx) {
+ for (int i = 0; i < MFMV_STACK_SIZE; ++i)
+ tpl_mvs_base[idx].mfmv[ref_frame][i].as_int = INVALID_MV;
+ }
+ }
+
+ int alt_buf_idx = cm->frame_refs[ALTREF_FRAME - LAST_FRAME].idx;
+ int lst_buf_idx = cm->frame_refs[LAST_FRAME - LAST_FRAME].idx;
+ int gld_buf_idx = cm->frame_refs[GOLDEN_FRAME - LAST_FRAME].idx;
+#if CONFIG_EXT_REFS
+ int lst2_buf_idx = cm->frame_refs[LAST2_FRAME - LAST_FRAME].idx;
+ int lst3_buf_idx = cm->frame_refs[LAST3_FRAME - LAST_FRAME].idx;
+ int bwd_buf_idx = cm->frame_refs[BWDREF_FRAME - LAST_FRAME].idx;
+ int alt2_buf_idx = cm->frame_refs[ALTREF2_FRAME - LAST_FRAME].idx;
+#endif
+
+ if (alt_buf_idx >= 0)
+ alt_frame_index = cm->buffer_pool->frame_bufs[alt_buf_idx].cur_frame_offset;
+
+ if (lst_buf_idx >= 0)
+ lst_frame_index = cm->buffer_pool->frame_bufs[lst_buf_idx].cur_frame_offset;
+
+ if (gld_buf_idx >= 0)
+ gld_frame_index = cm->buffer_pool->frame_bufs[gld_buf_idx].cur_frame_offset;
+
+#if CONFIG_EXT_REFS
+ if (lst2_buf_idx >= 0)
+ lst2_frame_index =
+ cm->buffer_pool->frame_bufs[lst2_buf_idx].cur_frame_offset;
+
+ if (lst3_buf_idx >= 0)
+ lst3_frame_index =
+ cm->buffer_pool->frame_bufs[lst3_buf_idx].cur_frame_offset;
+
+ if (bwd_buf_idx >= 0)
+ bwd_frame_index = cm->buffer_pool->frame_bufs[bwd_buf_idx].cur_frame_offset;
+
+ if (alt2_buf_idx >= 0)
+ alt2_frame_index =
+ cm->buffer_pool->frame_bufs[alt2_buf_idx].cur_frame_offset;
+#endif
+
+ if (alt_frame_index < cur_frame_index) return;
+
+ // ======================
+ // Process last frame
+ // ======================
+ if (lst_buf_idx >= 0) {
+ MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[lst_buf_idx].mvs;
+ const int lst_frame_idx =
+ cm->buffer_pool->frame_bufs[lst_buf_idx].lst_frame_offset;
+ const int alt_frame_idx =
+ cm->buffer_pool->frame_bufs[lst_buf_idx].alt_frame_offset;
+ const int gld_frame_idx =
+ cm->buffer_pool->frame_bufs[lst_buf_idx].gld_frame_offset;
+#if CONFIG_EXT_REFS
+ const int lst2_frame_idx =
+ cm->buffer_pool->frame_bufs[lst_buf_idx].lst2_frame_offset;
+ const int lst3_frame_idx =
+ cm->buffer_pool->frame_bufs[lst_buf_idx].lst3_frame_offset;
+ const int bwd_frame_idx =
+ cm->buffer_pool->frame_bufs[lst_buf_idx].bwd_frame_offset;
+ const int alt2_frame_idx =
+ cm->buffer_pool->frame_bufs[lst_buf_idx].alt2_frame_offset;
+#endif
+
+ int alt_offset = AOMMAX(1, alt_frame_idx - lst_frame_index);
+ int lst_offset = AOMMAX(1, lst_frame_index - lst_frame_idx);
+ int gld_offset = AOMMAX(1, lst_frame_index - gld_frame_idx);
+ int cur_to_lst = cur_frame_index - lst_frame_index;
+ int cur_to_alt = alt_frame_index - cur_frame_index;
+ int cur_to_gld = cur_frame_index - gld_frame_index;
+
+#if CONFIG_EXT_REFS
+ int bwd_offset = AOMMAX(1, bwd_frame_idx - lst_frame_index);
+ int alt2_offset = AOMMAX(1, alt2_frame_idx - lst_frame_index);
+ int lst2_offset = AOMMAX(1, lst_frame_index - lst2_frame_idx);
+ int lst3_offset = AOMMAX(1, lst_frame_index - lst3_frame_idx);
+ int cur_to_lst2 = cur_frame_index - lst2_frame_index;
+ int cur_to_lst3 = cur_frame_index - lst3_frame_index;
+ int cur_to_bwd = bwd_frame_index - cur_frame_index;
+ int cur_to_alt2 = alt2_frame_index - cur_frame_index;
+#endif
+
+ const int is_lst_overlay = (alt_frame_idx == gld_frame_index);
+ // clang-format off
+ const int ref_frame_offset_buffer[TOTAL_REFS_PER_FRAME] = {
+#if CONFIG_EXT_REFS
+ 0, lst_offset, lst2_offset, lst3_offset, gld_offset,
+ bwd_offset, alt2_offset, alt_offset
+#else
+ 0, lst_offset, gld_offset, alt_offset
+#endif
+ };
+ // clang-format on
+
+ for (int blk_row = 0; blk_row < cm->mi_rows && !is_lst_overlay; ++blk_row) {
+ for (int blk_col = 0; blk_col < cm->mi_cols; ++blk_col) {
+ MV_REF *mv_ref = &mv_ref_base[blk_row * cm->mi_cols + blk_col];
+ MV fwd_mv = mv_ref->mv[0].as_mv;
+ MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0],
+ mv_ref->ref_frame[1] };
+
+ // Derive motion vectors toward the last reference frame.
+ if (ref_frame[0] <= GOLDEN_FRAME && ref_frame[0] > INTRA_FRAME) {
+ int_mv this_mv;
+ int mi_r, mi_c;
+
+ const int ref_frame_offset = ref_frame_offset_buffer[ref_frame[0]];
+
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst,
+ ref_frame_offset);
+ int pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
+ this_mv.as_mv, 1);
+
+ if (pos_valid) {
+ int mi_offset = mi_r * cm->mi_stride + mi_c;
+ tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(LAST_FRAME)][0].as_int =
+ this_mv.as_int;
+
+#if CONFIG_EXT_REFS
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst2,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(LAST2_FRAME)][0].as_int =
+ this_mv.as_int;
+
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst3,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(LAST3_FRAME)][0].as_int =
+ this_mv.as_int;
+#endif
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_gld,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(GOLDEN_FRAME)]
+ [0].as_int = this_mv.as_int;
+ }
+ }
+
+ for (int idx = 0; idx < 2; ++idx) {
+ if (ref_frame[idx] <= GOLDEN_FRAME) continue;
+
+ int_mv this_mv;
+ int mi_r, mi_c;
+ fwd_mv = mv_ref->mv[idx].as_mv;
+
+ const int ref_frame_offset = ref_frame_offset_buffer[ref_frame[idx]];
+
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst,
+ ref_frame_offset);
+ int pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
+ this_mv.as_mv, 0);
+
+ if (pos_valid) {
+ int mi_offset = mi_r * cm->mi_stride + mi_c;
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_alt,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(ALTREF_FRAME)]
+ [0].as_int = this_mv.as_int;
+
+#if CONFIG_EXT_REFS
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_bwd,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(BWDREF_FRAME)]
+ [0].as_int = this_mv.as_int;
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_alt2,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset].mfmv[FWD_RF_OFFSET(ALTREF2_FRAME)]
+ [0].as_int = this_mv.as_int;
+#endif
+ }
+ }
+ }
+ }
+ }
+
+ // =======================
+ // Process ARF frame
+ // =======================
+ if (alt_buf_idx >= 0) {
+ MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[alt_buf_idx].mvs;
+ const int lst_frame_idx =
+ cm->buffer_pool->frame_bufs[alt_buf_idx].lst_frame_offset;
+ const int gld_frame_idx =
+ cm->buffer_pool->frame_bufs[alt_buf_idx].gld_frame_offset;
+#if CONFIG_EXT_REFS
+ const int lst2_frame_idx =
+ cm->buffer_pool->frame_bufs[alt_buf_idx].lst2_frame_offset;
+ const int lst3_frame_idx =
+ cm->buffer_pool->frame_bufs[alt_buf_idx].lst3_frame_offset;
+ const int bwd_frame_idx =
+ cm->buffer_pool->frame_bufs[alt_buf_idx].bwd_frame_offset;
+ const int alt2_frame_idx =
+ cm->buffer_pool->frame_bufs[alt_buf_idx].alt2_frame_offset;
+#endif
+
+ int lst_offset = AOMMAX(1, alt_frame_index - lst_frame_idx);
+ int gld_offset = AOMMAX(1, alt_frame_index - gld_frame_idx);
+ int cur_to_alt = alt_frame_index - cur_frame_index;
+ int cur_to_lst = cur_frame_index - lst_frame_index;
+ int cur_to_gld = cur_frame_index - gld_frame_index;
+#if CONFIG_EXT_REFS
+ int bwd_offset = AOMMAX(1, alt_frame_index - bwd_frame_idx);
+ int alt2_offset = AOMMAX(1, alt_frame_index - alt2_frame_idx);
+ int lst2_offset = AOMMAX(1, alt_frame_index - lst2_frame_idx);
+ int lst3_offset = AOMMAX(1, alt_frame_index - lst3_frame_idx);
+ int cur_to_lst2 = cur_frame_index - lst2_frame_index;
+ int cur_to_lst3 = cur_frame_index - lst3_frame_index;
+ int cur_to_bwd = bwd_frame_index - cur_frame_index;
+ int cur_to_alt2 = alt2_frame_index - cur_frame_index;
+#endif
+ const int ref_stamp = FWD_RF_OFFSET(ALTREF_FRAME);
+ // clang-format off
+ const int ref_frame_offset_buffer[TOTAL_REFS_PER_FRAME] = {
+#if CONFIG_EXT_REFS
+ 0, lst_offset, lst2_offset, lst3_offset, gld_offset,
+ bwd_offset, alt2_offset, 0,
+#else
+ 0, lst_offset, gld_offset, 0,
+#endif
+ };
+ // clang-format on
+
+ for (int blk_row = 0; blk_row < cm->mi_rows; ++blk_row) {
+ for (int blk_col = 0; blk_col < cm->mi_cols; ++blk_col) {
+ MV_REF *mv_ref = &mv_ref_base[blk_row * cm->mi_cols + blk_col];
+ MV fwd_mv = mv_ref->mv[0].as_mv;
+ MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0],
+ mv_ref->ref_frame[1] };
+
+ const int ref_frame_offset = ref_frame_offset_buffer[ref_frame[0]];
+
+ if (ref_frame[0] <= GOLDEN_FRAME && ref_frame[0] > INTRA_FRAME) {
+ int_mv this_mv;
+ int mi_r, mi_c;
+
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_alt,
+ ref_frame_offset);
+ int pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
+ this_mv.as_mv, 0);
+
+ if (pos_valid) {
+ int mi_offset = mi_r * cm->mi_stride + mi_c;
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(ALTREF_FRAME)][ref_stamp]
+ .as_int = mv_sign_reverse(this_mv);
+
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(LAST_FRAME)][ref_stamp]
+ .as_int = this_mv.as_int;
+
+#if CONFIG_EXT_REFS
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_bwd,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(BWDREF_FRAME)][ref_stamp]
+ .as_int = mv_sign_reverse(this_mv);
+
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_alt2,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(ALTREF2_FRAME)][ref_stamp]
+ .as_int = mv_sign_reverse(this_mv);
+
+ if (ref_frame[0] >= LAST2_FRAME) {
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst2,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(LAST2_FRAME)][ref_stamp]
+ .as_int = this_mv.as_int;
+ }
+
+ if (ref_frame[0] >= LAST3_FRAME) {
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst3,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(LAST3_FRAME)][ref_stamp]
+ .as_int = this_mv.as_int;
+ }
+#endif
+ if (ref_frame[0] >= GOLDEN_FRAME) {
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_gld,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(GOLDEN_FRAME)][ref_stamp]
+ .as_int = this_mv.as_int;
+ }
+ }
+ }
+ }
+ }
+ }
+
+// ==========================================
+// Process BWD reference frame
+// ==========================================
+#if CONFIG_EXT_REFS
+ if (bwd_buf_idx >= 0) {
+ MV_REF *mv_ref_base = cm->buffer_pool->frame_bufs[bwd_buf_idx].mvs;
+ const int lst_frame_idx =
+ cm->buffer_pool->frame_bufs[bwd_buf_idx].lst_frame_offset;
+ const int gld_frame_idx =
+ cm->buffer_pool->frame_bufs[bwd_buf_idx].gld_frame_offset;
+ const int lst2_frame_idx =
+ cm->buffer_pool->frame_bufs[bwd_buf_idx].lst2_frame_offset;
+ const int lst3_frame_idx =
+ cm->buffer_pool->frame_bufs[bwd_buf_idx].lst3_frame_offset;
+ const int bwd_frame_idx =
+ cm->buffer_pool->frame_bufs[bwd_buf_idx].bwd_frame_offset;
+ const int alt2_frame_idx =
+ cm->buffer_pool->frame_bufs[bwd_buf_idx].alt2_frame_offset;
+ const int alt_frame_idx =
+ cm->buffer_pool->frame_bufs[bwd_buf_idx].alt_frame_offset;
+
+ int lst_offset = AOMMAX(1, bwd_frame_index - lst_frame_idx);
+ int gld_offset = AOMMAX(1, bwd_frame_index - gld_frame_idx);
+ int cur_to_lst = cur_frame_index - lst_frame_index;
+
+ int lst2_offset = AOMMAX(1, bwd_frame_index - lst2_frame_idx);
+ int lst3_offset = AOMMAX(1, bwd_frame_index - lst3_frame_idx);
+ int bwd_offset = AOMMAX(1, bwd_frame_idx - bwd_frame_index);
+ int alt2_offset = AOMMAX(1, alt2_frame_idx - bwd_frame_index);
+ int alt_offset = AOMMAX(1, alt_frame_idx - bwd_frame_index);
+ int cur_to_lst2 = cur_frame_index - lst2_frame_index;
+ int cur_to_lst3 = cur_frame_index - lst3_frame_index;
+ int cur_to_gld = cur_frame_index - gld_frame_index;
+ int cur_to_bwd = bwd_frame_index - cur_frame_index;
+
+ const int ref_stamp = FWD_RF_OFFSET(BWDREF_FRAME);
+ const int ref_frame_offset_buffer[TOTAL_REFS_PER_FRAME] = {
+ 0, lst_offset, lst2_offset, lst3_offset,
+ gld_offset, bwd_offset, alt2_offset, alt_offset,
+ };
+
+ for (int blk_row = 0; blk_row < cm->mi_rows; ++blk_row) {
+ for (int blk_col = 0; blk_col < cm->mi_cols; ++blk_col) {
+ MV_REF *mv_ref = &mv_ref_base[blk_row * cm->mi_cols + blk_col];
+ MV fwd_mv = mv_ref->mv[0].as_mv;
+ MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0],
+ mv_ref->ref_frame[1] };
+
+ if (ref_frame[0] <= GOLDEN_FRAME && ref_frame[0] > INTRA_FRAME) {
+ const int ref_frame_offset = ref_frame_offset_buffer[ref_frame[0]];
+ int_mv this_mv;
+ int mi_r, mi_c;
+
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_bwd,
+ ref_frame_offset);
+ int pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
+ this_mv.as_mv, 0);
+
+ if (pos_valid) {
+ int mi_offset = mi_r * cm->mi_stride + mi_c;
+
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(BWDREF_FRAME)][ref_stamp]
+ .as_int = mv_sign_reverse(this_mv);
+
+ // Project the motion vector onto the last reference frame.
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(LAST_FRAME)][ref_stamp]
+ .as_int = this_mv.as_int;
+
+ if (ref_frame[0] >= LAST2_FRAME) {
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst2,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(LAST2_FRAME)][ref_stamp]
+ .as_int = this_mv.as_int;
+ }
+
+ if (ref_frame[0] >= LAST3_FRAME) {
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_lst3,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(LAST3_FRAME)][ref_stamp]
+ .as_int = this_mv.as_int;
+ }
+
+ if (ref_frame[0] >= GOLDEN_FRAME) {
+ get_mv_projection(&this_mv.as_mv, fwd_mv, cur_to_gld,
+ ref_frame_offset);
+ tpl_mvs_base[mi_offset]
+ .mfmv[FWD_RF_OFFSET(GOLDEN_FRAME)][ref_stamp]
+ .as_int = this_mv.as_int;
+ }
+ }
+ }
+ }
+ }
+ }
+#endif
+}
+#endif // CONFIG_MFMV
+
#if CONFIG_WARPED_MOTION
#if WARPED_MOTION_SORT_SAMPLES
static INLINE void record_samples(MB_MODE_INFO *mbmi, int *pts, int *pts_inref,
@@ -1072,71 +2071,49 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int ref_frame = mbmi0->ref_frame[0];
int up_available = xd->up_available;
int left_available = xd->left_available;
- int i, mi_step = 1, np = 0, n, j, k;
+ int i, mi_step = 1, np = 0;
int global_offset_c = mi_col * MI_SIZE;
int global_offset_r = mi_row * MI_SIZE;
const TileInfo *const tile = &xd->tile;
- // Search nb range in the unit of mi
- int bs =
- (AOMMAX(xd->n8_w, xd->n8_h) > 1) ? (AOMMAX(xd->n8_w, xd->n8_h) >> 1) : 1;
- int marked[16 * 32]; // max array size for 128x128
int do_tl = 1;
int do_tr = 1;
- // scan the above rows
+ // scan the nearest above rows
if (up_available) {
- for (n = 0; n < bs; n++) {
- int mi_row_offset = -1 * (n + 1);
-
- if (!n) {
- MODE_INFO *mi = xd->mi[mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
- uint8_t n8_w = mi_size_wide[mbmi->sb_type];
-
- // Handle "current block width <= above block width" case.
- if (xd->n8_w <= n8_w) {
- int col_offset = -mi_col % n8_w;
-
- if (col_offset < 0) do_tl = 0;
- if (col_offset + n8_w > xd->n8_w) do_tr = 0;
-
- if (mbmi->ref_frame[0] == ref_frame &&
- mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
- global_offset_c, 0, -1, col_offset, 1);
- pts += 2;
- pts_inref += 2;
- pts_mv += 2;
- np++;
- }
- break;
- }
- }
+ int mi_row_offset = -1;
+ MODE_INFO *mi = xd->mi[mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *mbmi = &mi->mbmi;
+ uint8_t n8_w = mi_size_wide[mbmi->sb_type];
- // Handle "current block width > above block width" case.
- if (!n) memset(marked, 0, bs * xd->n8_w * sizeof(*marked));
+ if (xd->n8_w <= n8_w) {
+ // Handle "current block width <= above block width" case.
+ int col_offset = -mi_col % n8_w;
+ if (col_offset < 0) do_tl = 0;
+ if (col_offset + n8_w > xd->n8_w) do_tr = 0;
+
+ if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
+ record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
+ global_offset_c, 0, -1, col_offset, 1);
+ pts += 2;
+ pts_inref += 2;
+ pts_mv += 2;
+ np++;
+ }
+ } else {
+ // Handle "current block width > above block width" case.
for (i = 0; i < AOMMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
int mi_col_offset = i;
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
- uint8_t n8_w = mi_size_wide[mbmi->sb_type];
- uint8_t n8_h = mi_size_high[mbmi->sb_type];
-
+ mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ mbmi = &mi->mbmi;
+ n8_w = mi_size_wide[mbmi->sb_type];
mi_step = AOMMIN(xd->n8_w, n8_w);
- // Processed already
- if (marked[n * xd->n8_w + i]) continue;
-
- for (j = 0; j < AOMMIN(bs, n8_h); j++)
- for (k = 0; k < AOMMIN(xd->n8_w, n8_w); k++)
- marked[(n + j) * xd->n8_w + i + k] = 1;
-
if (mbmi->ref_frame[0] == ref_frame &&
mbmi->ref_frame[1] == NONE_FRAME) {
record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
- global_offset_c, -n, -1, i, 1);
+ global_offset_c, 0, -1, i, 1);
pts += 2;
pts_inref += 2;
pts_mv += 2;
@@ -1147,58 +2124,41 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
}
assert(2 * np <= SAMPLES_ARRAY_SIZE);
- // scan the left columns
+ // scan the nearest left columns
if (left_available) {
- for (n = 0; n < bs; n++) {
- int mi_col_offset = -1 * (n + 1);
-
- if (!n) {
- MODE_INFO *mi = xd->mi[mi_col_offset];
- MB_MODE_INFO *mbmi = &mi->mbmi;
- uint8_t n8_h = mi_size_high[mbmi->sb_type];
-
- // Handle "current block height <= above block height" case.
- if (xd->n8_h <= n8_h) {
- int row_offset = -mi_row % n8_h;
-
- if (row_offset < 0) do_tl = 0;
-
- if (mbmi->ref_frame[0] == ref_frame &&
- mbmi->ref_frame[1] == NONE_FRAME) {
- record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
- global_offset_c, row_offset, 1, 0, -1);
- pts += 2;
- pts_inref += 2;
- pts_mv += 2;
- np++;
- }
- break;
- }
- }
+ int mi_col_offset = -1;
- // Handle "current block height > above block height" case.
- if (!n) memset(marked, 0, bs * xd->n8_h * sizeof(*marked));
+ MODE_INFO *mi = xd->mi[mi_col_offset];
+ MB_MODE_INFO *mbmi = &mi->mbmi;
+ uint8_t n8_h = mi_size_high[mbmi->sb_type];
+ if (xd->n8_h <= n8_h) {
+ // Handle "current block height <= above block height" case.
+ int row_offset = -mi_row % n8_h;
+
+ if (row_offset < 0) do_tl = 0;
+
+ if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
+ record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
+ global_offset_c, row_offset, 1, 0, -1);
+ pts += 2;
+ pts_inref += 2;
+ pts_mv += 2;
+ np++;
+ }
+ } else {
+ // Handle "current block height > above block height" case.
for (i = 0; i < AOMMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
int mi_row_offset = i;
- MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *mbmi = &mi->mbmi;
- uint8_t n8_w = mi_size_wide[mbmi->sb_type];
- uint8_t n8_h = mi_size_high[mbmi->sb_type];
-
+ mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ mbmi = &mi->mbmi;
+ n8_h = mi_size_high[mbmi->sb_type];
mi_step = AOMMIN(xd->n8_h, n8_h);
- // Processed already
- if (marked[n * xd->n8_h + i]) continue;
-
- for (j = 0; j < AOMMIN(bs, n8_w); j++)
- for (k = 0; k < AOMMIN(xd->n8_h, n8_h); k++)
- marked[(n + j) * xd->n8_h + i + k] = 1;
-
if (mbmi->ref_frame[0] == ref_frame &&
mbmi->ref_frame[1] == NONE_FRAME) {
record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
- global_offset_c, i, 1, -n, -1);
+ global_offset_c, i, 1, 0, -1);
pts += 2;
pts_inref += 2;
pts_mv += 2;
@@ -1229,7 +2189,8 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
assert(2 * np <= SAMPLES_ARRAY_SIZE);
// Top-right block
- if (do_tr && has_top_right(xd, mi_row, mi_col, AOMMAX(xd->n8_w, xd->n8_h))) {
+ if (do_tr &&
+ has_top_right(cm, xd, mi_row, mi_col, AOMMAX(xd->n8_w, xd->n8_h))) {
POSITION trb_pos = { -1, xd->n8_w };
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, cm, &trb_pos)) {
diff --git a/third_party/aom/av1/common/mvref_common.h b/third_party/aom/av1/common/mvref_common.h
index 8caa74535..348887e43 100644
--- a/third_party/aom/av1/common/mvref_common.h
+++ b/third_party/aom/av1/common/mvref_common.h
@@ -19,6 +19,8 @@ extern "C" {
#endif
#define MVREF_NEIGHBOURS 9
+#define MVREF_ROWS 3
+#define MVREF_COLS 4
typedef struct position {
int row;
@@ -51,19 +53,16 @@ static const int mode_2_counter[] = {
9, // D153_PRED
9, // D207_PRED
9, // D63_PRED
-#if CONFIG_ALT_INTRA
9, // SMOOTH_PRED
#if CONFIG_SMOOTH_HV
9, // SMOOTH_V_PRED
9, // SMOOTH_H_PRED
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
9, // TM_PRED
0, // NEARESTMV
0, // NEARMV
3, // ZEROMV
1, // NEWMV
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
0, // SR_NEAREST_NEARMV
// 1, // SR_NEAREST_NEWMV
@@ -79,7 +78,6 @@ static const int mode_2_counter[] = {
1, // NEW_NEARMV
3, // ZERO_ZEROMV
1, // NEW_NEWMV
-#endif // CONFIG_EXT_INTER
};
// There are 3^3 different combinations of 3 counts that can be either 0,1 or
@@ -209,11 +207,46 @@ static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row,
}
}
-static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
- if (!allow_hp) {
- if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1);
- if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1);
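+// Clamp a candidate motion-field offset so that the referenced mi position
+// stays inside the current tile (or, when dependent horizontal tiles are in
+// use and the tile is not at a tile-group boundary, inside the frame).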
+static INLINE int find_valid_row_offset(const TileInfo *const tile, int mi_row,
+ int mi_rows, const AV1_COMMON *cm,
+ int row_offset) {
+#if CONFIG_DEPENDENT_HORZTILES
+ const int dependent_horz_tile_flag = cm->dependent_horz_tiles;
+#else
+ const int dependent_horz_tile_flag = 0;
+ (void)cm;
+#endif
+ if (dependent_horz_tile_flag && !tile->tg_horz_boundary)
+ return clamp(row_offset, -mi_row, mi_rows - mi_row - 1);
+ else
+ return clamp(row_offset, tile->mi_row_start - mi_row,
+ tile->mi_row_end - mi_row - 1);
+}
+
+static INLINE int find_valid_col_offset(const TileInfo *const tile, int mi_col,
+ int col_offset) {
+ return clamp(col_offset, tile->mi_col_start - mi_col,
+ tile->mi_col_end - mi_col - 1);
+}
+
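+// With CONFIG_AMVR the MV may be forced to whole-pel units; otherwise, when
+// high-precision (1/8-pel) MVs are not allowed, odd components are rounded
+// toward zero, e.g. { row = 5, col = -3 } becomes { row = 4, col = -2 }.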
+static INLINE void lower_mv_precision(MV *mv, int allow_hp
+#if CONFIG_AMVR
+ ,
+ int is_integer
+#endif
+ ) {
+#if CONFIG_AMVR
+ if (is_integer) {
+ integer_mv_precision(mv);
+ } else {
+#endif
+ if (!allow_hp) {
+ if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1);
+ if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1);
+ }
+#if CONFIG_AMVR
}
+#endif
}
static INLINE uint8_t av1_get_pred_diff_ctx(const int_mv pred_mv,
@@ -280,10 +313,8 @@ static MV_REFERENCE_FRAME ref_frame_map[COMP_REFS][2] = {
{ LAST_FRAME, BWDREF_FRAME }, { LAST2_FRAME, BWDREF_FRAME },
{ LAST3_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, BWDREF_FRAME },
-#if CONFIG_ALTREF2
{ LAST_FRAME, ALTREF2_FRAME }, { LAST2_FRAME, ALTREF2_FRAME },
{ LAST3_FRAME, ALTREF2_FRAME }, { GOLDEN_FRAME, ALTREF2_FRAME },
-#endif // CONFIG_ALTREF2
{ LAST_FRAME, ALTREF_FRAME }, { LAST2_FRAME, ALTREF_FRAME },
{ LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME }
@@ -357,39 +388,49 @@ static INLINE uint8_t av1_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
return 0;
}
+#if CONFIG_FRAME_MARKER
+void av1_setup_frame_buf_refs(AV1_COMMON *cm);
+#if CONFIG_FRAME_SIGN_BIAS
+void av1_setup_frame_sign_bias(AV1_COMMON *cm);
+#endif // CONFIG_FRAME_SIGN_BIAS
+#if CONFIG_MFMV
+void av1_setup_motion_field(AV1_COMMON *cm);
+#endif // CONFIG_MFMV
+#endif // CONFIG_FRAME_MARKER
+
+void av1_copy_frame_mvs(const AV1_COMMON *const cm, MODE_INFO *mi, int mi_row,
+ int mi_col, int x_mis, int y_mis);
+
typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack,
-#if CONFIG_EXT_INTER
- int16_t *compound_mode_context,
-#endif // CONFIG_EXT_INTER
- int_mv *mv_ref_list, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data,
- int16_t *mode_context);
+ int16_t *compound_mode_context, int_mv *mv_ref_list,
+ int mi_row, int mi_col, find_mv_refs_sync sync,
+ void *const data, int16_t *mode_context);
// Check a list of motion vectors by SAD score, using a number of rows of
// pixels above and a number of columns of pixels to the left, to select
// the one with the best score to use as the reference motion vector.
+#if CONFIG_AMVR
+void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
+ int_mv *near_mv, int is_integer);
+#else
void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
int_mv *near_mv);
+#endif
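+// Usage sketch (illustrative, not from this patch): the candidate list
+// produced by av1_find_mv_refs() is typically passed in directly, e.g.
+//   av1_find_best_ref_mvs(allow_hp, mv_ref_list, &nearest_mv, &near_mv);
+// under CONFIG_AMVR the extra is_integer flag forces whole-pel results.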
void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
int block, int ref, int mi_row, int mi_col,
CANDIDATE_MV *ref_mv_stack,
- uint8_t *ref_mv_count,
-#if CONFIG_EXT_INTER
- int_mv *mv_list,
-#endif // CONFIG_EXT_INTER
+ uint8_t *ref_mv_count, int_mv *mv_list,
int_mv *nearest_mv, int_mv *near_mv);
-#if CONFIG_EXT_INTER
// This function keeps a mode count for a given MB/SB
void av1_update_mv_context(const AV1_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list, int block, int mi_row,
int mi_col, int16_t *mode_context);
-#endif // CONFIG_EXT_INTER
#if CONFIG_WARPED_MOTION
#if WARPED_MOTION_SORT_SAMPLES
diff --git a/third_party/aom/av1/common/ncobmc_kernels.c b/third_party/aom/av1/common/ncobmc_kernels.c
new file mode 100644
index 000000000..af951398b
--- /dev/null
+++ b/third_party/aom/av1/common/ncobmc_kernels.c
@@ -0,0 +1,1181 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/common/ncobmc_kernels.h"
+
+// These kernels are only used in the "ncobmc-adapt-weight" experiment, which
+// blends four predictions to form the final prediction for an inter block.
+// The indices of the default kernels correspond to:
+// 1. the index of the kernel size (ADAPT_OVERLAP_BLOCKS)
+// 2. the interpolation mode (NCOBMC_MODE)
+// 3. the prediction the kernel applies to
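+//
+// A sketch of how a kernel of this shape is consumed (the exact formula is
+// illustrative, not lifted from the codec): at each pixel, the four kernels
+// of one (size, mode) pair weight the four component predictions,
+//
+//   pred[r][c] = (k0[r][c] * p0[r][c] + k1[r][c] * p1[r][c] +
+//                 k2[r][c] * p2[r][c] + k3[r][c] * p3[r][c]) >> 14,
+//
+// the weights being normalized to roughly 1 << 14 per position; e.g. at
+// [0][0] of the first four tables below, 5684 + 5541 + 6026 - 833 = 16418.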
+
+static int16_t default_ncobmc_krnl_0_0_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 5684, 3601, 1367, 364, 1509, 2313, 4007, 5080 },
+ { 3728, 2486, 827, 196, 1434, 2034, 2868, 3000 },
+ { 1643, 1465, 726, 208, 890, 1386, 1242, 1293 },
+ { 794, 723, 277, -237, 206, 487, 749, 896 },
+ { 1176, 730, 286, 136, 281, 262, 724, 953 },
+ { 2086, 1958, 783, 539, 751, 984, 1143, 1491 },
+ { 2665, 2520, 1402, 1037, 939, 1223, 1593, 1937 },
+ { 3451, 3172, 2350, 1291, 1069, 1916, 2672, 3223 }
+};
+static int16_t default_ncobmc_krnl_0_0_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 5541, 8123, 10470, 11908, 11291, 10382, 8800, 6446 },
+ { 3338, 5536, 7249, 8080, 7671, 6428, 5280, 3900 },
+ { 1732, 3087, 3842, 4325, 4034, 2929, 2318, 1800 },
+ { 744, 1217, 1559, 2215, 1957, 1352, 707, 322 },
+ { 685, 1082, 1792, 2300, 1975, 1350, 738, 671 },
+ { 1168, 2336, 3303, 3965, 3790, 3098, 2909, 2141 },
+ { 3005, 4370, 5806, 6716, 6282, 5553, 4782, 3453 },
+ { 4748, 6650, 7779, 9010, 9208, 8184, 6987, 5197 }
+};
+static int16_t default_ncobmc_krnl_0_0_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 6026, 4784, 2400, 1250, 1002, 2371, 3320, 5285 },
+ { 8638, 6094, 3257, 1498, 1297, 3145, 5252, 7625 },
+ { 10859, 7249, 3868, 1871, 1813, 3569, 6577, 8858 },
+ { 11432, 8123, 4216, 1786, 2477, 4370, 6669, 9366 },
+ { 11894, 8466, 4870, 1917, 2479, 4656, 7057, 9383 },
+ { 11109, 7432, 3924, 1288, 2018, 3946, 6660, 9877 },
+ { 10138, 6548, 2830, 461, 2087, 3810, 6170, 9255 },
+ { 8613, 5163, 1658, 279, 1694, 3082, 4807, 7897 }
+};
+static int16_t default_ncobmc_krnl_0_0_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { -833, -80, 2193, 2907, 2623, 1359, 298, -383 },
+ { 705, 2300, 5090, 6649, 6024, 4820, 3020, 1892 },
+ { 2189, 4625, 7990, 10015, 9679, 8539, 6284, 4464 },
+ { 3445, 6356, 10371, 12660, 11773, 10205, 8287, 5828 },
+ { 2664, 6149, 9483, 12064, 11681, 10156, 7908, 5409 },
+ { 2040, 4690, 8405, 10631, 9862, 8396, 5711, 2909 },
+ { 626, 2993, 6387, 8212, 7123, 5840, 3877, 1788 },
+ { -402, 1431, 4636, 5850, 4461, 3246, 1964, 122 }
+};
+static int16_t default_ncobmc_krnl_0_1_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 1465, 553, -76, 10, 635, 756, 1843, 3144 },
+ { 687, 117, -404, -300, 238, 280, 696, 1415 },
+ { 49, -38, -224, -241, -135, -209, -237, 382 },
+ { 48, 37, -266, -273, -235, -137, -208, -94 },
+ { 555, -3, -132, -172, -98, 93, 347, 313 },
+ { 887, 256, -266, -307, 304, 222, -98, 82 },
+ { 1701, 816, 454, 501, 119, 230, 450, 551 },
+ { 2732, 1502, 1174, 540, 323, 709, 1002, 936 }
+};
+static int16_t default_ncobmc_krnl_0_1_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 7707, 10467, 11036, 10942, 10165, 9420, 8728, 5835 },
+ { 3167, 5146, 5854, 5771, 4914, 4684, 4357, 3154 },
+ { 900, 1646, 2412, 2014, 1974, 1986, 1776, 1005 },
+ { -198, -179, 488, 737, 866, 784, 828, 236 },
+ { -469, 32, 402, 574, 738, 495, 242, -187 },
+ { 186, 1078, 1378, 1480, 1226, 1506, 1656, 745 },
+ { 1531, 2525, 3139, 3367, 3535, 3519, 3095, 2171 },
+ { 3152, 5453, 6176, 7089, 7310, 6879, 6483, 4916 }
+};
+static int16_t default_ncobmc_krnl_0_1_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 7378, 3775, 1232, 453, 133, 936, 1688, 4950 },
+ { 10336, 5944, 2400, 1175, 168, 954, 2894, 6436 },
+ { 11176, 6145, 2051, 829, 543, 1193, 3403, 6517 },
+ { 10387, 6062, 2036, 646, 507, 1077, 2998, 6029 },
+ { 10768, 6277, 2226, 677, 321, 982, 2845, 6378 },
+ { 10072, 5808, 1937, 873, 372, 1396, 3498, 7298 },
+ { 8951, 4724, 1216, 104, 656, 1830, 3722, 7558 },
+ { 7447, 3372, 468, -135, 99, 1177, 2980, 7260 }
+};
+static int16_t default_ncobmc_krnl_0_1_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { -229, 1545, 4135, 4921, 5405, 5226, 4081, 2418 },
+ { 2120, 5121, 8485, 9692, 11018, 10406, 8380, 5338 },
+ { 4205, 8593, 12099, 13717, 13936, 13366, 11402, 8436 },
+ { 6068, 10382, 14047, 15190, 15155, 14577, 12684, 10145 },
+ { 5458, 10012, 13836, 15243, 15361, 14752, 12876, 9818 },
+ { 5153, 9162, 13256, 14256, 14385, 13170, 11245, 8186 },
+ { 4140, 8257, 11521, 12362, 12028, 10762, 9062, 6053 },
+ { 2966, 5975, 8490, 8807, 8561, 7529, 5836, 3204 }
+};
+static int16_t default_ncobmc_krnl_1_0_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 4414, 2642, 2518, 1763, 1089, 644, 355, 254, -234, 454, 399, 228, 525, 785,
+ 558, 919 },
+ { 2989, 3035, 2685, 1476, 1872, 768, 820, -309, -107, 273, 87, 286, 499, 638,
+ 929, 1025 },
+ { 1779, 1672, 1713, 645, 953, 151, 617, 79, -91, 185, 18, 307, 794, 681, 484,
+ 521 },
+ { 1429, 1571, 1893, 1493, 949, 288, -232, -248, -152, 179, -50, 74, 107, 329,
+ 539, 822 },
+ { 1444, 852, 1022, 688, 850, 205, 135, -629, 334, 96, 106, 337, 259, 300, 150,
+ 680 },
+ { 962, 367, 329, 921, 591, -79, 146, 201, 296, 179, -190, 143, 46, -107, 215,
+ 853 },
+ { 915, 865, 463, 169, 498, -390, 12, 202, 225, 490, 410, 483, 52, 99, 293,
+ 569 },
+ { 728, -135, 241, 383, 288, -69, 33, 421, 523, 506, 376, 58, 143, -4, 151,
+ 218 },
+ { 337, 65, 255, 282, 173, 267, 237, 15, 38, 114, 253, 110, 32, 227, 92, -48 },
+ { 317, 115, 295, 231, 380, 435, 331, -97, 392, 393, 51, 59, 276, 41, -33,
+ 46 },
+ { 31, -14, 86, 250, -36, -214, 210, -79, -117, 401, 193, 440, 171, 200, 8,
+ 112 },
+ { 46, 19, 165, -6, 75, 180, 266, -98, 76, 276, 6, 29, 230, -49, 177, 168 },
+ { 104, -243, -121, 295, -8, 180, 16, -44, 232, 315, 176, 10, 0, -95, -154,
+ 141 },
+ { 248, 201, 361, 430, -20, -45, 209, -44, 222, 540, 527, 297, 240, 625, 531,
+ 409 },
+ { 91, 37, 193, 177, 233, 210, -299, 120, 327, 214, 293, 189, 86, 123, 206,
+ 356 },
+ { 501, 779, 295, 199, 148, 81, -31, 70, 211, 309, 300, 110, 227, 30, 242,
+ 261 }
+};
+static int16_t default_ncobmc_krnl_1_0_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 6603, 7905, 7762, 8159, 8426, 10334, 10792, 10984, 12097, 10534, 11216,
+ 10624, 9514, 8715, 8672, 8846 },
+ { 5897, 6238, 6272, 7323, 7162, 8091, 9465, 9845, 9929, 9747, 10562, 10737,
+ 9059, 7651, 7330, 7314 },
+ { 5572, 6017, 5568, 7112, 6617, 6501, 7334, 8494, 8900, 8826, 9852, 8034,
+ 6956, 7691, 7513, 6106 },
+ { 4564, 3877, 4682, 4586, 5135, 5795, 7968, 7859, 7720, 6548, 6306, 5639,
+ 6357, 6514, 6493, 5609 },
+ { 4142, 4154, 3332, 4193, 3873, 4977, 4685, 5787, 5707, 5300, 5854, 4720,
+ 5452, 5642, 4810, 4250 },
+ { 2993, 3176, 3012, 2637, 2664, 4336, 4207, 3687, 4627, 4487, 4847, 4120,
+ 4079, 3931, 3730, 3205 },
+ { 2479, 2268, 1858, 1737, 2266, 2806, 2919, 3017, 3231, 2964, 3181, 3423,
+ 3096, 3025, 2684, 2353 },
+ { 1969, 2001, 1997, 1959, 1323, 1565, 1963, 1351, 1957, 1711, 2093, 2057,
+ 1762, 1926, 1118, 1367 },
+ { 1473, 816, 655, 1628, 1252, 1764, 1723, 1675, 2559, 3029, 1951, 2160, 2305,
+ 2299, 1688, 1048 },
+ { 3073, 1667, 1324, 1360, 1562, 1774, 2154, 2740, 3281, 3434, 3258, 4095,
+ 2823, 2443, 2894, 2449 },
+ { 3813, 2830, 3352, 2125, 2627, 2974, 3847, 3720, 4592, 4846, 4787, 5066,
+ 4598, 4229, 4032, 3478 },
+ { 3415, 2733, 3827, 3637, 3381, 3743, 3768, 4732, 5055, 5445, 5870, 5937,
+ 5734, 5980, 5010, 4954 },
+ { 4878, 3604, 5532, 4558, 4210, 4880, 4847, 5771, 5136, 6486, 7096, 6426,
+ 5765, 6824, 6094, 5753 },
+ { 6076, 5817, 5318, 5268, 5784, 5482, 6453, 6582, 6803, 7077, 8113, 8173,
+ 8329, 7653, 6448, 6476 },
+ { 7549, 5758, 5554, 6383, 7113, 7664, 7123, 6712, 8539, 8997, 9047, 8794,
+ 8700, 8760, 7600, 7995 },
+ { 7698, 7133, 7048, 7498, 7821, 8401, 9152, 8647, 8934, 8874, 8595, 8789,
+ 8828, 8766, 9019, 8783 }
+};
+static int16_t default_ncobmc_krnl_1_0_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 5573, 5972, 5705, 5033, 5738, 3189, 2009, 1476, 2057, 2178, 1869, 2927,
+ 3305, 4036, 4017, 5328 },
+ { 7539, 7568, 7302, 5564, 4410, 3954, 2153, 2693, 622, 1831, 1753, 1636, 3552,
+ 4374, 4319, 6015 },
+ { 8753, 7544, 6620, 5710, 6142, 5819, 2731, 2898, 1702, 1487, 2249, 1688,
+ 4110, 4483, 5108, 5621 },
+ { 9273, 7922, 6245, 6310, 6442, 5272, 3068, 2649, 1599, 2693, 3219, 4501,
+ 4588, 4310, 5647, 6894 },
+ { 9697, 8245, 7267, 6551, 5199, 4626, 3466, 3256, 2099, 3125, 3608, 4297,
+ 3944, 5468, 6056, 7545 },
+ { 8831, 8583, 7466, 6937, 6056, 5482, 3407, 3324, 1802, 3128, 3078, 4560,
+ 4560, 5901, 6131, 7612 },
+ { 9556, 7457, 6602, 7342, 5370, 4431, 3573, 3339, 1668, 3172, 3779, 4564,
+ 5744, 7244, 8522, 8407 },
+ { 10238, 8809, 7064, 6643, 4885, 4246, 2737, 2684, 2501, 3443, 3761, 6174,
+ 5891, 6882, 7585, 8821 },
+ { 10151, 10001, 8289, 6859, 6054, 4903, 3809, 3540, 2644, 3424, 3542, 4649,
+ 5389, 5384, 6733, 8360 },
+ { 9635, 9516, 7609, 7438, 6181, 4529, 4140, 3439, 2568, 3338, 3789, 5195,
+ 5510, 6181, 7566, 8512 },
+ { 9988, 8848, 6807, 6731, 6139, 5355, 3797, 4097, 3364, 3319, 4230, 5136,
+ 5581, 6125, 7748, 8229 },
+ { 10252, 9244, 7204, 7260, 6478, 6040, 4659, 3920, 2869, 3263, 4068, 5475,
+ 5714, 7183, 7153, 8318 },
+ { 9682, 9366, 7096, 6059, 6036, 4463, 3898, 3477, 2065, 2704, 4434, 5167,
+ 5502, 6743, 8002, 7443 },
+ { 9252, 8211, 6399, 6747, 6498, 5626, 4016, 3880, 2172, 2557, 3576, 4270,
+ 4968, 5115, 6840, 7550 },
+ { 8753, 8157, 7097, 6500, 5779, 5174, 4190, 2645, 2380, 3239, 4155, 5263,
+ 5437, 5337, 5663, 6667 },
+ { 9680, 7710, 6921, 5657, 4863, 3990, 3485, 2172, 2620, 3003, 3328, 4112,
+ 4806, 6020, 6833, 7212 }
+};
+static int16_t default_ncobmc_krnl_1_0_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { -219, -121, 416, 1445, 1150, 2238, 3251, 3695, 2496, 3247, 2925, 2639, 3064,
+ 2869, 3167, 1320 },
+ { -68, -450, 130, 2039, 2952, 3584, 3966, 4171, 5961, 4555, 3995, 3732, 3281,
+ 3731, 3827, 2052 },
+ { 262, 1161, 2497, 2934, 2690, 3939, 5735, 4938, 5906, 5924, 4296, 6388, 4553,
+ 3551, 3297, 4164 },
+ { 1091, 3025, 3566, 4005, 3874, 5040, 5600, 6151, 7241, 6990, 6924, 6186,
+ 5356, 5256, 3726, 3083 },
+ { 1079, 3140, 4769, 4958, 6480, 6589, 8111, 7988, 8255, 7879, 6838, 7052,
+ 6751, 5005, 5393, 3931 },
+ { 3566, 4255, 5572, 5909, 7098, 6653, 8641, 9199, 9689, 8617, 8673, 7591,
+ 7733, 6676, 6324, 4737 },
+ { 3411, 5802, 7481, 7149, 8259, 9553, 9900, 9854, 11285, 9779, 9040, 7939,
+ 7515, 6037, 4902, 5075 },
+ { 3417, 5718, 7095, 7425, 9913, 10666, 11679, 11951, 11429, 10749, 10173,
+ 8116, 8610, 7605, 7548, 5992 },
+ { 4408, 5515, 7201, 7627, 8922, 9470, 10636, 11166, 11159, 9844, 10673, 9502,
+ 8693, 8503, 7905, 7046 },
+ { 3340, 5097, 7171, 7366, 8273, 9660, 9784, 10332, 10155, 9232, 9301, 7056,
+ 7798, 7746, 5981, 5402 },
+ { 2531, 4732, 6148, 7284, 7672, 8287, 8551, 8672, 8567, 7846, 7199, 5757,
+ 6057, 5863, 4613, 4578 },
+ { 2646, 4394, 5195, 5511, 6471, 6443, 7713, 7854, 8408, 7427, 6461, 4968,
+ 4731, 3294, 4066, 2960 },
+ { 1692, 3664, 3881, 5480, 6162, 6871, 7635, 7198, 8963, 6891, 4694, 4801,
+ 5141, 2932, 2459, 3060 },
+ { 769, 2144, 4310, 3945, 4125, 5329, 5712, 5975, 7200, 6220, 4179, 3662, 2868,
+ 3007, 2579, 1958 },
+ { -45, 2434, 3549, 3335, 3273, 3357, 5394, 6931, 5159, 3956, 2912, 2164, 2187,
+ 2187, 2935, 1388 },
+ { -1514, 786, 2135, 3045, 3561, 3922, 3800, 5515, 4650, 4225, 4169, 3387,
+ 2539, 1590, 317, 161 }
+};
+static int16_t default_ncobmc_krnl_1_1_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 2375, 1912, 1469, 213, 933, -507, -173, -369, -333, 187, -128, 427, 999,
+ 1166, 1515, 2728 },
+ { 1857, 1085, 817, 454, 598, 479, 53, -218, -611, 240, 76, 31, 284, 1347,
+ 1738, 1317 },
+ { 1911, 531, 453, 89, 639, -361, -331, -605, -162, 63, -154, 259, 446, 390,
+ 708, 1113 },
+ { 818, 1304, 871, 665, 1006, -114, -405, -407, 331, 203, 304, 506, 476, 1053,
+ 1155, 879 },
+ { 1054, 874, 714, -162, 624, -144, -306, -541, 30, -281, 296, 812, 418, 858,
+ 755, 252 },
+ { 967, 388, 354, 878, 31, -691, -244, -307, 425, 281, 0, -50, 110, -107, 279,
+ 255 },
+ { 152, -53, 156, 266, 192, -864, -236, -110, 397, 484, -129, 14, 22, 44, -90,
+ 278 },
+ { 203, -54, 103, -142, -598, -741, -546, -26, 545, 253, -43, -234, -391, -504,
+ -158, -143 },
+ { 387, 275, 136, 69, -289, -9, -210, -364, 39, 3, 4, 61, -66, -102, -94,
+ -215 },
+ { 195, 213, 433, 158, 128, -131, -203, -266, -132, -285, -301, -367, -315,
+ -249, -144, -9 },
+ { 600, 145, 418, 277, 156, -118, 85, -20, 119, 260, 41, 72, -85, 316, -97,
+ -41 },
+ { 682, 610, 356, 880, 527, 272, 90, 92, -124, 314, -204, -339, -590, -384,
+ -248, -192 },
+ { 999, 423, 208, 752, 623, 409, 91, -57, -3, -124, 148, 255, -7, 112, -128,
+ -144 },
+ { 1007, 710, 609, 766, 264, -163, 324, 291, 219, -61, 24, 507, 74, 109, 127,
+ 629 },
+ { 2211, 878, 853, 462, 86, 203, -71, 122, -36, 131, 308, 267, 210, 369, 50,
+ -96 },
+ { 1810, 1630, 1123, 645, 610, 217, -93, -37, -220, -341, -250, -110, 135, 0,
+ 112, 93 }
+};
+static int16_t default_ncobmc_krnl_1_1_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 5824, 7106, 8063, 8929, 8632, 9731, 10164, 11047, 11088, 10239, 10606, 8987,
+ 8411, 7117, 6115, 5322 },
+ { 4980, 6239, 7135, 7245, 7418, 8526, 9440, 9438, 8119, 8336, 7993, 8057,
+ 6686, 5210, 4193, 4841 },
+ { 2436, 4500, 5019, 5908, 5578, 7270, 7391, 7974, 7281, 6871, 6705, 6327,
+ 4867, 4521, 4286, 3605 },
+ { 2298, 3501, 4714, 4692, 4835, 5278, 5830, 4968, 4435, 4824, 4373, 4085,
+ 3825, 2657, 2539, 2557 },
+ { 1643, 2741, 2604, 2664, 1877, 3334, 2995, 3162, 3367, 3104, 3356, 2827,
+ 3577, 2359, 1755, 2140 },
+ { 742, 1397, 1315, 1332, 1864, 3032, 2472, 2253, 1692, 2071, 2260, 2426, 1951,
+ 1610, 1189, 1275 },
+ { 482, 869, 598, 288, 769, 1490, 1284, 1692, 883, 1061, 1259, 1239, 1118, 585,
+ 219, 571 },
+ { 178, 278, 580, 915, 717, 873, 1012, 721, 52, 348, 624, 540, 691, 102, -108,
+ 383 },
+ { -718, -648, -223, -520, -1000, -754, -688, -639, -528, -414, -440, -365,
+ -268, -546, -672, -332 },
+ { -256, -226, -395, -158, -311, -325, -66, 87, 533, 705, 261, 344, 484, 692,
+ 155, 507 },
+ { 204, 448, 131, -571, 889, 712, 626, 349, 261, 578, 240, 1012, 849, 900, 889,
+ 977 },
+ { 132, 1395, 1847, 1181, 845, 1203, 1920, 2068, 2141, 2071, 1834, 2191, 2130,
+ 2522, 1537, 1326 },
+ { 140, 1278, 2440, 2063, 1581, 2204, 2781, 2532, 1677, 2426, 2538, 2210, 1568,
+ 2564, 2394, 1945 },
+ { 2943, 3776, 3833, 3310, 3900, 4118, 4161, 3571, 4059, 4143, 4145, 4273,
+ 4034, 3940, 3720, 2418 },
+ { 3437, 3906, 4106, 4294, 5303, 5257, 4956, 4027, 5935, 5373, 4102, 4853,
+ 5331, 5251, 3964, 4748 },
+ { 5493, 5799, 5966, 6535, 7015, 7397, 8011, 6526, 5832, 6257, 6247, 7097,
+ 6499, 6272, 5963, 5593 }
+};
+static int16_t default_ncobmc_krnl_1_1_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 6049, 4906, 3617, 2960, 2187, 1950, 556, 497, 688, 355, 503, 1054, 1170,
+ 1641, 2343, 4226 },
+ { 7459, 6408, 4326, 3635, 2042, 1565, 492, 572, 746, 338, 719, 797, 2540,
+ 2283, 2301, 4089 },
+ { 8025, 6914, 5072, 4249, 2793, 1910, 430, 1137, -150, 451, 1061, 872, 1515,
+ 2805, 3823, 4550 },
+ { 9615, 6936, 5226, 3388, 2611, 2061, 801, 1003, -537, 72, 736, 1347, 2215,
+ 3509, 4262, 5097 },
+ { 9677, 6521, 5633, 5223, 2996, 2449, 1300, 1136, 160, 918, 488, 801, 2306,
+ 3781, 4818, 6441 },
+ { 9988, 7509, 6019, 4950, 3376, 2777, 1427, 1395, -118, 310, 393, 1626, 3387,
+ 3649, 4737, 7431 },
+ { 10542, 7745, 5192, 4494, 1637, 1960, 1212, 1056, -309, 383, 1166, 2107,
+ 4048, 4030, 7206, 7851 },
+ { 9350, 7480, 4343, 3589, 1748, 1687, 1057, 898, 592, 776, 680, 1960, 3804,
+ 4598, 5688, 7834 },
+ { 8769, 7236, 5518, 4182, 2776, 2412, 915, 1370, 789, 561, 520, 1146, 3139,
+ 4730, 5542, 7514 },
+ { 9580, 7116, 5910, 4623, 3085, 2450, 1703, 745, 419, 600, 1077, 1208, 3256,
+ 4261, 5611, 6709 },
+ { 9725, 7053, 5594, 4217, 2573, 1834, 562, 512, 496, 356, 883, 1360, 3323,
+ 4866, 5632, 7594 },
+ { 10110, 7367, 5622, 3858, 3720, 2398, 1075, 1687, 616, 461, 1082, 1786, 2570,
+ 4242, 5731, 8319 },
+ { 9416, 7582, 6054, 3915, 3283, 2035, 1335, 1138, 317, 92, 253, 483, 1715,
+ 3597, 5613, 8103 },
+ { 8693, 6881, 4626, 3505, 2663, 1949, 751, 792, -343, 55, 303, 460, 2027,
+ 3584, 6230, 8704 },
+ { 7368, 6609, 5087, 3861, 2790, 1746, 1487, 518, 497, -165, 439, 904, 2514,
+ 3735, 6082, 6914 },
+ { 7004, 5321, 3472, 2621, 1221, 999, 1172, 377, 850, 864, 866, 647, 2574,
+ 3977, 6416, 7777 }
+};
+static int16_t default_ncobmc_krnl_1_1_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 2085, 2421, 3201, 4245, 4593, 5179, 5800, 5172, 4904, 5558, 5357, 5889,
+ 5769, 6415, 6377, 4080 },
+ { 2031, 2607, 4062, 5018, 6279, 5766, 6373, 6562, 8085, 7434, 7557, 7449,
+ 6834, 7509, 8119, 6106 },
+ { 3960, 4394, 5800, 6108, 7339, 7531, 8876, 7849, 9371, 8973, 8753, 8896,
+ 9525, 8636, 7540, 7092 },
+ { 3599, 4610, 5527, 7597, 7898, 9121, 10115, 10783, 12123, 11248, 10928,
+ 10406, 9827, 9129, 8401, 7814 },
+ { 3953, 6203, 7382, 8619, 10852, 10722, 12369, 12580, 12777, 12605, 12198,
+ 11899, 10047, 9350, 9018, 7521 },
+ { 4615, 7038, 8644, 9190, 11073, 11216, 12685, 13003, 14345, 13679, 13689,
+ 12344, 10902, 11188, 10148, 7399 },
+ { 5141, 7775, 10402, 11309, 13751, 13759, 14094, 13720, 15371, 14418, 14061,
+ 12988, 11166, 11692, 9019, 7665 },
+ { 6591, 8644, 11320, 11985, 14476, 14526, 14816, 14745, 15159, 14966, 15071,
+ 14071, 12238, 12154, 10931, 8266 },
+ { 7897, 9483, 10910, 12615, 14865, 14701, 16336, 15966, 16036, 16200, 16266,
+ 15506, 13546, 12270, 11580, 9377 },
+ { 6808, 9239, 10394, 11719, 13438, 14348, 14923, 15789, 15519, 15341, 15316,
+ 15166, 12927, 11656, 10736, 9138 },
+ { 5796, 8696, 10198, 12417, 12722, 13926, 15077, 15506, 15468, 15155, 15184,
+ 13906, 12262, 10270, 9924, 7815 },
+ { 5386, 6960, 8500, 10429, 11262, 12474, 13263, 12505, 13713, 13502, 13632,
+ 12702, 12233, 9964, 9329, 6889 },
+ { 5768, 7049, 7630, 9626, 10868, 11697, 12128, 12718, 14351, 13953, 13402,
+ 13389, 13063, 10072, 8470, 6445 },
+ { 3665, 4962, 7272, 8760, 9507, 10431, 11095, 11676, 12400, 12216, 11874,
+ 11099, 10214, 8725, 6279, 4598 },
+ { 3293, 4948, 6288, 7711, 8156, 9140, 9976, 11683, 9946, 11003, 11496, 10325,
+ 8287, 6988, 6251, 4796 },
+ { 2010, 3599, 5789, 6548, 7490, 7725, 7264, 9488, 9893, 9573, 9487, 8725,
+ 7145, 6110, 3858, 2891 }
+};
+static int16_t default_ncobmc_krnl_2_0_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 3437, 3490, 4578, 2066, 1672, 1354, 1502, 2345, 2273, -600, 52,
+ 272, 484, 2214, -1553, -197, 165, 278, 306, 384, 73, 415,
+ -213, 357, 497, 288, 714, 6, -82, -118, 170, 181 },
+ { 2505, 3488, 306, 3011, 2631, 181, 636, 2608, 1663, -964, 594,
+ -1455, 1057, -1198, -1647, 187, 404, 412, 177, -32, 269, -24,
+ 148, 233, -290, -359, -178, -164, -362, -19, -408, 106 },
+ { 2588, 3528, 3391, 3134, 1812, 2387, -34, -298, -13, -955, 40,
+ -475, 1243, 283, -247, -484, 200, -46, 36, -642, -386, -438,
+ 34, 295, 93, -528, -13, 412, -8, 41, -457, 28 },
+ { 796, 3353, 435, 3473, 458, 1851, 519, 1061, 259, 942, 416,
+ 195, 390, -151, -1141, -710, 716, -401, 33, -771, -759, -336,
+ 88, -124, -139, -372, -223, -505, -164, -100, -512, -465 },
+ { 3233, 3990, 2698, -107, -448, 297, 331, -13, -530, -383, -464,
+ -1530, 715, -899, -1978, -879, 43, 93, -77, -138, -425, -97,
+ -167, -348, -460, -95, 280, -45, 235, 172, -357, -200 },
+ { 868, 4162, 1417, 487, -1446, -355, 392, -159, 202, 704, -814,
+ -3095, -1052, -1482, -745, -1403, -199, -27, -38, -387, -208, 20,
+ -64, -130, -265, 81, -20, 238, 49, 121, -137, 495 },
+ { 2774, 3478, 2072, 1229, 819, 1359, 106, -222, -1445, -1559, 924,
+ -98, 44, -347, 455, -862, -318, -288, -31, 281, -144, -107,
+ 148, 103, -171, -239, -134, 25, 125, 108, -142, -129 },
+ { 610, 990, -703, 1003, 437, -275, -179, -233, -2041, -445, -1145,
+ -488, 335, -2684, -1339, -294, -176, -195, -36, -65, -276, 10,
+ -111, -277, -134, -222, -51, 31, -369, -279, -105, 69 },
+ { 420, 2773, 375, -372, 489, 989, -900, 1075, 182, 119, -529,
+ -470, -504, -2225, 225, 101, -264, -417, -253, -459, -317, -205,
+ -528, -7, -43, -268, -116, -857, -608, -208, -216, 220 },
+ { 2969, 1927, -314, -476, 402, -637, -838, 835, 1229, 1200, 135,
+ -299, -324, -2136, 340, -1563, -309, -98, -408, -137, -154, 668,
+ 101, -90, 245, 112, -51, -37, -525, -254, -244, -126 },
+ { 1404, -258, 2333, 2019, 309, -29, -2468, 18, -494, 70, -260,
+ 245, 515, -1984, -1759, -1003, -504, 104, 472, 197, -38, 265,
+ 378, 6, 50, -183, -204, -17, -383, -318, -396, 142 },
+ { 807, 637, 712, 1237, -971, -176, -1160, -210, -2072, -782, -959,
+ -372, -590, -1159, -1017, -889, -750, -399, -98, -15, 2, -172,
+ -48, -488, -628, -12, -25, 136, 229, -200, -212, -472 },
+ { -1464, 333, -1978, -1394, -281, -1820, -124, 385, 97, -297, -1497,
+ -3, -916, -660, -949, -504, 117, 11, 86, 88, 2, 219,
+ 333, -120, -224, 71, 237, -507, 13, -381, -207, -113 },
+ { 1100, -717, -1827, -1908, -1030, -1562, 404, 794, 4, -682, -1306,
+ -612, -1197, 8, -131, 525, 159, -345, -91, 9, -222, -482,
+ -69, 482, 593, -32, -239, -408, -522, -692, -126, 712 },
+ { -798, -735, -174, -1695, 819, -737, -15, -426, -750, 876, 34,
+ -622, 448, -71, -950, -2094, 74, 170, 18, 57, 156, 443,
+ -85, -374, -416, -537, -348, -126, 62, -381, 399, -53 },
+ { -552, -1352, 536, -1, -322, -1094, -428, 309, -142, -752, 354,
+ 900, 473, -137, -1263, -370, -731, -864, -30, -101, 354, -321,
+ -523, 377, 9, -415, -87, -145, -154, -286, 100, 23 },
+ { 44, 607, 316, -268, -246, -497, 267, 154, 160, 717, 324,
+ 240, -130, -218, -107, -252, -64, 4, 113, -57, -162, 123,
+ -5, 143, -312, -66, -230, -33, -57, 60, 153, 85 },
+ { 158, 14, -307, -240, -85, -416, 304, -402, -461, -221, 193,
+ -123, 384, -142, 48, -77, -378, 36, -56, 20, 2, -240,
+ -88, -1, -185, 87, 6, 94, -22, 82, 191, 194 },
+ { 417, 259, -85, -170, -45, -151, -402, 136, 28, -40, 101,
+ 224, -337, 97, 98, 51, -401, 95, -77, -153, 357, -99,
+ -473, -142, -289, -80, -349, -76, -87, 97, 40, 198 },
+ { -236, 62, -104, -8, 98, 68, 128, 116, 47, 54, -121,
+ -150, -20, -120, 196, -80, 37, 290, 231, 247, 131, -113,
+ -126, -87, 65, 250, 260, 102, -68, 234, 76, -87 },
+ { 245, 486, 38, -10, -135, 106, 217, -187, -200, 96, 20,
+ 117, -40, -97, 68, -139, 276, 8, -55, -53, -187, -20,
+ -41, 1, -145, -246, -106, -45, -145, -353, 185, -35 },
+ { 448, -172, -496, -63, -84, -106, 151, 9, -143, -180, -38,
+ -276, -223, 269, 100, 38, -236, -66, 124, -59, 475, -78,
+ -407, -20, -119, -19, 162, -4, -226, 101, 247, 78 },
+ { -348, -156, -324, -260, -173, 0, -41, 63, 235, -114, 109,
+ -362, -96, 279, -277, 36, 394, 394, 240, 30, -88, 209,
+ 29, 176, 59, -20, -244, 50, -104, 192, -157, 48 },
+ { -376, -176, 269, -426, -159, -108, -18, -163, 93, 130, -222,
+ -40, 539, 176, 164, -62, -709, -354, 502, 664, 243, -414,
+ -51, 192, 33, 54, -10, -57, -141, -3, 144, 71 },
+ { -137, -636, 627, 6, -129, -159, -45, -150, -15, 402, 207,
+ 20, 202, 1, -203, 88, 183, 62, -76, 120, 418, -196,
+ -104, -154, -433, -338, -73, 1, -79, -14, -200, 84 },
+ { 184, -334, 175, 114, -274, -60, -429, 176, 36, 373, 468,
+ 134, 110, -11, -201, -94, 352, 109, 115, 91, 187, -83,
+ 21, 0, -154, -180, 288, 0, -61, -197, -246, 42 },
+ { -143, 26, 190, -110, -335, -385, -357, 27, 103, -66, -96,
+ -189, -337, -150, 129, -104, -176, -418, -216, -118, 28, 126,
+ -112, -130, 110, 17, 141, 111, -82, 238, 22, -50 },
+ { 104, -95, 48, -239, -40, -148, -327, 244, 323, -102, 244,
+ 151, 113, -150, -74, 223, -81, -328, -178, 140, -233, -165,
+ 182, 514, 216, -129, -8, 141, -81, 451, -110, -71 },
+ { -116, 84, -228, 177, 318, 62, 134, -3, 239, 14, 338,
+ 278, -439, -254, 3, -82, -210, -62, -236, -124, 5, -60,
+ 112, -18, -115, -31, 5, -65, 278, 4, -19, -130 },
+ { 236, -64, -147, -519, 147, -27, 71, -567, -133, 24, -199,
+ 229, -107, 126, -141, -148, -35, -34, 68, 230, 8, 72,
+ 40, -148, 203, 97, 84, 107, 32, 17, -58, -18 },
+ { -43, -408, -101, 120, 118, 168, -170, -233, -323, -120, -339,
+ 80, -294, -151, 85, 52, -420, 79, -162, -233, -237, -47,
+ -131, -53, -199, 14, 85, -80, 93, -150, -15, 318 },
+ { -106, 107, -6, 189, 53, -109, 22, -474, -335, -102, -279,
+ -321, -66, 186, -65, -13, 61, 167, 43, -159, -57, -13,
+ 37, -125, -137, 132, 161, -156, -27, -276, -89, 15 }
+};
+static int16_t default_ncobmc_krnl_2_0_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 5401, 5987, 4279, 6550, 4858, 4986, 5733, 7172, 8194, 7631, 7549,
+ 6971, 9288, 7485, 8583, 9244, 12058, 11530, 10461, 8453, 8304, 11724,
+ 8999, 9457, 5018, 6922, 8375, 7860, 7915, 6921, 7703, 8963 },
+ { 2308, 2670, 5018, 5298, 3883, 6449, 4267, 4119, 9252, 10082, 7844,
+ 7414, 9050, 9261, 8739, 7808, 10974, 10279, 8627, 8840, 9203, 9406,
+ 9360, 10574, 10156, 7673, 6238, 8876, 6800, 6423, 6931, 8589 },
+ { 6608, 4325, 3372, 5227, 6182, 3670, 5595, 5758, 8575, 8025, 8251,
+ 10711, 5449, 6965, 5443, 7178, 9099, 8842, 7132, 7830, 5795, 9882,
+ 8939, 8323, 7507, 7248, 8750, 6786, 6940, 4942, 7125, 6399 },
+ { 3977, 3060, 4962, 7094, 7211, 6388, 6256, 3960, 7672, 7814, 7711,
+ 7237, 7088, 7232, 5716, 6040, 9565, 6643, 8113, 7841, 9849, 10144,
+ 8297, 7676, 6792, 8447, 7805, 5475, 5499, 4728, 5379, 7645 },
+ { 4598, 4391, 3660, 6284, 6694, 8302, 5610, 5341, 7466, 6298, 6406,
+ 7734, 5743, 5155, 5257, 6958, 9035, 11566, 9636, 7825, 8147, 9427,
+ 6612, 5526, 7635, 7259, 7696, 7853, 5505, 6744, 9265, 5394 },
+ { 5980, 2356, 2746, 5955, 4045, 4283, 5117, 3799, 5386, 5594, 7671,
+ 6984, 6232, 6028, 3101, 3391, 5757, 9530, 7408, 6206, 5512, 7867,
+ 5144, 8011, 6690, 6994, 4877, 5063, 6175, 5205, 1965, 859 },
+ { 2619, 4096, 4225, 4712, 5637, 6418, 6649, 3904, 5463, 5102, 4785,
+ 4100, 5127, 3858, 3419, 5301, 6002, 7649, 8260, 6241, 4168, 4551,
+ 6153, 5016, 7113, 7845, 5201, 5455, 5069, 2335, 3311, 5194 },
+ { 1278, 4942, 4441, 3456, 3791, 5620, 5275, 2243, 5080, 4619, 5834,
+ 4859, 4320, 5092, 1481, 846, 4969, 4835, 3646, 5940, 5736, 5862,
+ 3628, 5918, 5865, 4945, 4385, 4699, 4342, 5415, 8383, 4711 },
+ { 3855, 1678, 2560, 4631, 2765, 1444, 1449, 1895, 4494, 5706, 4813,
+ 4882, 3532, 2264, 3222, 5444, 4097, 5236, 5036, 3713, 6547, 4371,
+ 5311, 2363, 5113, 6290, 3743, 5343, 5369, 2813, 2486, 1647 },
+ { -651, 1098, 2116, 3495, 2289, 1836, 4507, 4057, 5225, 4553, 2631,
+ 2791, 2984, 3605, 3416, 3611, 4358, 4719, 3450, 4146, 3973, 3263,
+ 3826, 5881, 6402, 4584, 4396, 3689, 2020, 1960, 2100, 4304 },
+ { -622, 1848, 379, 112, -1474, 1013, 6023, 260, 1035, 1984, 3811,
+ 2362, 1394, 2546, 3347, 2472, 1865, 755, 2251, 1139, 1933, 2252,
+ 1163, 3003, 4091, 4792, 3801, 3517, 4247, 3798, 5216, 4543 },
+ { 1342, 2229, 1014, 1212, 260, 432, 1975, 99, 2798, 818, 2455,
+ 3858, 2231, 3773, 136, 857, 2171, 815, 1966, 1825, 1711, 964,
+ 2142, 2514, 5367, 3539, 3241, 3116, 3982, 3839, 3553, 3535 },
+ { 1800, 27, 321, 111, 1003, 528, 254, 979, 2444, 2413, 3807,
+ 961, 1961, 1173, 2156, 3935, 259, 263, 1815, 1979, 1218, 2393,
+ 3738, 1109, 4444, 3726, 3647, 3428, 2966, 4602, 4903, 5851 },
+ { 1340, 753, 317, 1318, 738, 1880, -500, -691, 1108, 38, 412,
+ 890, 494, 291, -131, 759, -111, 221, -95, 2575, 3099, 3223,
+ 3140, 3156, 3952, 1942, 2615, -2313, 2991, 6367, 5744, 4528 },
+ { 752, 490, 1255, 2396, 14, 3819, 1319, 1239, 3491, 2464, 3243,
+ 3083, 392, 1273, 1712, -226, -931, -2130, 710, 864, 385, 265,
+ 1431, 1796, 3063, 3531, 3879, 3986, 3503, 4045, 2539, 3489 },
+ { 1943, 170, 358, 1884, 2344, 1566, 92, 1721, 1381, 1115, 723,
+ 1670, 2294, 1497, 1697, 973, 1286, 2306, 381, 2582, 2551, 3852,
+ 2481, 3432, 2273, 3079, 2076, 3014, 3365, 3906, 2241, 2250 },
+ { 1741, -705, 595, 956, 2038, 793, 1518, 148, -524, -881, -487,
+ 711, 720, 773, 431, 2181, -435, -841, -1106, -552, 434, -2007,
+ -41, -234, -960, -23, 394, -655, 792, 934, 1495, 1947 },
+ { 2086, 1360, 97, 1352, -95, 1800, -729, -916, -152, 956, 196,
+ 1746, -1973, -690, 472, 1788, -28, 385, 781, 589, -320, 1167,
+ -484, 66, 1136, 1038, 1741, 888, 3056, 2114, 3495, 1297 },
+ { 1900, 1373, 983, 3718, 1409, 2096, 932, -604, -1370, 1153, 109,
+ 58, 104, 2851, 602, -2071, 252, -888, 1428, 2724, 1344, 1567,
+ 563, 1902, 1370, 519, -294, 393, 1153, -1032, 2129, 335 },
+ { 2652, 2620, 3178, 2344, 2466, 2241, 1145, -101, -635, 306, -1036,
+ 638, -2606, -1921, -1098, -328, -324, 2598, 1092, 1832, 493, 2507,
+ 1152, 1461, -796, 2126, -742, 1182, 2078, 1549, 2665, 2366 },
+ { 1080, 798, 1934, 568, 1218, 3206, 155, 1844, 2313, 3509, 1090,
+ 650, 1166, 2515, 1846, 1025, 259, 720, 1587, 3010, 4955, 6457,
+ 2952, 2764, -396, 1937, 1563, 673, 828, 4062, 2711, 1548 },
+ { 871, 657, 2761, 1756, 2349, 198, -1003, -1105, -1181, -69, 146,
+ 3201, -27, 1493, 13, 291, -2260, -468, 1178, 928, 2665, 3887,
+ 3140, 1334, 1969, 2687, 544, 3842, 2885, 733, 3419, 1963 },
+ { 1491, 1698, 302, 2127, 1256, 907, 1607, 1833, 2061, -536, 988,
+ 4380, 2723, -195, 962, 1769, 2466, 1735, 2707, -369, -713, 1599,
+ 3031, 2924, 2023, 2045, 5259, 1733, 3517, 4274, 440, 412 },
+ { 2163, 1, 167, 1755, 5694, 3272, 739, 4235, 6123, 3811, 4611,
+ 5800, 2424, 2409, 1458, 2152, 104, 115, 466, -998, -806, 2824,
+ 4473, 2511, 4878, 3258, 5014, 3559, 1003, 2074, -2091, 1403 },
+ { 964, 1051, -1527, 1266, 3883, 2349, 1054, 1972, 1929, -249, 3796,
+ 2861, 1542, 449, 539, 1942, -16, 58, 2080, 56, 1106, 4248,
+ 580, 2540, 3095, 4536, 152, 354, 4067, -2246, 1505, 1981 },
+ { 1081, 1440, 324, 736, 2839, 2597, 3712, 2282, 3717, 2483, 1247,
+ 4456, 3604, 3415, 2487, 3715, 2073, 2928, 2372, 828, -2700, 2054,
+ 4315, -125, 1777, 2211, 2992, 7336, 4216, 3571, 2657, 6780 },
+ { 1997, 2104, 1255, 1942, 1335, 1450, 3567, 1447, 3812, 6083, 5233,
+ 4484, 3536, 3564, 3290, 4062, 2589, 2816, 3971, 4406, 3481, 2664,
+ 1245, 1759, 3353, 1036, 2054, 1299, 2263, 4010, 4171, 3972 },
+ { 1519, 4826, -750, 988, 1338, 2999, 212, 3858, 5202, 5306, 5717,
+ 3066, 2629, 6461, 6043, 6637, 8388, 7252, 4890, 4161, -1056, 4615,
+ 2538, 5633, 3389, 6439, 2985, 7148, 5149, 4509, 8001, 8863 },
+ { 1047, 876, 2713, 3913, 2232, 1084, 1702, 2626, 1983, 3744, 2044,
+ 3690, 2087, 4497, 2656, 5592, 6247, 4584, 4218, 6097, 6884, 6277,
+ 2412, 5097, 7400, 2789, 6089, 6157, 7247, 9712, 11393, 5627 },
+ { 2876, 4288, 2443, 3081, 1569, 1823, 1050, 2325, 2558, 2591, 4223,
+ 6300, 4237, 4354, 4411, 7502, 4175, 3350, 4208, 1100, 6473, 6664,
+ 5460, 4207, 5297, 8047, 6850, 6496, 7866, 10375, 7455, 2868 },
+ { 3282, 5838, 6486, 6479, 3474, 4665, 3790, 2882, 5116, 4457, 4649,
+ 4208, 4520, 7271, 4363, 7125, 8799, 6540, 10158, 5716, 6794, 5762,
+ 6462, 8561, 2742, 7002, 9454, 8451, 8560, 7973, 7759, 6679 },
+ { 5957, 7221, 5126, 7057, 5824, 4274, 5374, 6023, 7549, 6239, 7666,
+ 6368, 4014, 5338, 7150, 9793, 10608, 9838, 6748, 9691, 5465, 4631,
+ 7964, 7692, 8173, 9362, 8989, 11677, 10282, 9960, 6666, 9276 }
+};
+static int16_t default_ncobmc_krnl_2_0_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 7499, 5941, 5384, 4566, 4006, 3634, 2288, 4112, 2127, 3001, 2639,
+ 1927, 467, -1639, 1484, 1143, 66, -316, 626, 1721, 1208, 193,
+ 1591, 3903, 8472, 3945, 1882, 4378, 6453, 8972, 11867, 10110 },
+ { 7919, 6226, 8601, 3825, 4644, 4380, 3957, 2964, 1316, 3586, 2268,
+ 2802, 2193, 1427, 1479, 1353, -55, 373, 271, 979, 526, 1827,
+ 2463, 1938, 3963, 4851, 5040, 4192, 3731, 4522, 8903, 6733 },
+ { 6373, 4994, 6414, 4822, 4923, 4881, 4383, 6117, 3342, 5068, 2353,
+ 2370, 2231, 758, 1768, 1338, 742, 1498, 454, 1453, 1466, -213,
+ 177, 1223, 512, 5366, 2462, 4667, 5671, 5039, 6065, 6874 },
+ { 9299, 8698, 12939, 6170, 7063, 3147, 3256, 3492, 2696, 4498, 3705,
+ 3176, 2797, 1099, 2852, 1331, 527, 1272, -388, 1619, 110, -406,
+ 390, 3801, 4468, 3193, 2944, 7284, 7144, 4560, 6320, 8073 },
+ { 5937, 4572, 5212, 6678, 5291, 2561, 2752, 4892, 2713, 5203, 4202,
+ 1527, -470, 2424, 2850, 1217, 401, 587, 191, 1122, 1314, 1854,
+ 3860, 4579, 2455, 5427, 1614, 5037, 5073, 5074, 3101, 7734 },
+ { 7035, 5229, 7515, 6523, 7587, 5653, 5311, 4945, 4097, 4237, 2836,
+ 2667, 1959, 4095, 1669, 1484, 57, 467, 1028, 642, 2843, 2782,
+ 3604, -825, 1592, 4305, 2202, 4432, 4683, 3867, 3520, 9281 },
+ { 7248, 3787, 4243, 4710, 3288, 1975, 2766, 4057, 1506, 2644, 1436,
+ 818, 1150, 2159, 787, 920, 98, 137, 1065, 306, 3880, 537,
+ 3871, 1060, 3821, 3395, 2484, 3532, 4072, 3339, 2638, 3982 },
+ { 8810, 5802, 5538, 4090, 3659, 3742, 3818, 6827, 6474, 4756, 4093,
+ 3735, 4063, 4586, -1945, 470, 328, -163, 958, 511, 2541, 3057,
+ 2972, 4349, 4754, 5115, 5847, 6843, 7299, 6652, 5891, 5655 },
+ { 9091, 5007, 6438, 4749, 5610, 3664, 6151, 5188, 3686, 2005, 2670,
+ -245, 1788, 3724, 2626, 679, -52, -839, -145, 356, 3488, 1970,
+ 1988, 2126, 1099, 2578, 5401, 6965, 4908, 5526, 6748, 5968 },
+ { 6412, 7516, 8029, 8748, 6742, 7509, 6552, 4078, 4300, 5066, 4786,
+ 3270, 4270, 3875, 2319, 4282, 1640, -843, -439, 427, 1587, 520,
+ -28, 2251, 3358, 3049, 4407, 7286, 8994, 7802, 5924, 6824 },
+ { 8467, 6838, 3934, 2952, 7200, 5407, 4593, 5882, 3353, 3865, 1870,
+ 1535, 2130, 4121, 3527, 1799, -637, -937, 513, 247, 169, 607,
+ 2947, 3530, 3717, 6082, 9703, 6867, 2729, 6292, 3084, 4879 },
+ { 9934, 8638, 7508, 6894, 7343, 5306, 6208, 6136, 5240, 7136, 3958,
+ 1811, 3171, 1064, 2246, 882, 1681, 727, 1694, 769, 1700, 1370,
+ 1901, 5812, 3852, 6468, 5875, 5416, 6007, 3348, 3600, 6661 },
+ { 10978, 9383, 9741, 10746, 5208, 8469, 4608, 5824, 4424, 3460, 3841,
+ 4037, 3687, 1582, 3784, 988, 1974, 1292, 2272, 2128, 2210, 2888,
+ -967, 5864, 5568, 4693, 3796, 6361, 4816, 2697, 4559, 6437 },
+ { 8329, 9809, 8672, 9375, 7503, 5775, 3454, 4596, 5093, 5033, 4021,
+ 2860, 2833, 2782, 3056, -617, 1644, 1759, 2434, 2570, 3312, 3807,
+ 3518, 3521, 1126, 2830, 3378, 4432, 3261, 5211, 4073, 10050 },
+ { 9992, 8148, 7951, 7194, 5624, 5032, 3296, 2981, 5388, 3910, 2274,
+ 1436, 1425, 1053, 2111, 2806, 1606, 1446, 1681, -211, 1877, 1541,
+ 1700, 2736, 2088, 2551, 1045, 2977, 2632, 1719, 4896, 5378 },
+ { 9403, 8846, 8061, 7478, 5269, 6655, 6312, 4110, 3529, 5802, 3108,
+ 3246, 1943, 909, 2436, 1678, 1513, 1243, 797, 213, 3888, 4015,
+ 2775, 2082, 2395, 2792, 2136, 2475, 1657, 2156, 1878, 2587 },
+ { 9499, 9075, 5426, 6962, 8206, 8057, 3968, 5184, 2759, 2277, 2744,
+ 3531, 2518, 367, 1075, 2118, 900, 901, 2964, 3641, 5282, 2186,
+ 2416, 2312, 2366, 2149, 1024, 1912, 1119, 220, 401, 727 },
+ { 7615, 8271, 8148, 7699, 7063, 7658, 5473, 7497, 7302, 5841, 4165,
+ 3092, 734, 2215, 3316, 2226, 1197, 1236, 2996, 5007, 2872, 3460,
+ 2371, 1898, 1917, 1442, 853, 1412, 700, 620, 317, 1237 },
+ { 8331, 8530, 8633, 7185, 6863, 9076, 5328, 5045, 5378, 4004, 4089,
+ 1469, 1341, -333, 2689, 1982, 115, -1158, 383, 1548, 1118, 2864,
+ 3154, 1803, 2079, 1676, 1450, 1165, 967, 795, 136, 1184 },
+ { 8763, 9102, 6716, 8961, 5448, 6366, 3438, 5722, 5374, 5651, 5422,
+ 1728, 1751, 2444, 1024, 1118, 424, 2288, 3655, 2719, 2254, 1313,
+ 3476, 1983, 1975, 1502, 1172, 2333, 937, 594, 122, 149 },
+ { 8146, 9931, 7629, 8882, 6328, 7491, 5646, 5494, 7238, 7355, 4478,
+ 2019, 2646, 3486, 4193, 1121, 562, 1823, 2787, 1720, 2228, 3627,
+ 4470, 3351, 2439, 2214, 1926, 2118, 1771, 767, 353, 1062 },
+ { 10816, 9814, 10917, 7424, 8207, 9717, 8537, 8728, 7356, 7376, 7246,
+ 3223, 1981, 277, 1282, 951, 515, 222, 1392, 789, 4372, 2112,
+ 4083, 2706, 3234, 2414, 2655, 1407, 702, 1369, 121, 676 },
+ { 11362, 10078, 7520, 7828, 10705, 7300, 7358, 6559, 8337, 7569, 5067,
+ 3465, 2417, 1956, 2165, 759, -106, -1282, 1822, 3225, 4767, 5619,
+ 4119, 3383, 3877, 2702, 2410, 2459, 1441, 1392, 945, 216 },
+ { 10112, 8115, 3762, 5107, 7443, 7676, 7498, 7380, 6235, 7523, 6246,
+ 3574, 2749, 3853, 303, 1558, 1896, 1107, 462, 2172, 2388, 4222,
+ 2000, 1688, 3560, 2297, 1593, 3679, 3628, 1507, 1549, -188 },
+ { 7794, 6437, 6605, 5381, 6404, 4410, 6677, 4233, 4949, 3000, 2812,
+ 3756, 1805, 2877, 2098, 1737, 1809, 1427, 378, 2031, 2115, 5006,
+ 3159, 3602, 6343, 3503, 3356, 5971, 3138, 3932, 1028, 699 },
+ { 6757, 7738, 6538, 8248, 6959, 6557, 5264, 3092, 3765, 1895, 1865,
+ 901, 2485, 2217, 1699, 1946, 3573, 1501, 2141, 2177, 180, 1003,
+ 1816, 4793, 2112, 4560, 3820, 2881, 4376, 2091, 681, 623 },
+ { 9057, 8917, 7385, 7072, 6324, 5492, 5283, 5053, 5785, 4277, 3322,
+ 1267, 1946, 1894, 3701, 472, 1658, 1154, 777, 2193, 2349, 3611,
+ 3129, 3719, 1781, 5389, 3418, 2463, 3734, 3644, 3365, 2247 },
+ { 9444, 9439, 8598, 9152, 6982, 8451, 8279, 6129, 5172, 3730, 2416,
+ 2483, 2775, 1913, 1041, -1110, -392, 1068, 556, 598, 4171, 2377,
+ 1870, 1906, 5449, 5413, 2589, 3564, 6473, 6692, 3140, 2665 },
+ { 10567, 10001, 8225, 8289, 6898, 6856, 3920, 4547, 4297, 1456, 2348,
+ 1526, 2343, 2863, 1429, 312, 57, 930, 1619, 1189, 596, 1815,
+ 2589, 3141, 1662, 3349, 1311, 4091, 4596, 7321, 5911, 6965 },
+ { 9593, 9214, 9132, 8273, 8030, 8135, 5179, 5564, 4052, 4155, 4052,
+ 2249, 2178, 1680, 439, 822, -378, -1210, -1149, 3709, 2830, 747,
+ 2987, 5873, 795, 5124, 4233, 3887, 5573, 5312, 7258, 11014 },
+ { 8373, 8033, 8934, 7880, 7434, 6144, 7528, 5163, 2591, 4301, 2489,
+ 4137, 1295, 760, 703, 805, -308, -320, 2205, -1113, 362, 581,
+ 2567, 689, 5949, 2652, 1996, 2138, 7469, 4835, 8058, 11132 },
+ { 8586, 6026, 7656, 7201, 8141, 7249, 5995, 4896, 3152, 4255, 1711,
+ 3498, 3933, 1852, 1444, 715, -104, -695, 4021, 3937, 6478, 1755,
+ 935, 384, 1002, 2595, 3359, 4532, 7103, 5192, 12241, 14373 }
+};
+static int16_t default_ncobmc_krnl_2_0_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { -18, 921, 2116, 3151, 5822, 6391, 6844, 2748, 3794, 6358, 6115,
+ 7194, 6145, 8324, 7847, 6181, 4052, 4867, 4967, 5823, 6786, 4035,
+ 5989, 2636, 2376, 5222, 5409, 4121, 2105, 626, -3363, -2857 },
+ { 3594, 3991, 2433, 4231, 5187, 5335, 7496, 6672, 4132, 3625, 5649,
+ 7621, 4052, 6868, 7772, 7010, 5041, 5311, 7273, 6593, 6376, 5150,
+ 4421, 3618, 2523, 4188, 5275, 3469, 6209, 5459, 953, 947 },
+ { 786, 3510, 3161, 3162, 3435, 5439, 6415, 4784, 4467, 4232, 5708,
+ 3775, 7437, 8362, 9398, 8331, 6300, 6049, 8740, 7748, 9508, 7139,
+ 7232, 6528, 8257, 4296, 5180, 4497, 3755, 6329, 3620, 3050 },
+ { 2273, 1239, -1997, -385, 1641, 4987, 6332, 7869, 5742, 3115, 4523,
+ 5739, 6076, 8184, 8936, 9733, 5577, 8872, 8635, 7679, 7192, 6961,
+ 7586, 5022, 5256, 5107, 5842, 4127, 3898, 7191, 5184, 1097 },
+ { 2576, 3444, 4787, 3494, 4843, 5213, 7669, 6154, 6713, 5224, 6221,
+ 8653, 10387, 9676, 10219, 9062, 6899, 4115, 6617, 7548, 7319, 5169,
+ 6051, 6609, 6735, 3759, 6779, 3520, 5518, 4355, 4386, 3459 },
+ { 2457, 4623, 4686, 3390, 6167, 6776, 5546, 7755, 6678, 5831, 6667,
+ 9797, 9222, 7728, 12319, 12899, 10764, 6383, 7947, 9907, 8225, 5677,
+ 7690, 9312, 8324, 4971, 9288, 6616, 5448, 7180, 11014, 5709 },
+ { 3687, 5015, 5834, 5702, 6619, 6602, 6844, 8607, 10828, 10170, 9206,
+ 11527, 10057, 10677, 11683, 11009, 10585, 8869, 7057, 9542, 8465, 11391,
+ 6180, 10182, 5594, 5353, 8810, 7358, 7118, 10591, 10569, 7318 },
+ { 5659, 4619, 7090, 7819, 8483, 7258, 7446, 7530, 6847, 7424, 7586,
+ 8261, 7644, 9373, 18173, 15351, 11259, 11899, 11787, 9977, 8370, 7422,
+ 9853, 6375, 5873, 6503, 6194, 4792, 5082, 4563, 2192, 5942 },
+ { 3004, 6927, 6994, 7359, 7505, 10247, 9661, 8199, 7979, 8529, 9388,
+ 12192, 11555, 12591, 10308, 10143, 12579, 12379, 11700, 12735, 6629, 10209,
+ 9592, 11878, 10187, 7755, 7344, 4922, 6699, 8240, 7341, 8532 },
+ { 7590, 5795, 6512, 4587, 6933, 7660, 6141, 7410, 5605, 5542, 8790,
+ 10597, 9438, 10999, 10270, 10028, 10678, 12591, 13767, 11933, 10966, 11898,
+ 12452, 8305, 6352, 8621, 7598, 5409, 5869, 6860, 8606, 5371 },
+ { 7095, 7927, 9729, 11290, 10321, 9966, 8226, 10211, 12468, 10459, 10959,
+ 12232, 12326, 11686, 11247, 13106, 15660, 16448, 13119, 14772, 14295, 13233,
+ 11880, 9805, 8498, 5650, 3043, 5995, 9756, 6592, 8450, 6801 },
+ { 4251, 4844, 7130, 7033, 9742, 10794, 9341, 10350, 10410, 9188, 10907,
+ 11059, 11547, 12685, 14995, 15511, 13256, 15229, 12788, 13792, 12937, 14179,
+ 12355, 8519, 7767, 6376, 7293, 7706, 6134, 9392, 9423, 6656 },
+ { 5032, 6597, 8267, 6875, 10431, 9182, 11606, 9174, 9394, 10754, 10214,
+ 11384, 11633, 14256, 11377, 11933, 13999, 14801, 12182, 12170, 12927, 10856,
+ 13248, 9493, 6586, 7871, 8697, 7094, 8561, 9451, 7116, 4183 },
+ { 5550, 6479, 9188, 7562, 9126, 10236, 12984, 11667, 10146, 11981, 13257,
+ 13227, 14228, 13278, 13571, 15730, 14696, 14740, 14122, 11230, 10186, 9795,
+ 9766, 9187, 10707, 11612, 10594, 14651, 10618, 5465, 6640, 1085 },
+ { 6402, 8472, 7318, 8449, 9884, 8237, 11776, 12579, 8248, 9119, 10813,
+ 12464, 14087, 14122, 13487, 15884, 15630, 16883, 13968, 15663, 13943, 14099,
+ 13309, 12222, 11647, 10827, 11813, 9543, 10171, 10991, 8523, 7564 },
+ { 5558, 8716, 7398, 7003, 9081, 9234, 10389, 10222, 11602, 10189, 12165,
+ 10551, 11676, 14110, 13499, 14107, 14297, 13673, 15239, 13669, 9564, 8809,
+ 11609, 10482, 11688, 10885, 12257, 11025, 11490, 10586, 12134, 11499 },
+ { 5054, 7370, 10001, 8690, 6346, 7990, 10600, 10877, 13977, 14230, 13786,
+ 11880, 13256, 15455, 14951, 12311, 15970, 16289, 14385, 13318, 10806, 16058,
+ 14004, 14150, 15275, 14285, 15169, 15124, 14484, 15130, 14320, 13627 },
+ { 6472, 6714, 8422, 7520, 9468, 7309, 11310, 10173, 9680, 9775, 11809,
+ 11641, 17217, 14973, 12511, 12431, 15565, 14706, 12653, 10736, 13799, 11984,
+ 14576, 14406, 13494, 13775, 13748, 13952, 12627, 13551, 12343, 13637 },
+ { 5691, 6196, 6840, 5618, 8130, 5337, 10502, 11764, 12309, 11243, 12058,
+ 14603, 15254, 13730, 12988, 16426, 16398, 18336, 14653, 12258, 13528, 12015,
+ 13122, 12816, 13238, 14265, 15564, 14875, 14346, 16501, 14057, 14664 },
+ { 5142, 4576, 6578, 5068, 8343, 7665, 11649, 10611, 11541, 10331, 12078,
+ 14129, 17221, 15930, 16224, 15649, 16231, 11200, 11389, 11572, 13476, 12629,
+ 11861, 13013, 15114, 12486, 15663, 12735, 13401, 13979, 13507, 13952 },
+ { 6851, 5162, 6778, 6922, 8951, 5567, 10360, 9216, 7036, 5410, 10771,
+ 13577, 12588, 10477, 10248, 14359, 15261, 13795, 12048, 11716, 9361, 6278,
+ 8997, 10237, 14438, 12459, 12976, 13600, 13892, 11879, 13127, 13802 },
+ { 4195, 6070, 3151, 7247, 5889, 6549, 8672, 8715, 10338, 9229, 9026,
+ 10246, 14651, 14345, 15001, 15116, 18364, 16684, 13657, 14718, 8840, 10437,
+ 9581, 12367, 11264, 11291, 13002, 11111, 13027, 14172, 12590, 13651 },
+ { 3818, 4756, 8879, 6693, 4570, 8158, 7459, 7913, 5727, 9446, 10204,
+ 8887, 11326, 14337, 13524, 13813, 13628, 15506, 11578, 13470, 12391, 8927,
+ 9166, 9882, 10411, 11665, 8963, 12141, 11521, 10521, 15132, 15679 },
+ { 4425, 8428, 12163, 9947, 3396, 5526, 8133, 4898, 3913, 4891, 5711,
+ 7034, 10657, 9932, 14435, 12716, 15058, 15501, 14937, 14530, 14536, 9746,
+ 9923, 11968, 7869, 10734, 9735, 9164, 11842, 12786, 16768, 15073 },
+ { 7712, 9515, 10650, 9707, 6201, 9752, 8700, 10334, 9503, 13202, 9555,
+ 9748, 12814, 13027, 13920, 12593, 14370, 14808, 13965, 14154, 12735, 7319,
+ 12721, 10395, 7361, 8678, 12937, 10057, 9234, 14695, 14044, 13613 },
+ { 8309, 7528, 9323, 7254, 6829, 7276, 7831, 10824, 8851, 11605, 12763,
+ 10865, 10153, 10736, 12379, 10799, 10370, 11817, 11734, 13290, 18692, 13378,
+ 10209, 11690, 12616, 9779, 9257, 6142, 7818, 10903, 13276, 8893 },
+ { 5420, 5315, 7529, 7453, 9027, 9825, 7865, 9813, 6673, 6090, 7914,
+ 10790, 11205, 11064, 9239, 11947, 12306, 12802, 11856, 9896, 10502, 9968,
+ 12099, 11011, 11103, 9920, 10747, 12477, 10458, 8485, 8805, 10199 },
+ { 5275, 2169, 8448, 6454, 8077, 5060, 8189, 6133, 5673, 7424, 7993,
+ 10659, 10836, 8138, 9347, 10570, 8447, 8359, 11071, 11453, 13480, 9521,
+ 11755, 8294, 7308, 4637, 10781, 5515, 4843, 4737, 5330, 4893 },
+ { 4846, 5401, 5671, 3987, 6910, 8363, 10605, 9189, 9832, 11154, 11632,
+ 10874, 12377, 9266, 12273, 10543, 10287, 10912, 10745, 9206, 8851, 8327,
+ 11242, 8123, 7431, 10266, 8947, 6186, 4259, -682, -920, 3901 },
+ { 3634, 2920, 4925, 5515, 6626, 6450, 10063, 9047, 9880, 9577, 8277,
+ 7582, 10044, 10186, 11630, 8182, 12589, 14249, 13236, 11328, 7042, 8880,
+ 7868, 6442, 10067, 3096, 5190, 5874, 2890, 668, 1718, 2480 },
+ { 4732, 2901, 1056, 1878, 5356, 5406, 5212, 8538, 8974, 7742, 9588,
+ 7933, 10867, 8487, 11203, 8392, 8301, 10070, 4166, 11993, 9436, 10071,
+ 7464, 7158, 7848, 6669, 4825, 5838, 236, 3720, 562, -1751 },
+ { 1899, 3004, 3605, 1918, 2347, 4957, 5010, 5918, 6020, 5972, 7291,
+ 6820, 8455, 8985, 7833, 5877, 5796, 7048, 5548, 2886, 4467, 10008,
+ 7443, 8399, 7314, 4277, 3852, 296, -983, 1487, -2474, -7290 }
+};
+static int16_t default_ncobmc_krnl_2_1_0[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 4140, 3361, 5678, 1978, 3443, 3453, 2905, 2131, 4836, 2612, 1530,
+ -831, -257, 584, -1193, -391, 107, -47, 32, 125, 282, 684,
+ 161, 23, -22, -95, 555, -405, 569, -268, -92, 105 },
+ { 4680, 4183, 4456, 4730, 4264, 4681, 2310, 2034, 3081, 2493, 2012,
+ 1397, 1521, -881, -976, -668, -606, -768, -273, 256, -4, -290,
+ 64, -55, -444, -989, -316, -496, 206, -169, -158, -87 },
+ { 3199, 3846, 3775, 632, 2359, 3492, 3355, 53, -1201, 145, 263,
+ -93, -1435, 415, -844, 954, -241, -483, -165, -191, -561, -185,
+ -300, -258, -154, -654, 308, -64, -36, -150, 95, 146 },
+ { 680, 2863, 889, 1721, 3444, 2472, -27, 2458, 816, -186, 123,
+ 3214, 2029, 2485, -631, 323, 1030, -275, 196, -532, -537, 153,
+ 274, 61, -453, -283, -533, -1062, -145, -388, 158, 0 },
+ { 1962, 4004, 1406, -535, 1315, 2669, 2522, 654, 3394, 4205, 2731,
+ -40, -118, 599, -511, 618, 162, 840, 43, 253, -59, 222,
+ 64, -21, -671, -179, 241, 283, 902, 226, 305, -204 },
+ { 516, 1205, 3201, -5, 1479, 945, 2129, -628, 3181, 900, 1439,
+ 1128, 799, -158, -431, 347, -118, 527, 389, 268, -73, 2,
+ 534, 133, -287, -19, 561, 329, 394, -120, 38, -461 },
+ { 2130, 2022, 1966, 210, 447, 402, 1249, 1677, 2353, 1113, 1723,
+ 1300, 2060, -144, 420, 2008, -417, -74, -197, 135, 217, 310,
+ 152, 339, -99, -81, 279, 44, 54, -160, -82, 4 },
+ { 2134, -1849, -990, -93, 1932, 2119, 2954, -371, -1021, -831, 1662,
+ 1330, 1634, 246, -777, 852, 130, -67, 191, -316, -429, -240,
+ -147, -198, 92, -15, 310, 141, -10, 146, 35, 85 },
+ { 2763, 4779, 994, 1054, 2625, 2031, 1784, -161, 1142, 1052, 2300,
+ 2462, 1943, 516, 816, 27, 18, 171, 158, -311, -636, 20,
+ -463, -235, 145, 339, 240, -354, -110, 41, 404, 353 },
+ { 3625, 3557, 2333, 950, 2020, 2445, 2562, 1506, 2571, 1559, 4781,
+ 2030, 1325, 2507, 2045, 1896, -526, -22, -272, -143, -189, 17,
+ 10, 405, 143, 414, -95, -229, -215, 0, -347, 83 },
+ { 2808, 1062, 1502, 411, 1139, 998, 1577, 1233, 1637, 998, 1846,
+ 2487, 3868, 2225, 533, -51, -6, -180, -30, 186, -175, 247,
+ 352, 57, 83, 290, 330, 160, 165, 354, -465, 131 },
+ { 2809, 2966, 2929, 1435, 2875, 1948, 130, 1168, 252, 1276, 2838,
+ 3507, 3001, 1410, 312, 1941, -336, -431, -190, -194, -130, -336,
+ 238, 75, -472, -189, 123, 61, -583, 147, 305, 200 },
+ { -23, 2306, 2169, 33, 1848, 1832, 2721, 49, 1435, 585, 1036,
+ 2116, 1658, 1011, 815, 920, 101, 108, 262, 299, 283, 357,
+ 268, 141, -71, -285, 205, 142, -71, 224, 252, 156 },
+ { 1447, 2625, 4643, 2096, -847, -154, 2876, 1050, 104, -873, -327,
+ 146, -596, 622, -337, 1317, -61, 9, -201, 110, 90, 644,
+ 337, 204, 155, 278, 320, -306, -504, 357, -108, 132 },
+ { -16, 2815, 1344, -2044, 2236, -549, 586, 409, 30, 152, 1588,
+ 243, -115, 291, -30, -170, -96, -10, 433, 205, -134, 17,
+ 528, -16, -22, -198, -43, -143, -224, 270, 153, 37 },
+ { 1478, 829, 628, 1055, 1323, -406, -282, -12, 418, 40, -795,
+ -286, -627, -41, -448, 454, -267, -258, -129, -57, -44, -406,
+ -260, -67, 134, -196, -236, -125, 35, -62, -137, -5 },
+ { 220, 26, -380, -257, -90, -453, -196, -56, -193, 37, 131,
+ 151, -88, -695, 66, -113, -200, -144, 132, -48, -244, -207,
+ -178, 268, -107, -1, 69, 337, -84, -197, 87, 119 },
+ { 7, 3, -85, -185, 334, -86, -69, 152, -320, -239, 587,
+ 415, 246, 290, -146, -134, -9, -69, -66, -148, -41, -206,
+ -148, 283, -144, -287, -73, 93, -23, 247, 398, 174 },
+ { 46, -256, -114, -61, -532, 103, 32, -223, 24, -20, 132,
+ 339, 61, -381, -711, -160, -200, -334, 78, 173, -281, -139,
+ -27, 134, -120, 96, 110, -251, -114, -32, -299, -183 },
+ { -193, 28, -134, 200, 155, -316, -363, 285, 268, 665, 233,
+ -127, 436, -20, -536, -163, 51, -40, 162, 78, -27, 192,
+ -34, -40, -17, -205, 203, 106, -62, -211, -84, 60 },
+ { -440, 312, -195, 221, 251, -388, -116, -252, -101, 92, -244,
+ -694, -27, 198, -3, 255, -257, -17, 0, 143, -20, 48,
+ -68, 110, -130, -340, 136, -45, -138, 251, -111, -2 },
+ { 325, 219, -68, 215, -177, -206, 14, 108, -291, 211, 92,
+ -62, -166, -218, -158, -220, -279, 199, 113, -263, 271, 153,
+ -433, -16, 19, -322, -28, 258, -295, -300, -285, -123 },
+ { -345, 543, 356, -541, -726, -205, -332, -397, -10, -132, 232,
+ 132, 308, 324, 229, 79, -151, 161, 143, -40, -144, -464,
+ 32, -364, -11, -99, -285, 61, -258, 182, -28, 107 },
+ { -55, 70, -78, -269, -709, -52, 351, 94, 80, 268, 249,
+ -56, 189, -191, -60, -88, 15, -205, 111, -62, 21, 85,
+ 77, -107, -35, -13, -107, -472, -546, -197, 5, 115 },
+ { -363, -297, 246, -84, -419, -230, 283, -128, 34, -27, 112,
+ 125, 166, 163, 176, -422, 14, -238, -80, -153, 313, -366,
+ -208, -54, -260, 48, -176, 21, -91, -295, -270, 40 },
+ { 85, 242, 107, -41, -283, -390, -105, 360, 181, -720, -582,
+ 27, -96, -350, -217, -189, -135, -12, 280, 86, 3, 25,
+ -126, -213, -384, 41, -15, 101, -68, 143, -211, 86 },
+ { -183, 13, 274, -46, -86, -633, 181, -232, -90, -106, -22,
+ 332, -12, -16, -30, 87, 5, 46, 37, -99, 27, 292,
+ -74, -94, -237, -16, -145, 76, -106, 227, -52, 168 },
+ { 40, -258, -140, -6, 203, 146, -64, -88, -183, 221, 62,
+ 67, 114, -216, -307, -560, -197, -46, 149, -126, -120, -316,
+ -36, -227, -200, 115, -41, -51, 97, 123, -47, 103 },
+ { -51, 44, -99, -230, -156, -46, -145, -412, -56, 48, -239,
+ 222, 83, -339, -196, -64, 175, 149, -140, -316, 6, -62,
+ -27, -56, -21, -269, 229, -7, 122, -18, -129, 86 },
+ { -372, 106, 18, 172, 364, 19, -245, -73, -124, 164, -9,
+ 14, 214, -67, -217, -175, -45, 119, -194, 36, 18, -83,
+ 126, 196, 112, -297, -102, 104, -74, -152, 19, 199 },
+ { 314, 81, -49, -188, 48, -82, -4, 107, -221, -4, 207,
+ -245, 197, -37, -185, -50, -56, -214, 100, -231, -31, -2,
+ 21, -53, -215, -77, 168, -23, 82, 5, 155, 169 },
+ { 258, 188, -27, -27, 165, 29, -17, 100, -27, -80, -80,
+ 196, 23, -391, -533, -171, 84, -137, 0, 14, 251, 99,
+ 35, 88, -28, 1, 144, -96, -235, 176, 103, -85 }
+};
+static int16_t default_ncobmc_krnl_2_1_1[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 5724, 6155, 5101, 6937, 3616, 3940, 3066, 5662, 7104, 5021, 4979,
+ 5907, 4968, 7085, 6582, 7719, 9143, 4128, 6447, 4879, 7061, 11362,
+ 7837, 9965, 7152, 6477, 6581, 5803, 1819, 5309, 8559, 10776 },
+ { 1775, 3231, 4026, 2629, 4438, 6309, 5114, 2895, 5657, 6541, 6734,
+ 5994, 7468, 4555, 9911, 5200, 5402, 1698, 4298, 6112, 6417, 6691,
+ 4816, 6195, 4139, 5856, 3358, 1993, 1542, 661, 1660, 4762 },
+ { 1953, 726, 336, 2519, 4189, -753, 2993, 4957, 5850, 4298, 3651,
+ 5353, 3255, 5491, 7815, 3406, 3928, 2987, 4148, 4276, 3530, 8058,
+ 5079, 5821, 4622, 3354, 3146, 2460, 489, 1550, 1587, 1399 },
+ { -801, 328, 103, 886, 1381, 2280, 4320, 2452, 1215, 6261, 2206,
+ 4849, 4488, 3829, 6128, 5213, 1739, 3173, 4425, 4567, 5845, 5197,
+ 5910, 6147, 4260, 3730, 4240, 5420, 307, 672, 963, 3278 },
+ { -1721, -2596, -155, 3029, 3428, 2390, 2321, 3757, 1383, -1283, -1621,
+ 1418, 2475, 4188, 5570, 3575, 799, 4017, 2856, 1426, 2012, 2722,
+ 3669, 4104, 3800, 4116, 3275, 3739, 326, 95, 2421, 3075 },
+ { -551, -927, -520, 2944, 2518, -722, -215, 1875, 137, 2182, 2761,
+ 159, 762, 3693, 1681, 2600, 880, 3273, 4470, 5007, 4272, 3074,
+ 2474, 4254, 6828, 4219, 3671, 2407, 1044, 129, -478, 2814 },
+ { -2686, -1229, 1372, 4761, 4668, 1462, 509, 2727, 930, 2438, 3542,
+ 1456, 1961, 541, 1063, 1426, 3603, 2873, 2412, 2999, 2101, 3739,
+ 2385, 5494, 5444, 5655, 5034, 381, 321, 90, 2585, 4160 },
+ { -4203, 479, 1122, 2688, 2124, 942, -2136, -1643, -491, 2581, -2155,
+ -2375, 559, 582, 2202, 2081, 3774, 3330, 1101, 894, 3410, 3691,
+ 2509, 5195, 6226, 5471, 5022, 2525, 778, 1212, 2736, 3350 },
+ { -2415, -2903, 4719, 5860, 4006, 2692, 4035, 4143, 2498, 4377, 2058,
+ 488, 1429, 3199, -11, 2009, 2087, 2903, 155, 522, 4521, 2221,
+ 2310, 3124, 2870, 1941, 3262, 2258, 1515, 2257, 1584, 1048 },
+ { -1469, -2652, -561, 2135, 389, -522, -589, 447, -847, 268, -1641,
+ -1540, -1513, -1334, -599, -581, 2848, 2828, 1416, 2157, 2198, 925,
+ 2421, 1437, 1963, 369, 2195, -548, 2051, 868, 824, 2683 },
+ { -2620, -3631, -4548, -885, 629, 523, -528, -2178, -1743, 1644, 353,
+ -2687, -3041, -1722, 283, 178, 1594, 1190, 968, -386, 2305, 1317,
+ 245, 1443, 968, 800, 471, 521, 1564, 669, 903, 243 },
+ { -1791, -3282, -4140, -1753, -1006, -374, 1027, -176, -1477, -891, 191,
+ -912, 497, 96, 359, 1045, 1467, 172, 1303, 2510, 3516, 3671,
+ 789, -807, 2670, 1483, 547, -521, -1219, -1856, 1008, 1053 },
+ { -1427, -2698, -3949, -436, 801, -614, -1548, 523, -176, -683, 423,
+ -871, 820, -2279, -143, 375, 768, 2306, 5249, 1302, -338, -396,
+ -1590, -608, 1469, 2344, -187, -693, 599, -661, -458, 160 },
+ { -3491, -3877, -2952, 1252, 767, -3037, -3638, 188, 587, 710, 1416,
+ 1176, -319, -473, 1873, -1997, 725, 596, -94, 1875, 2992, -519,
+ -139, 1938, 1025, 521, 760, 1090, 3648, 392, 564, 902 },
+ { -2186, -3264, -1742, 2634, -36, -51, -1253, -314, -908, -459, -1701,
+ -1437, -991, 84, 1265, -964, 402, 1454, -772, -927, 1765, 1543,
+ 484, 2346, 3310, 1887, 1754, 3058, 1474, 728, -466, -1646 },
+ { -1826, -332, 48, 744, -618, -97, -165, -155, -908, -143, 1285,
+ 1739, 1185, 885, 1134, -531, -15, -526, 543, 1438, 2026, 3022,
+ 558, 1827, -139, 1792, 2022, 769, 2400, 444, -1572, 598 },
+ { 165, -357, 15, 666, 1315, 1155, 376, -7, 991, 213, 1687,
+ -34, 452, 352, 203, 1605, 1484, -498, 581, 533, 467, 1744,
+ 1315, 874, 82, 900, 1437, -692, -417, 456, -271, -1132 },
+ { 646, 210, 320, 1208, 145, 971, 396, -448, 557, 1876, -1791,
+ 913, -1288, -452, 1015, 925, -1197, -49, -285, 442, 1093, -410,
+ 125, 519, -52, 513, 1497, -1337, 298, -402, 820, 732 },
+ { -796, 627, -1017, 2972, 4463, 2331, 1387, 1496, 1796, 1608, 1681,
+ -877, 881, -160, -581, -433, 949, 471, 307, 140, -946, -597,
+ 247, 650, 1143, 694, 10, -682, 890, 409, 617, 810 },
+ { 1653, 4435, 2388, 294, 2578, 1229, 1072, 1871, 465, 1650, 1524,
+ -430, -1195, -3427, -116, 1117, 217, 967, -254, 259, -55, 1425,
+ 1583, -1261, -1773, 1232, 2886, 646, 1346, 1518, 2090, -837 },
+ { 2020, 728, 2038, 316, 5725, 4193, 890, 1490, 584, 2705, 694,
+ -892, 34, 2041, 972, 332, -295, -218, -756, 2193, 1672, 1440,
+ 2310, -2136, -2204, 399, -753, 743, 3155, 2521, 3534, 166 },
+ { 824, 1664, 991, 853, 700, -80, 148, -908, -194, -620, 1053,
+ -368, 1616, 1250, 1449, 3140, -1065, 286, 2226, -590, -570, -1131,
+ 477, -61, -708, 519, 586, 1148, 898, 1653, 4697, 1581 },
+ { 2014, 1921, -210, 556, 686, -561, -1239, -1345, -664, -138, -215,
+ -343, 1019, 1294, 519, -179, 212, -299, -2160, -1450, -329, 293,
+ 691, 162, -645, 1079, 2005, 1466, 1127, 2263, 730, 179 },
+ { 5629, 4670, 597, 2030, 3873, 3698, 54, 2714, 62, 352, 2177,
+ 908, 1306, 1504, 1464, -288, -106, -69, -179, -900, -1340, -4,
+ 877, 487, 2606, 358, 2055, 1131, 1421, 931, -477, 1173 },
+ { 757, -493, 1510, 2513, 4514, 4649, -478, 2069, 124, -1186, 2855,
+ 1906, 1420, 1738, 19, 1916, 1195, -519, 32, 512, 230, 528,
+ 43, -263, 1314, 1350, 137, -256, 939, 256, 168, -201 },
+ { 663, 947, 699, 3239, 4730, 5279, 1739, 1659, 2774, -1660, -1677,
+ 185, 3745, 1319, 2347, 477, 364, 531, 608, -520, -783, -123,
+ -59, -345, 1202, 1766, 88, 883, 654, 1399, -1082, 658 },
+ { 4534, 5694, 5332, 4909, 4828, 4761, 7376, 3834, 2327, 4737, 7135,
+ 5306, 6337, 5240, 5578, 4321, 2107, -205, 1387, 597, 1112, 904,
+ 1567, 610, 461, 371, 250, 602, 358, 1807, -617, -59 },
+ { 6124, 8363, 9624, 5674, 7043, 4437, 3846, 3121, 3477, 2818, 5445,
+ 3618, 5067, 3996, 5759, 7185, 2150, 785, 1581, 2084, 3321, 4828,
+ -545, 510, 2309, 2501, 1594, 2028, 528, 113, 248, 550 },
+ { 8154, 9890, 6292, 6421, 8295, 4403, 7503, 5496, 7256, 3699, 2845,
+ 3725, 5365, 5905, 7170, 2903, 733, 4614, 3856, 4346, 7099, -902,
+ -1492, 1703, 2321, 1842, 3488, 1690, 982, 524, -467, -687 },
+ { 5338, 10331, 7754, 7014, 3581, 5660, 5471, 5420, 3976, 2548, 6486,
+ 9144, 6584, 5442, 6795, 4845, 5182, 2855, 8246, 3660, 5417, 1845,
+ 1803, 288, 1434, 639, 1404, 2752, 923, 1055, 741, -984 },
+ { 4457, 7110, 5195, 5959, 6818, 8562, 5548, 2071, 5544, 8734, 7080,
+ 4737, 9481, 7672, 8374, 7638, 4204, 3562, 3758, 3598, 5016, 2863,
+ 3927, 5001, 4677, 4444, 2481, 1773, 2525, 3142, 4840, 3965 },
+ { 1134, 3249, 4702, 5483, 4471, 7234, 7281, 6240, 5891, 7577, 3826,
+ 5886, 4798, 7117, 6319, 7264, 4115, 5613, 4674, 4999, 4518, 2501,
+ 6830, 4913, 2356, 789, 1926, 2190, 1914, 1434, 987, 1761 }
+};
+static int16_t default_ncobmc_krnl_2_1_2[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 6131, 7769, 6548, 6297, 4967, 4708, 3127, 5937, 697, 748, 1850,
+ 2290, 2945, -80, 216, 377, 318, 1009, 2112, 2962, -886, 849,
+ 510, 4160, 2257, 2875, 4589, 5345, 7363, 5350, 6815, 1644 },
+ { 6949, 8044, 7295, 7318, 3142, 2084, 1819, 3048, 1654, 1831, 1344,
+ 3344, 2065, 2889, -88, 3746, 696, 1143, 232, 1444, 1587, 4125,
+ 3991, 3840, 5642, 4933, 3560, 6540, 5865, 6663, 6729, 5520 },
+ { 7816, 4894, 7089, 7533, 4271, 6814, 1972, 3845, 3755, 3498, 3571,
+ 1884, 3171, 1843, 70, 2358, 2622, 1241, 143, 2657, 3804, 2968,
+ 1781, 262, 2864, 4345, 1302, 5434, 7815, 10560, 9211, 8202 },
+ { 10656, 7490, 8639, 7975, 4318, 7432, 6148, 3321, 3776, 2781, 3544,
+ 246, 2350, 793, 1600, 1266, 2372, -1382, -983, 1926, 493, 447,
+ 2275, 3510, 4789, 3766, 878, 2353, 3314, 6282, 5853, 3709 },
+ { 11083, 7270, 6211, 6170, 4927, 4198, 3939, 4605, 1734, 2009, 2950,
+ 546, 722, 99, 550, 597, 2350, 41, 1314, 1148, -183, 1143,
+ 5392, 3550, 3102, 1161, -556, 1700, 7598, 8412, 6019, 9654 },
+ { 10358, 7350, 6589, 5975, 3587, 6201, 4603, 3974, 2262, 886, 1815,
+ 1899, 1642, 2894, 1557, 228, 1625, 1879, 838, 182, 919, 1168,
+ 3272, 1155, 889, 2292, 128, 4478, 5205, 7668, 8767, 10921 },
+ { 8569, 4702, 5397, 5147, 2577, 4301, 2139, 1630, 721, 1721, -218,
+ 1595, 275, 1133, 1051, -777, 1556, -245, 972, 106, 2205, 385,
+ 1410, 366, 3348, 2139, -164, 3111, 2656, 5036, 6021, 4847 },
+ { 7654, 5535, 5975, 4580, 3005, 5483, 4637, 5560, 6252, 4946, 4508,
+ 3600, 1824, 1528, 338, 131, 1290, 309, 344, 3110, 3607, 2484,
+ 1062, 1267, 1426, -860, 1155, 6137, 2415, 5482, 6846, 4916 },
+ { 8060, 5296, 4396, 2040, 867, 1189, 3555, 3397, 3438, 664, -1931,
+ -1938, -1414, 1317, 762, -312, -655, -801, -243, 2795, 1663, 1314,
+ 1478, 2856, 562, 1075, 3211, 7482, 2988, 3880, 4156, 3289 },
+ { 8146, 7596, 7056, 7622, 5755, 7181, 7862, 4736, 4932, 3146, 1043,
+ -422, -813, -2152, 1444, 441, 3599, 395, 2173, 755, 4245, 3047,
+ 1545, 1062, 1159, 1621, 209, 6521, 7385, 7730, 6511, 8959 },
+ { 9567, 8044, 7535, 6969, 3284, 4284, 4734, 4758, 5177, 2342, 230,
+ -1852, -839, -769, 222, 255, -315, -16, 1101, -28, 3561, 2004,
+ -260, 789, 1856, 1960, 4962, 4207, 2425, 8406, 6771, 7796 },
+ { 8019, 7612, 8357, 5521, 4711, 3374, 4391, 7093, 5013, 3608, 238,
+ -1564, -1662, -1373, -198, -1045, 100, 2694, 1251, 489, 2110, 1670,
+ 188, -1362, 953, 2340, 3361, 3595, 6405, 7676, 1634, 7730 },
+ { 10177, 6488, 5822, 5121, 2615, 2725, 3372, 4849, 2232, 2548, 2841,
+ 874, 895, 307, 1293, -150, 411, -981, -815, -24, 936, -2339,
+ 254, 3019, 5892, 4302, -2171, 6747, 7198, 5638, 4832, 9538 },
+ { 7260, 9945, 2818, 1106, 6179, 6331, 5106, 1814, 5997, 4045, 1456,
+ -230, 297, 1045, 1918, -126, 752, 1014, 999, -506, 198, -732,
+ -1900, 139, 749, 3999, 5614, 5241, 6339, 8316, 3673, 7681 },
+ { 11101, 6954, 7475, 5729, 4242, 6118, 4569, 2348, 5307, 3762, 2933,
+ -1610, 988, -1178, -104, -151, -507, 491, -906, 1236, 3075, 1525,
+ 1631, 2901, 2758, 1303, 1578, 6405, 3807, 7189, 8468, 9262 },
+ { 6835, 4602, 5501, 5568, 4338, 6143, 4304, 3557, 3258, 3797, 1242,
+ 968, 1683, -251, 1218, 301, 1257, 1924, 985, 1251, 3051, 433,
+ 1756, 167, -660, 3884, 3450, 7202, 6544, 5184, 7556, 9366 },
+ { 5991, 6762, 3854, 4856, 6714, 5701, 4072, 2489, 422, -365, 1488,
+ 1660, 725, 1157, -778, 654, 313, -18, 3162, 3065, 2925, 2391,
+ 827, 5547, 461, 2487, 1492, 5810, 7042, 5284, 3995, 6870 },
+ { 6435, 8283, 4732, 5896, 5599, 4229, 4798, 3309, 3128, 941, 2565,
+ 394, 257, 2477, 721, 1494, 3161, 1409, 1306, 2534, 1261, 2719,
+ 756, 4388, 570, 5416, 3719, 6067, 4092, 2565, 6299, 10504 },
+ { 6042, 7417, 5391, 4671, 3245, 7547, 3777, 3203, 2044, 583, 2083,
+ 1971, 1721, 1948, -169, 1197, -1141, -480, 2155, 1033, 1313, 268,
+ 1857, 4493, 3083, 2005, 5347, 4397, 10144, 4828, 6622, 9817 },
+ { 7202, 5045, 6601, 6937, 3704, 5796, 5061, 3575, 2383, 1389, 3111,
+ 1751, 1603, 2813, 174, 706, -569, 2620, 1735, 1418, 1871, -1542,
+ 168, 2156, 5107, 6329, 4968, 7018, 6279, 6864, 5898, 9157 },
+ { 5722, 5683, 4189, 4814, 2883, 5508, 5100, 1625, 2169, 3680, 1884,
+ 2109, 462, 1145, 334, 515, 191, 441, 1058, 917, 1528, -96,
+ 1843, 5395, 4498, 5681, 4193, 5196, 8356, 5303, 7262, 10141 },
+ { 5879, 5779, 7257, 3873, 6911, 6238, 5672, 3583, 3261, 3048, 2536,
+ -310, -1046, -69, -660, 417, -719, -2058, 1740, 888, 2746, 1367,
+ 1668, 1090, 1830, 1153, 5047, 7336, 3380, 7160, 4422, 9401 },
+ { 7809, 7945, 8385, 8535, 7803, 3953, 5065, 3185, 2013, 1659, 1648,
+ 769, 292, -135, 114, -579, 713, 1407, -1181, 1569, 3525, 5630,
+ 219, 3518, 3739, 3432, 7282, 6357, 619, 5779, 10116, 6448 },
+ { 9496, 7224, 5342, 5960, 5092, 4225, 4353, 3995, 3631, 1662, 1413,
+ 762, 534, 126, -551, -1025, 2327, 602, -452, 1285, 2103, 2579,
+ -1369, 2724, 6353, 3925, 4631, 9139, 4974, 6630, 7755, 4125 },
+ { 5226, 7729, 5768, 5815, 4531, 2948, 3029, 2603, 2549, 1366, 119,
+ 405, 21, -1831, -327, -287, -415, -1317, -214, 3017, 1586, 2436,
+ 868, 1094, 290, 668, 2117, 756, 1228, 2700, 5743, 8052 },
+ { 6262, 5531, 4454, 4616, 3913, 2022, 4240, 2241, 4201, 2506, 1810,
+ 628, -496, -779, -471, 394, 756, 1666, -445, 490, 575, -478,
+ 894, 1182, 822, 626, 1782, 1781, 5333, 5482, 1760, 8187 },
+ { 6488, 6875, 4960, 6837, 4564, 1871, 390, 2940, 4330, 1634, 131,
+ -1102, -1451, -928, -1067, -419, -614, -2, 1017, 1066, 1051, 917,
+ 1097, 844, 465, 513, 2377, 1031, 3548, 5088, 4516, 10564 },
+ { 6497, 6047, 5649, 7156, 4974, 3683, 2875, 4421, 1502, 1244, 668,
+ -30, -1465, -59, -399, -721, 954, -281, -2, 664, 1039, 814,
+ 758, 1911, 319, 4247, 1848, 1606, 2536, 2189, 1372, 7759 },
+ { 5994, 5659, 6777, 6693, 4758, 2986, 1463, 1186, 2116, -166, 499,
+ 73, -1151, -164, 279, -895, -169, 339, 1194, 1772, 752, 1649,
+ 1696, -2615, 1581, 1740, 1789, 1832, 1899, 510, 2135, 7149 },
+ { 9107, 4250, 5418, 4334, 613, 2618, 3395, 4809, 1724, 873, -78,
+ -1146, -431, -547, -1104, -1128, -6, -290, 945, 794, 564, 1670,
+ 737, 4540, 1574, 6285, 2596, 2859, 1191, 1428, 5614, 8419 },
+ { 5905, 4490, 6470, 3636, 2119, 1731, 3532, 2461, 2391, 473, 176,
+ -562, 389, -1300, -916, -1436, 371, 567, 1038, 866, 59, 195,
+ 679, -721, 2994, 3260, 1813, 1589, 850, 1982, 7410, 11546 },
+ { 7265, 8775, 6672, 6657, 6182, 3732, 3222, 4564, 2644, 790, 924,
+ -596, 628, -681, -57, -236, 103, 364, 603, 1420, 309, 787,
+ 1257, 770, 2453, 3401, 1175, 434, 792, 4019, 8792, 11773 }
+};
+static int16_t default_ncobmc_krnl_2_1_3[MAX_SB_SIZE][MAX_SB_SIZE] = {
+ { 391, -894, -939, 1155, 4362, 4297, 7296, 2684, 3758, 8010, 8044,
+ 9041, 8748, 8816, 10796, 8701, 6840, 11306, 7814, 8456, 9952, 3511,
+ 7870, 2227, 7018, 7148, 4672, 5660, 6657, 6007, 1098, 3866 },
+ { 2970, 945, 619, 1701, 4540, 3326, 7140, 8401, 6001, 5524, 6311,
+ 5657, 5333, 9833, 7547, 8127, 10894, 14326, 12130, 8591, 8408, 5873,
+ 7524, 6398, 7054, 6594, 9788, 8347, 8784, 9253, 8154, 6170 },
+ { 3423, 6928, 5192, 5699, 5575, 6852, 8083, 7546, 8019, 8464, 8910,
+ 9251, 11401, 8637, 9356, 9671, 10065, 12652, 12275, 9662, 9627, 5550,
+ 9836, 10565, 9075, 9350, 11656, 8549, 8120, 4437, 5501, 6658 },
+ { 5859, 5714, 6766, 5830, 7266, 4208, 5956, 8173, 10615, 7557, 10533,
+ 8101, 7530, 9292, 9312, 9603, 11268, 14896, 12761, 10435, 10584, 10602,
+ 7945, 6677, 7798, 9184, 11805, 9688, 12921, 9831, 9425, 9409 },
+ { 5068, 7732, 8953, 7750, 6739, 7145, 7635, 7400, 9896, 11465, 12344,
+ 14483, 13309, 11497, 10778, 11614, 13096, 11519, 12197, 13573, 14652, 12324,
+ 7270, 8764, 10162, 11289, 13446, 10681, 7564, 7663, 7650, 3879 },
+ { 6073, 8775, 7134, 7485, 8815, 9982, 9893, 11182, 10807, 12415, 10385,
+ 13211, 13198, 9974, 13590, 13229, 14029, 10733, 10710, 10950, 11286, 12150,
+ 10133, 10858, 8958, 9903, 12033, 9177, 9756, 8710, 8055, 3108 },
+ { 8368, 10916, 7650, 6261, 8713, 10236, 12507, 10373, 12385, 11135, 11343,
+ 12039, 12114, 14871, 13861, 13742, 11649, 13839, 13207, 13160, 11863, 11950,
+ 12423, 10188, 7712, 8705, 11270, 12864, 13370, 11422, 7881, 7390 },
+ { 10805, 12233, 10301, 9238, 9352, 7871, 10959, 12870, 11641, 9692, 12373,
+ 13839, 12380, 14055, 14653, 13348, 11227, 12844, 14769, 12714, 9815, 10484,
+ 12966, 10123, 8644, 11791, 9911, 7598, 13225, 9539, 6774, 8055 },
+ { 7987, 9257, 6281, 7446, 8911, 10506, 7039, 9031, 9319, 10294, 13979,
+ 15391, 14445, 11372, 14852, 14690, 14954, 14129, 16319, 13385, 10855, 12837,
+ 13065, 10647, 12815, 13043, 9686, 7003, 12028, 10211, 10237, 11699 },
+ { 6073, 7893, 7571, 5698, 8244, 7305, 6581, 9719, 9746, 11432, 12215,
+ 16346, 17408, 17379, 13508, 14637, 10471, 13204, 13089, 13632, 10135, 12397,
+ 12431, 13511, 13140, 13999, 14081, 10639, 7173, 7807, 9433, 4659 },
+ { 6634, 10941, 11920, 9920, 11356, 10608, 10624, 12593, 11330, 11413, 13971,
+ 18455, 16400, 16654, 15373, 16023, 15144, 15413, 14357, 16626, 10718, 12841,
+ 16053, 14104, 13496, 13334, 10605, 11490, 12221, 6956, 9178, 8213 },
+ { 7366, 9121, 9253, 11198, 9839, 11458, 10864, 8319, 12656, 12437, 13128,
+ 15378, 14565, 16278, 15940, 14457, 15156, 13972, 14035, 13587, 10888, 11376,
+ 15176, 18483, 13236, 12754, 12347, 13247, 11785, 10432, 13455, 7419 },
+ { 7665, 10318, 12372, 11702, 11166, 12470, 11859, 10983, 12921, 13947, 12106,
+ 14300, 13037, 17367, 14444, 15259, 15107, 14974, 11715, 14835, 15525, 18775,
+ 17479, 13835, 9101, 10034, 18554, 10201, 8666, 11181, 11767, 6530 },
+ { 11169, 7696, 11879, 11938, 10302, 13271, 12067, 13360, 9715, 12528, 13879,
+ 15312, 17012, 15194, 12951, 17211, 14989, 14796, 15695, 14942, 13140, 17003,
+ 18104, 14131, 14490, 11607, 9697, 10346, 6890, 7337, 12248, 7668 },
+ { 7494, 9902, 9327, 10081, 9955, 10895, 12521, 13971, 11975, 12950, 13579,
+ 19214, 16537, 17208, 15292, 17698, 16633, 14485, 17676, 15920, 11698, 13314,
+ 13747, 11163, 10360, 13396, 13119, 7073, 11331, 8217, 8258, 8754 },
+ { 9934, 11319, 10239, 9047, 11387, 10784, 12566, 13038, 13663, 12717, 14675,
+ 14008, 14178, 15820, 14510, 16181, 15440, 15283, 15009, 13767, 11372, 13359,
+ 14352, 14480, 17066, 10914, 11175, 8554, 7428, 10827, 10561, 6443 },
+ { 10016, 9986, 12912, 11133, 8475, 9995, 12150, 14006, 15182, 16531, 13117,
+ 14634, 15313, 15598, 16928, 14269, 14814, 17080, 12532, 12849, 13261, 12479,
+ 14442, 9716, 15960, 13029, 13398, 10927, 9854, 10849, 12580, 10547 },
+ { 9295, 7913, 11422, 9455, 10319, 11278, 11274, 13394, 13038, 13821, 15044,
+ 14686, 17187, 14091, 14823, 14137, 14455, 15111, 15447, 13582, 14076, 14295,
+ 15643, 11185, 16015, 10747, 11235, 11551, 12009, 13990, 8881, 5003 },
+ { 11095, 8615, 12138, 8821, 9239, 6419, 11207, 11937, 12556, 14236, 12501,
+ 14976, 13740, 15006, 17876, 15826, 16800, 16761, 13880, 15072, 16296, 16857,
+ 14333, 11125, 12310, 13605, 10932, 12928, 5472, 11185, 9435, 5957 },
+ { 7725, 6887, 7535, 8957, 9967, 9700, 10640, 10680, 13275, 12682, 11517,
+ 15207, 15552, 17018, 16856, 14725, 16692, 12845, 14748, 14656, 14606, 16310,
+ 14672, 15510, 13069, 9039, 8315, 8606, 8826, 8214, 8487, 7999 },
+ { 9071, 9686, 10375, 11046, 7539, 7106, 10540, 13531, 13747, 9927, 14071,
+ 15876, 15935, 13026, 15104, 15296, 16773, 16198, 16098, 13165, 13227, 15002,
+ 12319, 13015, 14240, 10673, 12818, 10497, 5016, 8298, 5706, 6088 },
+ { 9366, 8741, 8215, 11450, 8961, 10464, 10575, 13631, 13635, 13752, 12735,
+ 17169, 16010, 15438, 15786, 13083, 18481, 17990, 12316, 16370, 13953, 16000,
+ 14693, 15392, 15242, 15049, 10809, 7658, 12399, 7866, 7570, 5544 },
+ { 6903, 5972, 7864, 7864, 8655, 13231, 12904, 14949, 15064, 15007, 14738,
+ 15847, 14769, 14910, 15543, 17103, 15630, 15115, 19594, 16319, 13352, 10936,
+ 15453, 13064, 13305, 12008, 7408, 8514, 14898, 8171, 5583, 9657 },
+ { 1309, 4431, 10551, 8701, 8152, 8547, 11642, 9601, 12635, 14116, 12560,
+ 14796, 14370, 14959, 15558, 17801, 14148, 16067, 16927, 16084, 15633, 13749,
+ 16805, 13274, 7467, 12136, 9815, 6584, 10514, 9020, 9109, 10981 },
+ { 10778, 9464, 8877, 8157, 7779, 9056, 13584, 11871, 13714, 16259, 13305,
+ 13956, 14785, 16328, 16541, 15199, 15586, 18478, 16668, 13019, 14279, 13814,
+ 15684, 15613, 15050, 14345, 14327, 15869, 14316, 13744, 10738, 8497 },
+ { 9411, 9691, 11139, 8582, 8038, 9492, 10534, 12154, 9249, 16286, 16839,
+ 15572, 13252, 16207, 14760, 15743, 15428, 14223, 15971, 16378, 16607, 16993,
+ 15698, 15766, 14771, 13969, 14551, 13631, 10451, 9360, 15908, 7460 },
+ { 5565, 3814, 5832, 4698, 7091, 10412, 8442, 9852, 9831, 10137, 9167,
+ 11864, 11520, 12092, 11930, 12431, 14914, 16568, 13978, 14847, 14215, 14290,
+ 13812, 15033, 15711, 15541, 13908, 14681, 12577, 9266, 12542, 5718 },
+ { 3740, 2245, 1259, 3575, 4190, 8150, 9742, 8948, 11592, 12108, 10225,
+ 12748, 12684, 12687, 11339, 10475, 13481, 15937, 14669, 13780, 12167, 11074,
+ 16225, 14201, 13966, 9544, 12974, 12797, 13248, 13990, 14819, 7995 },
+ { 2296, 817, 3435, 3505, 3507, 9072, 7580, 10139, 7087, 12821, 13297,
+ 12396, 12113, 10999, 9149, 14466, 15677, 11290, 11487, 10612, 8552, 15725,
+ 16233, 17367, 12511, 13088, 10898, 12875, 13386, 15384, 14845, 9849 },
+ { 2320, 1714, 3209, 4858, 11853, 8126, 7775, 6246, 10834, 12812, 9996,
+ 8379, 10020, 11558, 10914, 12851, 11272, 13723, 7409, 11919, 10393, 12987,
+ 13756, 11382, 13258, 9754, 12513, 10697, 14356, 14065, 10023, 8748 },
+ { 5715, 4721, 4773, 6968, 7426, 6196, 7322, 11771, 8704, 7198, 8944,
+ 12478, 6336, 10064, 9132, 10252, 11884, 12483, 11504, 12168, 11346, 13354,
+ 11779, 12178, 8942, 8770, 11937, 13047, 12938, 11277, 4002, 710 },
+ { 7743, 4184, 5058, 4276, 5576, 5393, 5919, 5500, 7881, 8102, 11726,
+ 10912, 10943, 10344, 10654, 9537, 12118, 10565, 11112, 9964, 11328, 13005,
+ 8273, 10626, 11596, 12198, 13157, 13884, 13912, 10737, 6497, 2938 }
+};
+
+void get_default_ncobmc_kernels(AV1_COMMON *cm) {
+ av1_copy(cm->ncobmc_kernels[0][0].KERNEL[0], default_ncobmc_krnl_0_0_0);
+ av1_copy(cm->ncobmc_kernels[0][0].KERNEL[1], default_ncobmc_krnl_0_0_1);
+ av1_copy(cm->ncobmc_kernels[0][0].KERNEL[2], default_ncobmc_krnl_0_0_2);
+ av1_copy(cm->ncobmc_kernels[0][0].KERNEL[3], default_ncobmc_krnl_0_0_3);
+ av1_copy(cm->ncobmc_kernels[0][1].KERNEL[0], default_ncobmc_krnl_0_1_0);
+ av1_copy(cm->ncobmc_kernels[0][1].KERNEL[1], default_ncobmc_krnl_0_1_1);
+ av1_copy(cm->ncobmc_kernels[0][1].KERNEL[2], default_ncobmc_krnl_0_1_2);
+ av1_copy(cm->ncobmc_kernels[0][1].KERNEL[3], default_ncobmc_krnl_0_1_3);
+ av1_copy(cm->ncobmc_kernels[1][0].KERNEL[0], default_ncobmc_krnl_1_0_0);
+ av1_copy(cm->ncobmc_kernels[1][0].KERNEL[1], default_ncobmc_krnl_1_0_1);
+ av1_copy(cm->ncobmc_kernels[1][0].KERNEL[2], default_ncobmc_krnl_1_0_2);
+ av1_copy(cm->ncobmc_kernels[1][0].KERNEL[3], default_ncobmc_krnl_1_0_3);
+ av1_copy(cm->ncobmc_kernels[1][1].KERNEL[0], default_ncobmc_krnl_1_1_0);
+ av1_copy(cm->ncobmc_kernels[1][1].KERNEL[1], default_ncobmc_krnl_1_1_1);
+ av1_copy(cm->ncobmc_kernels[1][1].KERNEL[2], default_ncobmc_krnl_1_1_2);
+ av1_copy(cm->ncobmc_kernels[1][1].KERNEL[3], default_ncobmc_krnl_1_1_3);
+ av1_copy(cm->ncobmc_kernels[2][0].KERNEL[0], default_ncobmc_krnl_2_0_0);
+ av1_copy(cm->ncobmc_kernels[2][0].KERNEL[1], default_ncobmc_krnl_2_0_1);
+ av1_copy(cm->ncobmc_kernels[2][0].KERNEL[2], default_ncobmc_krnl_2_0_2);
+ av1_copy(cm->ncobmc_kernels[2][0].KERNEL[3], default_ncobmc_krnl_2_0_3);
+ av1_copy(cm->ncobmc_kernels[2][1].KERNEL[0], default_ncobmc_krnl_2_1_0);
+ av1_copy(cm->ncobmc_kernels[2][1].KERNEL[1], default_ncobmc_krnl_2_1_1);
+ av1_copy(cm->ncobmc_kernels[2][1].KERNEL[2], default_ncobmc_krnl_2_1_2);
+ av1_copy(cm->ncobmc_kernels[2][1].KERNEL[3], default_ncobmc_krnl_2_1_3);
+}
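The four kernels in each set act as per-pixel fixed-point weights for four
candidate predictions: at any position, the four tables of a set sum to a
value close to 1 << 14 (e.g. 4140 + 5724 + 6131 + 391 = 16386 at [0][0] of
the 2_1 set above). The sketch below shows one plausible way such a kernel
set could be applied; it illustrates the weighting scheme only, is not the
library's implementation, and every name in it is hypothetical.

/* Minimal sketch, assuming MAX_SB_SIZE and the stdint types from the
   headers above are in scope. */
#define NCOBMC_WEIGHT_BITS 14 /* assumption: Q14 fixed-point weights */

static void apply_ncobmc_kernels_sketch(
    uint8_t *dst, int dst_stride, const uint8_t *pred[4], int pred_stride,
    const int16_t krnl[4][MAX_SB_SIZE][MAX_SB_SIZE], int bw, int bh) {
  for (int r = 0; r < bh; ++r) {
    for (int c = 0; c < bw; ++c) {
      /* Weighted sum of the four candidate predictions, with rounding. */
      int32_t acc = 1 << (NCOBMC_WEIGHT_BITS - 1);
      for (int k = 0; k < 4; ++k)
        acc += krnl[k][r][c] * (int32_t)pred[k][r * pred_stride + c];
      acc >>= NCOBMC_WEIGHT_BITS;
      /* Kernel taps can be negative, so clamp to the 8-bit pixel range. */
      dst[r * dst_stride + c] =
          (uint8_t)(acc < 0 ? 0 : (acc > 255 ? 255 : acc));
    }
  }
}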
diff --git a/third_party/aom/av1/common/ncobmc_kernels.h b/third_party/aom/av1/common/ncobmc_kernels.h
new file mode 100644
index 000000000..358b7b7c8
--- /dev/null
+++ b/third_party/aom/av1/common/ncobmc_kernels.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdio.h>
+#include "av1/common/enums.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/common.h"
+
+#ifndef AV1_COMMON_NCOBMC_KERNELS_H_
+#define AV1_COMMON_NCOBMC_KERNELS_H_
+
+void get_default_ncobmc_kernels(AV1_COMMON *cm);
+
+#endif // AV1_COMMON_NCOBMC_KERNELS_H_
diff --git a/third_party/aom/av1/common/obmc.h b/third_party/aom/av1/common/obmc.h
new file mode 100644
index 000000000..f3940490f
--- /dev/null
+++ b/third_party/aom/av1/common/obmc.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_COMMON_OBMC_H_
+#define AV1_COMMON_OBMC_H_
+
+#if CONFIG_MOTION_VAR
+typedef void (*overlappable_nb_visitor_t)(MACROBLOCKD *xd, int rel_mi_pos,
+ uint8_t nb_mi_size, MODE_INFO *nb_mi,
+ void *fun_ctxt);
+
+static INLINE void foreach_overlappable_nb_above(const AV1_COMMON *cm,
+ MACROBLOCKD *xd, int mi_col,
+ int nb_max,
+ overlappable_nb_visitor_t fun,
+ void *fun_ctxt) {
+ if (!xd->up_available) return;
+
+ int nb_count = 0;
+
+ // prev_row_mi points into the mi array, starting at the beginning of the
+ // previous row.
+ MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
+ const int end_col = AOMMIN(mi_col + xd->n8_w, cm->mi_cols);
+ uint8_t mi_step;
+ for (int above_mi_col = mi_col; above_mi_col < end_col && nb_count < nb_max;
+ above_mi_col += mi_step) {
+ MODE_INFO **above_mi = prev_row_mi + above_mi_col;
+ mi_step = AOMMIN(mi_size_wide[above_mi[0]->mbmi.sb_type],
+ mi_size_wide[BLOCK_64X64]);
+#if CONFIG_CHROMA_SUB8X8
+ // If we're considering a block with width 4, it should be treated as
+ // half of a pair of blocks with chroma information in the second. Move
+ // above_mi_col back to the start of the pair if needed, set above_mbmi
+ // to point at the block with chroma information, and set mi_step to 2 to
+ // step over the entire pair at the end of the iteration.
+ if (mi_step == 1) {
+ above_mi_col &= ~1;
+ above_mi = prev_row_mi + above_mi_col + 1;
+ mi_step = 2;
+ }
+#endif // CONFIG_CHROMA_SUB8X8
+ MB_MODE_INFO *above_mbmi = &above_mi[0]->mbmi;
+ if (is_neighbor_overlappable(above_mbmi)) {
+ ++nb_count;
+ fun(xd, above_mi_col - mi_col, AOMMIN(xd->n8_w, mi_step), *above_mi,
+ fun_ctxt);
+ }
+ }
+}
+
+static INLINE void foreach_overlappable_nb_left(const AV1_COMMON *cm,
+ MACROBLOCKD *xd, int mi_row,
+ int nb_max,
+ overlappable_nb_visitor_t fun,
+ void *fun_ctxt) {
+ if (!xd->left_available) return;
+
+ int nb_count = 0;
+
+ // prev_col_mi points into the mi array, starting at the top of the
+ // previous column.
+ MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
+ const int end_row = AOMMIN(mi_row + xd->n8_h, cm->mi_rows);
+ uint8_t mi_step;
+ for (int left_mi_row = mi_row; left_mi_row < end_row && nb_count < nb_max;
+ left_mi_row += mi_step) {
+ MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
+ mi_step = AOMMIN(mi_size_high[left_mi[0]->mbmi.sb_type],
+ mi_size_high[BLOCK_64X64]);
+#if CONFIG_CHROMA_SUB8X8
+ if (mi_step == 1) {
+ left_mi_row &= ~1;
+ left_mi = prev_col_mi + (left_mi_row + 1) * xd->mi_stride;
+ mi_step = 2;
+ }
+#endif // CONFIG_CHROMA_SUB8X8
+ MB_MODE_INFO *left_mbmi = &left_mi[0]->mbmi;
+ if (is_neighbor_overlappable(left_mbmi)) {
+ ++nb_count;
+ fun(xd, left_mi_row - mi_row, AOMMIN(xd->n8_h, mi_step), *left_mi,
+ fun_ctxt);
+ }
+ }
+}
+
+#endif // CONFIG_MOTION_VAR
+#endif // AV1_COMMON_OBMC_H_
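foreach_overlappable_nb_above() and foreach_overlappable_nb_left() are a
small visitor pattern: the caller supplies a callback matching
overlappable_nb_visitor_t plus an opaque context, and the helper walks the
overlappable neighbors. A hedged usage sketch follows; the context struct
and callback are illustrative, not part of this patch.

struct nb_count_ctxt {
  int count;    /* overlappable neighbors visited */
  int total_mi; /* sum of their sizes in mi units */
};

static void count_nb_visitor(MACROBLOCKD *xd, int rel_mi_pos,
                             uint8_t nb_mi_size, MODE_INFO *nb_mi,
                             void *fun_ctxt) {
  struct nb_count_ctxt *ctxt = (struct nb_count_ctxt *)fun_ctxt;
  (void)xd;
  (void)rel_mi_pos;
  (void)nb_mi;
  ctxt->count += 1;
  ctxt->total_mi += nb_mi_size;
}

/* At a call site, assuming cm, xd and mi_col are in scope:
     struct nb_count_ctxt ctxt = { 0, 0 };
     foreach_overlappable_nb_above(cm, xd, mi_col, INT_MAX,
                                   count_nb_visitor, &ctxt);  */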
diff --git a/third_party/aom/av1/common/od_dering.c b/third_party/aom/av1/common/od_dering.c
deleted file mode 100644
index df4fb2ab5..000000000
--- a/third_party/aom/av1/common/od_dering.c
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <math.h>
-#include <stdlib.h>
-
-#ifdef HAVE_CONFIG_H
-#include "./config.h"
-#endif
-
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-#include "./cdef.h"
-
-/* Generated from gen_filter_tables.c. */
-const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
- { -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2,
- -3 * OD_FILT_BSTRIDE + 3 },
- { 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2,
- -1 * OD_FILT_BSTRIDE + 3 },
- { 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 },
- { 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 },
- { 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 },
- { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 },
- { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 },
- { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 },
-};
-
-/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
- The search minimizes the weighted variance along all the lines in a
- particular direction, i.e. the squared error between the input and a
- "predicted" block where each pixel is replaced by the average along a line
- in a particular direction. Since each direction have the same sum(x^2) term,
- that term is never computed. See Section 2, step 2, of:
- http://jmvalin.ca/notes/intra_paint.pdf */
-int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var,
- int coeff_shift) {
- int i;
- int32_t cost[8] = { 0 };
- int partial[8][15] = { { 0 } };
- int32_t best_cost = 0;
- int best_dir = 0;
- /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
- The output is then 840 times larger, but we don't care for finding
- the max. */
- static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
- for (i = 0; i < 8; i++) {
- int j;
- for (j = 0; j < 8; j++) {
- int x;
- /* We subtract 128 here to reduce the maximum range of the squared
- partial sums. */
- x = (img[i * stride + j] >> coeff_shift) - 128;
- partial[0][i + j] += x;
- partial[1][i + j / 2] += x;
- partial[2][i] += x;
- partial[3][3 + i - j / 2] += x;
- partial[4][7 + i - j] += x;
- partial[5][3 - i / 2 + j] += x;
- partial[6][j] += x;
- partial[7][i / 2 + j] += x;
- }
- }
- for (i = 0; i < 8; i++) {
- cost[2] += partial[2][i] * partial[2][i];
- cost[6] += partial[6][i] * partial[6][i];
- }
- cost[2] *= div_table[8];
- cost[6] *= div_table[8];
- for (i = 0; i < 7; i++) {
- cost[0] += (partial[0][i] * partial[0][i] +
- partial[0][14 - i] * partial[0][14 - i]) *
- div_table[i + 1];
- cost[4] += (partial[4][i] * partial[4][i] +
- partial[4][14 - i] * partial[4][14 - i]) *
- div_table[i + 1];
- }
- cost[0] += partial[0][7] * partial[0][7] * div_table[8];
- cost[4] += partial[4][7] * partial[4][7] * div_table[8];
- for (i = 1; i < 8; i += 2) {
- int j;
- for (j = 0; j < 4 + 1; j++) {
- cost[i] += partial[i][3 + j] * partial[i][3 + j];
- }
- cost[i] *= div_table[8];
- for (j = 0; j < 4 - 1; j++) {
- cost[i] += (partial[i][j] * partial[i][j] +
- partial[i][10 - j] * partial[i][10 - j]) *
- div_table[2 * j + 2];
- }
- }
- for (i = 0; i < 8; i++) {
- if (cost[i] > best_cost) {
- best_cost = cost[i];
- best_dir = i;
- }
- }
- /* Difference between the optimal variance and the variance along the
- orthogonal direction. Again, the sum(x^2) terms cancel out. */
- *var = best_cost - cost[(best_dir + 4) & 7];
- /* We'd normally divide by 840, but dividing by 1024 is close enough
- for what we're going to do with this. */
- *var >>= 10;
- return best_dir;
-}
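An aside on the div_table trick in the function above: 840 = 3 * 5 * 7 * 8
is the least common multiple of 1..8, so div_table[n] = 840 / n is exact
for every line length and scaling every cost by 840 preserves the argmax.
A throwaway check, just to make the invariant concrete:

#include <assert.h>
static void check_div_table_sketch(void) {
  static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
  for (int n = 1; n <= 8; n++) assert(div_table[n] * n == 840);
}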
-
-/* Smooth in the direction detected. */
-void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
- const uint16_t *in, int threshold,
- int dir, int damping) {
- int i;
- int j;
- int k;
- static const int taps[3] = { 3, 2, 1 };
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- int16_t sum;
- int16_t xx;
- int16_t yy;
- xx = in[i * OD_FILT_BSTRIDE + j];
- sum = 0;
- for (k = 0; k < 3; k++) {
- int16_t p0;
- int16_t p1;
- p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
- xx;
- p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
- xx;
- sum += taps[k] * constrain(p0, threshold, damping);
- sum += taps[k] * constrain(p1, threshold, damping);
- }
- sum = (sum + 8) >> 4;
- yy = xx + sum;
- y[i * ystride + j] = yy;
- }
- }
-}
-
-/* Smooth in the direction detected. */
-void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
- const uint16_t *in, int threshold,
- int dir, int damping) {
- int i;
- int j;
- int k;
- static const int taps[2] = { 4, 1 };
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++) {
- int16_t sum;
- int16_t xx;
- int16_t yy;
- xx = in[i * OD_FILT_BSTRIDE + j];
- sum = 0;
- for (k = 0; k < 2; k++) {
- int16_t p0;
- int16_t p1;
- p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
- xx;
- p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
- xx;
- sum += taps[k] * constrain(p0, threshold, damping);
- sum += taps[k] * constrain(p1, threshold, damping);
- }
- sum = (sum + 8) >> 4;
- yy = xx + sum;
- y[i * ystride + j] = yy;
- }
- }
-}
-
-/* Compute deringing filter threshold for an 8x8 block based on the
- directional variance difference. A high variance difference means that we
- have a highly directional pattern (e.g. a high contrast edge), so we can
- apply more deringing. A low variance means that we either have a low
- contrast edge, or a non-directional texture, so we want to be careful not
- to blur. */
-static INLINE int od_adjust_thresh(int threshold, int32_t var) {
- const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
- /* We use the variance of 8x8 blocks to adjust the threshold. */
- return var ? (threshold * (4 + i) + 8) >> 4 : 0;
-}
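Worked values for od_adjust_thresh (my reading of the removed code, with
rounding ignored):

  var == 0       -> 0                         (flat block: no filtering)
  var == 1       -> (4 * threshold + 8) >> 4  (about threshold / 4)
  var == 1 << 10 -> i = 4, about threshold / 2
  var >= 1 << 18 -> i = 12, the full threshold

So the strength ramps logarithmically with the directional variance, from
a quarter of the threshold up to the full threshold.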
-
-void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 8; i++)
- for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
-}
-
-void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
-}
-
-static void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride,
- uint16_t *src, dering_list *dlist,
- int dering_count, int bsize) {
- int bi, bx, by;
-
- if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
- &src[bi << (3 + 3)], 8);
- }
- } else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
- &src[bi << (3 + 2)], 4);
- copy_4x4_16bit_to_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
- dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
- }
- } else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
- &src[bi << (2 + 3)], 8);
- copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4],
- dstride, &src[(bi << (2 + 3)) + 4], 8);
- }
- } else {
- assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
- &src[bi << (2 + 2)], 4);
- }
- }
-}
-
-void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 8; i++)
- for (j = 0; j < 8; j++)
- dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
-}
-
-void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
- int sstride) {
- int i, j;
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++)
- dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
-}
-
-static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride,
- const uint16_t *src, dering_list *dlist,
- int dering_count, int bsize) {
- int bi, bx, by;
- if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
- &src[bi << (3 + 3)], 8);
- }
- } else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
- &src[bi << (3 + 2)], 4);
- copy_4x4_16bit_to_8bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
- dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
- }
- } else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
- &src[bi << (2 + 3)], 8);
- copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
- &src[(bi << (2 + 3)) + 4], 8);
- }
- } else {
- assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
- &src[bi << (2 * 2)], 4);
- }
- }
-}
-
-int get_filter_skip(int level) {
- int filter_skip = level & 1;
- if (level == 1) filter_skip = 0;
- return filter_skip;
-}
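Taken together with the threshold computation at the top of od_dering()
below, get_filter_skip() implies this level encoding (my reading of the
removed code, not a normative statement); thresholds are later shifted
left by coeff_shift:

  level == 0          -> threshold 0,  filter_skip 0  (dering disabled)
  level == 1          -> threshold 31, filter_skip 0  (full-strength special case)
  level == 2 * t      -> threshold t,  filter_skip 0  (skip-coded blocks untouched)
  level == 2 * t + 1  -> threshold t,  filter_skip 1  (skip-coded blocks filtered too)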
-
-void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
- int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
- int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
- int pli, dering_list *dlist, int dering_count, int level,
- int clpf_strength, int clpf_damping, int dering_damping,
- int coeff_shift, int skip_dering, int hbd) {
- int bi;
- int bx;
- int by;
- int bsize, bsizex, bsizey;
-
- int threshold = (level >> 1) << coeff_shift;
- int filter_skip = get_filter_skip(level);
- if (level == 1) threshold = 31 << coeff_shift;
-
- od_filter_dering_direction_func filter_dering_direction[] = {
- od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
- };
- clpf_damping += coeff_shift - (pli != AOM_PLANE_Y);
- dering_damping += coeff_shift - (pli != AOM_PLANE_Y);
- bsize =
- ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
- bsizex = 3 - xdec;
- bsizey = 3 - ydec;
-
- if (!skip_dering) {
- if (pli == 0) {
- if (!dirinit || !*dirinit) {
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- dir[by][bx] =
- od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx],
- OD_FILT_BSTRIDE, &var[by][bx], coeff_shift);
- }
- if (dirinit) *dirinit = 1;
- }
- }
- // Only run dering for non-zero threshold (which is always the case for
- // 4:2:2 or 4:4:0). If we don't dering, we still need to eventually write
- // something out in y[] later.
- if (threshold != 0) {
- assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
- for (bi = 0; bi < dering_count; bi++) {
- int t = !filter_skip && dlist[bi].skip ? 0 : threshold;
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- (filter_dering_direction[bsize == BLOCK_8X8])(
- &y[bi << (bsizex + bsizey)], 1 << bsizex,
- &in[(by * OD_FILT_BSTRIDE << bsizey) + (bx << bsizex)],
- pli ? t : od_adjust_thresh(t, var[by][bx]), dir[by][bx],
- dering_damping);
- }
- }
- }
-
- if (clpf_strength) {
- if (threshold && !skip_dering)
- copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
- bsize);
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- int py = by << bsizey;
- int px = bx << bsizex;
-
- if (!filter_skip && dlist[bi].skip) continue;
- if (!dst || hbd) {
- // 16 bit destination if high bitdepth or 8 bit destination not given
- (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
- : aom_clpf_hblock_hbd)(
- dst ? (uint16_t *)dst + py * dstride + px
- : &y[bi << (bsizex + bsizey)],
- in + py * OD_FILT_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
- OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey,
- clpf_strength << coeff_shift, clpf_damping);
- } else {
- // Do clpf and write the result to an 8 bit destination
- (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
- : aom_clpf_hblock)(
- dst + py * dstride + px, in + py * OD_FILT_BSTRIDE + px, dstride,
- OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey,
- clpf_strength << coeff_shift, clpf_damping);
- }
- }
- } else if (threshold != 0) {
- // No clpf, so copy instead
- if (hbd) {
- copy_dering_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist,
- dering_count, bsize);
- } else {
- copy_dering_16bit_to_8bit(dst, dstride, y, dlist, dering_count, bsize);
- }
- } else if (dirinit) {
- // If we're here, both dering and clpf are off, and we still haven't written
- // anything to y[] yet, so we just copy the input to y[]. This is necessary
- // only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
- for (bi = 0; bi < dering_count; bi++) {
- by = dlist[bi].by;
- bx = dlist[bi].bx;
- int iy, ix;
- // TODO(stemidts/jmvalin): SIMD optimisations
- for (iy = 0; iy < 1 << bsizey; iy++)
- for (ix = 0; ix < 1 << bsizex; ix++)
- y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
- in[((by << bsizey) + iy) * OD_FILT_BSTRIDE + (bx << bsizex) + ix];
- }
- }
-}
diff --git a/third_party/aom/av1/common/od_dering.h b/third_party/aom/av1/common/od_dering.h
deleted file mode 100644
index 031112b32..000000000
--- a/third_party/aom/av1/common/od_dering.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#if !defined(_dering_H)
-#define _dering_H (1)
-
-#include "odintrin.h"
-
-#define OD_DERING_NBLOCKS (MAX_SB_SIZE / 8)
-
-/* We need to buffer three vertical lines. */
-#define OD_FILT_VBORDER (3)
-/* We only need to buffer three horizontal pixels too, but let's align to
- 16 bytes (8 x 16 bits) to make vectorization easier. */
-#define OD_FILT_HBORDER (8)
-#define OD_FILT_BSTRIDE ALIGN_POWER_OF_TWO(MAX_SB_SIZE + 2 * OD_FILT_HBORDER, 3)
-
-#define OD_DERING_VERY_LARGE (30000)
-#define OD_DERING_INBUF_SIZE \
- (OD_FILT_BSTRIDE * (MAX_SB_SIZE + 2 * OD_FILT_VBORDER))
-
-extern const int OD_DIRECTION_OFFSETS_TABLE[8][3];
-
-typedef struct {
- uint8_t by;
- uint8_t bx;
- uint8_t skip;
-} dering_list;
-
-typedef void (*od_filter_dering_direction_func)(uint16_t *y, int ystride,
- const uint16_t *in,
- int threshold, int dir,
- int damping);
-
-int get_filter_skip(int level);
-
-void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
- int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
- int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
- int pli, dering_list *dlist, int dering_count, int level,
- int clpf_strength, int clpf_damping, int dering_damping,
- int coeff_shift, int skip_dering, int hbd);
-#endif
diff --git a/third_party/aom/av1/common/od_dering_simd.h b/third_party/aom/av1/common/od_dering_simd.h
deleted file mode 100644
index 4074e7e50..000000000
--- a/third_party/aom/av1/common/od_dering_simd.h
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "./av1_rtcd.h"
-#include "./cdef_simd.h"
-#include "./od_dering.h"
-
-/* partial A is a 16-bit vector of the form:
- [x8 x7 x6 x5 x4 x3 x2 x1] and partial B has the form:
- [0 y1 y2 y3 y4 y5 y6 y7].
- This function computes (x1^2+y1^2)*C1 + (x2^2+y2^2)*C2 + ...
- (x7^2+y7^2)*C7 + (x8^2+0^2)*C8 where the C1..C8 constants are in const1
- and const2. */
-static INLINE v128 fold_mul_and_sum(v128 partiala, v128 partialb, v128 const1,
- v128 const2) {
- v128 tmp;
- /* Reverse partial B. */
- partialb = v128_shuffle_8(
- partialb, v128_from_32(0x0f0e0100, 0x03020504, 0x07060908, 0x0b0a0d0c));
- /* Interleave the x and y values of identical indices and pair x8 with 0. */
- tmp = partiala;
- partiala = v128_ziplo_16(partialb, partiala);
- partialb = v128_ziphi_16(partialb, tmp);
- /* Square and add the corresponding x and y values. */
- partiala = v128_madd_s16(partiala, partiala);
- partialb = v128_madd_s16(partialb, partialb);
- /* Multiply by constant. */
- partiala = v128_mullo_s32(partiala, const1);
- partialb = v128_mullo_s32(partialb, const2);
- /* Sum all results. */
- partiala = v128_add_32(partiala, partialb);
- return partiala;
-}
-
-static INLINE v128 hsum4(v128 x0, v128 x1, v128 x2, v128 x3) {
- v128 t0, t1, t2, t3;
- t0 = v128_ziplo_32(x1, x0);
- t1 = v128_ziplo_32(x3, x2);
- t2 = v128_ziphi_32(x1, x0);
- t3 = v128_ziphi_32(x3, x2);
- x0 = v128_ziplo_64(t1, t0);
- x1 = v128_ziphi_64(t1, t0);
- x2 = v128_ziplo_64(t3, t2);
- x3 = v128_ziphi_64(t3, t2);
- return v128_add_32(v128_add_32(x0, x1), v128_add_32(x2, x3));
-}
-
-/* Computes cost for directions 0, 5, 6 and 7. We can call this function again
- to compute the remaining directions. */
-static INLINE v128 compute_directions(v128 lines[8], int32_t tmp_cost1[4]) {
- v128 partial4a, partial4b, partial5a, partial5b, partial7a, partial7b;
- v128 partial6;
- v128 tmp;
- /* Partial sums for lines 0 and 1. */
- partial4a = v128_shl_n_byte(lines[0], 14);
- partial4b = v128_shr_n_byte(lines[0], 2);
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[1], 12));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[1], 4));
- tmp = v128_add_16(lines[0], lines[1]);
- partial5a = v128_shl_n_byte(tmp, 10);
- partial5b = v128_shr_n_byte(tmp, 6);
- partial7a = v128_shl_n_byte(tmp, 4);
- partial7b = v128_shr_n_byte(tmp, 12);
- partial6 = tmp;
-
- /* Partial sums for lines 2 and 3. */
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[2], 10));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[2], 6));
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[3], 8));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[3], 8));
- tmp = v128_add_16(lines[2], lines[3]);
- partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 8));
- partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 8));
- partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 6));
- partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 10));
- partial6 = v128_add_16(partial6, tmp);
-
- /* Partial sums for lines 4 and 5. */
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[4], 6));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[4], 10));
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[5], 4));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[5], 12));
- tmp = v128_add_16(lines[4], lines[5]);
- partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 6));
- partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 10));
- partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 8));
- partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 8));
- partial6 = v128_add_16(partial6, tmp);
-
- /* Partial sums for lines 6 and 7. */
- partial4a = v128_add_16(partial4a, v128_shl_n_byte(lines[6], 2));
- partial4b = v128_add_16(partial4b, v128_shr_n_byte(lines[6], 14));
- partial4a = v128_add_16(partial4a, lines[7]);
- tmp = v128_add_16(lines[6], lines[7]);
- partial5a = v128_add_16(partial5a, v128_shl_n_byte(tmp, 4));
- partial5b = v128_add_16(partial5b, v128_shr_n_byte(tmp, 12));
- partial7a = v128_add_16(partial7a, v128_shl_n_byte(tmp, 10));
- partial7b = v128_add_16(partial7b, v128_shr_n_byte(tmp, 6));
- partial6 = v128_add_16(partial6, tmp);
-
- /* Compute costs in terms of partial sums. */
- partial4a =
- fold_mul_and_sum(partial4a, partial4b, v128_from_32(210, 280, 420, 840),
- v128_from_32(105, 120, 140, 168));
- partial7a =
- fold_mul_and_sum(partial7a, partial7b, v128_from_32(210, 420, 0, 0),
- v128_from_32(105, 105, 105, 140));
- partial5a =
- fold_mul_and_sum(partial5a, partial5b, v128_from_32(210, 420, 0, 0),
- v128_from_32(105, 105, 105, 140));
- partial6 = v128_madd_s16(partial6, partial6);
- partial6 = v128_mullo_s32(partial6, v128_dup_32(105));
-
- partial4a = hsum4(partial4a, partial5a, partial6, partial7a);
- v128_store_unaligned(tmp_cost1, partial4a);
- return partial4a;
-}
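Note that the constants applied to the diagonal partial sums above are the
scalar div_table entries reordered: v128_from_32(210, 280, 420, 840) packs
div_table[4..1] and v128_from_32(105, 120, 140, 168) packs div_table[8..5],
so this SIMD path produces the same 840/n-scaled costs as od_dir_find8_c
earlier in this diff.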
-
-/* transpose and reverse the order of the lines -- equivalent to a 90-degree
- counter-clockwise rotation of the pixels. */
-static INLINE void array_reverse_transpose_8x8(v128 *in, v128 *res) {
- const v128 tr0_0 = v128_ziplo_16(in[1], in[0]);
- const v128 tr0_1 = v128_ziplo_16(in[3], in[2]);
- const v128 tr0_2 = v128_ziphi_16(in[1], in[0]);
- const v128 tr0_3 = v128_ziphi_16(in[3], in[2]);
- const v128 tr0_4 = v128_ziplo_16(in[5], in[4]);
- const v128 tr0_5 = v128_ziplo_16(in[7], in[6]);
- const v128 tr0_6 = v128_ziphi_16(in[5], in[4]);
- const v128 tr0_7 = v128_ziphi_16(in[7], in[6]);
-
- const v128 tr1_0 = v128_ziplo_32(tr0_1, tr0_0);
- const v128 tr1_1 = v128_ziplo_32(tr0_5, tr0_4);
- const v128 tr1_2 = v128_ziphi_32(tr0_1, tr0_0);
- const v128 tr1_3 = v128_ziphi_32(tr0_5, tr0_4);
- const v128 tr1_4 = v128_ziplo_32(tr0_3, tr0_2);
- const v128 tr1_5 = v128_ziplo_32(tr0_7, tr0_6);
- const v128 tr1_6 = v128_ziphi_32(tr0_3, tr0_2);
- const v128 tr1_7 = v128_ziphi_32(tr0_7, tr0_6);
-
- res[7] = v128_ziplo_64(tr1_1, tr1_0);
- res[6] = v128_ziphi_64(tr1_1, tr1_0);
- res[5] = v128_ziplo_64(tr1_3, tr1_2);
- res[4] = v128_ziphi_64(tr1_3, tr1_2);
- res[3] = v128_ziplo_64(tr1_5, tr1_4);
- res[2] = v128_ziphi_64(tr1_5, tr1_4);
- res[1] = v128_ziplo_64(tr1_7, tr1_6);
- res[0] = v128_ziphi_64(tr1_7, tr1_6);
-}
-
-int SIMD_FUNC(od_dir_find8)(const od_dering_in *img, int stride, int32_t *var,
- int coeff_shift) {
- int i;
- int32_t cost[8];
- int32_t best_cost = 0;
- int best_dir = 0;
- v128 lines[8];
- for (i = 0; i < 8; i++) {
- lines[i] = v128_load_unaligned(&img[i * stride]);
- lines[i] =
- v128_sub_16(v128_shr_s16(lines[i], coeff_shift), v128_dup_16(128));
- }
-
-#if defined(__SSE4_1__)
- /* Compute "mostly vertical" directions. */
- __m128i dir47 = compute_directions(lines, cost + 4);
-
- array_reverse_transpose_8x8(lines, lines);
-
- /* Compute "mostly horizontal" directions. */
- __m128i dir03 = compute_directions(lines, cost);
-
- __m128i max = _mm_max_epi32(dir03, dir47);
- max = _mm_max_epi32(max, _mm_shuffle_epi32(max, _MM_SHUFFLE(1, 0, 3, 2)));
- max = _mm_max_epi32(max, _mm_shuffle_epi32(max, _MM_SHUFFLE(2, 3, 0, 1)));
- best_cost = _mm_cvtsi128_si32(max);
- __m128i t =
- _mm_packs_epi32(_mm_cmpeq_epi32(max, dir03), _mm_cmpeq_epi32(max, dir47));
- best_dir = _mm_movemask_epi8(_mm_packs_epi16(t, t));
- best_dir = get_msb(best_dir ^ (best_dir - 1)); // Count trailing zeros
-#else
- /* Compute "mostly vertical" directions. */
- compute_directions(lines, cost + 4);
-
- array_reverse_transpose_8x8(lines, lines);
-
- /* Compute "mostly horizontal" directions. */
- compute_directions(lines, cost);
-
- for (i = 0; i < 8; i++) {
- if (cost[i] > best_cost) {
- best_cost = cost[i];
- best_dir = i;
- }
- }
-#endif
-
- /* Difference between the optimal variance and the variance along the
- orthogonal direction. Again, the sum(x^2) terms cancel out. */
- *var = best_cost - cost[(best_dir + 4) & 7];
- /* We'd normally divide by 840, but dividing by 1024 is close enough
- for what we're going to do with this. */
- *var >>= 10;
- return best_dir;
-}
-
-void SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride,
- const uint16_t *in,
- int threshold, int dir,
- int damping) {
- int i;
- v128 p0, p1, sum, row, res;
- int o1 = OD_DIRECTION_OFFSETS_TABLE[dir][0];
- int o2 = OD_DIRECTION_OFFSETS_TABLE[dir][1];
-
- if (threshold) damping -= get_msb(threshold);
- for (i = 0; i < 4; i += 2) {
- sum = v128_zero();
- row = v128_from_v64(v64_load_aligned(&in[i * OD_FILT_BSTRIDE]),
- v64_load_aligned(&in[(i + 1) * OD_FILT_BSTRIDE]));
-
- // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE + o1]),
- v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE + o1]));
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE - o1]),
- v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE - o1]));
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 4 * (p0 + p1)
- sum = v128_add_16(sum, v128_shl_n_16(v128_add_16(p0, p1), 2));
-
- // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE + o2]),
- v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE + o2]));
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_from_v64(v64_load_unaligned(&in[i * OD_FILT_BSTRIDE - o2]),
- v64_load_unaligned(&in[(i + 1) * OD_FILT_BSTRIDE - o2]));
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 1 * (p0 + p1)
- sum = v128_add_16(sum, v128_add_16(p0, p1));
-
- // res = row + ((sum + 8) >> 4)
- res = v128_add_16(sum, v128_dup_16(8));
- res = v128_shr_n_s16(res, 4);
- res = v128_add_16(row, res);
- v64_store_aligned(&y[i * ystride], v128_high_v64(res));
- v64_store_aligned(&y[(i + 1) * ystride], v128_low_v64(res));
- }
-}
-
-void SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
- const uint16_t *in,
- int threshold, int dir,
- int damping) {
- int i;
- v128 sum, p0, p1, row, res;
- int o1 = OD_DIRECTION_OFFSETS_TABLE[dir][0];
- int o2 = OD_DIRECTION_OFFSETS_TABLE[dir][1];
- int o3 = OD_DIRECTION_OFFSETS_TABLE[dir][2];
-
- if (threshold) damping -= get_msb(threshold);
- for (i = 0; i < 8; i++) {
- sum = v128_zero();
- row = v128_load_aligned(&in[i * OD_FILT_BSTRIDE]);
-
- // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o1]);
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o1]);
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 3 * (p0 + p1)
- p0 = v128_add_16(p0, p1);
- p0 = v128_add_16(p0, v128_shl_n_16(p0, 1));
- sum = v128_add_16(sum, p0);
-
- // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o2]);
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o2]);
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += 2 * (p0 + p1)
- p0 = v128_shl_n_16(v128_add_16(p0, p1), 1);
- sum = v128_add_16(sum, p0);
-
- // p0 = constrain16(in[i*OD_FILT_BSTRIDE + offset], row, threshold, damping)
- p0 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE + o3]);
- p0 = constrain16(p0, row, threshold, damping);
-
- // p1 = constrain16(in[i*OD_FILT_BSTRIDE - offset], row, threshold, damping)
- p1 = v128_load_unaligned(&in[i * OD_FILT_BSTRIDE - o3]);
- p1 = constrain16(p1, row, threshold, damping);
-
- // sum += (p0 + p1)
- p0 = v128_add_16(p0, p1);
- sum = v128_add_16(sum, p0);
-
- // res = row + ((sum + 8) >> 4)
- res = v128_add_16(sum, v128_dup_16(8));
- res = v128_shr_n_s16(res, 4);
- res = v128_add_16(row, res);
- v128_store_unaligned(&y[i * ystride], res);
- }
-}
-
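For orientation on the arithmetic: the 8x8 filter weights the constrained differences 3, 2 and 1 along the ±o1, ±o2 and ±o3 taps, a total weight of 2 * (3 + 2 + 1) = 12, while the 4x4 variant's weights 4 and 1 give 2 * (4 + 1) = 10. With the rounding constant 8 and the final >> 4, a pixel whose six constrained differences all equal d moves by (12 * d + 8) >> 4, roughly three quarters of d, so the filter never fully commits to its neighbors.
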
-void SIMD_FUNC(copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 8; i++) {
- v128 row = v128_load_unaligned(&src[i * sstride]);
- row = v128_pack_s16_u8(row, row);
- v64_store_unaligned(&dst[i * dstride], v128_low_v64(row));
- }
-}
-
-void SIMD_FUNC(copy_4x4_16bit_to_8bit)(uint8_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 4; i++) {
- v128 row = v128_load_unaligned(&src[i * sstride]);
- row = v128_pack_s16_u8(row, row);
- u32_store_unaligned(&dst[i * dstride], v128_low_u32(row));
- }
-}
-
-void SIMD_FUNC(copy_8x8_16bit_to_16bit)(uint16_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 8; i++) {
- v128 row = v128_load_unaligned(&src[i * sstride]);
- v128_store_unaligned(&dst[i * dstride], row);
- }
-}
-
-void SIMD_FUNC(copy_4x4_16bit_to_16bit)(uint16_t *dst, int dstride,
- const uint16_t *src, int sstride) {
- int i;
- for (i = 0; i < 4; i++) {
- v64 row = v64_load_unaligned(&src[i * sstride]);
- v64_store_unaligned(&dst[i * dstride], row);
- }
-}
-
-void SIMD_FUNC(copy_rect8_8bit_to_16bit)(uint16_t *dst, int dstride,
- const uint8_t *src, int sstride, int v,
- int h) {
- int i, j;
- for (i = 0; i < v; i++) {
- for (j = 0; j < (h & ~0x7); j += 8) {
- v64 row = v64_load_unaligned(&src[i * sstride + j]);
- v128_store_unaligned(&dst[i * dstride + j], v128_unpack_u8_s16(row));
- }
- for (; j < h; j++) {
- dst[i * dstride + j] = src[i * sstride + j];
- }
- }
-}
-
-void SIMD_FUNC(copy_rect8_16bit_to_16bit)(uint16_t *dst, int dstride,
- const uint16_t *src, int sstride,
- int v, int h) {
- int i, j;
- for (i = 0; i < v; i++) {
- for (j = 0; j < (h & ~0x7); j += 8) {
- v128 row = v128_load_unaligned(&src[i * sstride + j]);
- v128_store_unaligned(&dst[i * dstride + j], row);
- }
- for (; j < h; j++) {
- dst[i * dstride + j] = src[i * sstride + j];
- }
- }
-}
diff --git a/third_party/aom/av1/common/onyxc_int.h b/third_party/aom/av1/common/onyxc_int.h
index 8647e113b..2396ce2f3 100644
--- a/third_party/aom/av1/common/onyxc_int.h
+++ b/third_party/aom/av1/common/onyxc_int.h
@@ -38,6 +38,10 @@
#if CONFIG_CFL
#include "av1/common/cfl.h"
#endif
+#if CONFIG_HASH_ME
+// TODO(youzhou@microsoft.com): Encoder only. Move it out of common
+#include "av1/encoder/hash_motion.h"
+#endif
#ifdef __cplusplus
extern "C" {
#endif
@@ -60,7 +64,13 @@ extern "C" {
#define FRAME_ID_NUMBERS_PRESENT_FLAG 1
#define FRAME_ID_LENGTH_MINUS7 8 // Allows frame id up to 2^15-1
#define DELTA_FRAME_ID_LENGTH_MINUS2 12 // Allows frame id deltas up to 2^14-1
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
+
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+#define FRAME_CONTEXTS (FRAME_BUFFERS + 1)
+// Extra frame context which is always kept at default values
+#define FRAME_CONTEXT_DEFAULTS (FRAME_CONTEXTS - 1)
+#else
#if CONFIG_EXT_REFS
#define FRAME_CONTEXTS_LOG2 3
@@ -69,6 +79,7 @@ extern "C" {
#endif
#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2)
+#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
#define NUM_PING_PONG_BUFFERS 2
@@ -79,11 +90,13 @@ typedef enum {
REFERENCE_MODES = 3,
} REFERENCE_MODE;
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
typedef enum {
RESET_FRAME_CONTEXT_NONE = 0,
RESET_FRAME_CONTEXT_CURRENT = 1,
RESET_FRAME_CONTEXT_ALL = 2,
} RESET_FRAME_CONTEXT_MODE;
+#endif
typedef enum {
/**
@@ -98,6 +111,14 @@ typedef enum {
REFRESH_FRAME_CONTEXT_BACKWARD,
} REFRESH_FRAME_CONTEXT_MODE;
+#if CONFIG_MFMV
+#define MFMV_STACK_SIZE INTER_REFS_PER_FRAME
+
+typedef struct {
+ int_mv mfmv[INTER_REFS_PER_FRAME][MFMV_STACK_SIZE];
+} TPL_MV_REF;
+#endif
+
typedef struct {
int_mv mv[2];
int_mv pred_mv[2];
@@ -106,14 +127,38 @@ typedef struct {
typedef struct {
int ref_count;
+
+#if CONFIG_FRAME_MARKER
+ int cur_frame_offset;
+ int lst_frame_offset;
+ int alt_frame_offset;
+ int gld_frame_offset;
+#if CONFIG_EXT_REFS
+ int lst2_frame_offset;
+ int lst3_frame_offset;
+ int bwd_frame_offset;
+ int alt2_frame_offset;
+#endif
+#endif // CONFIG_FRAME_MARKER
+
+#if CONFIG_MFMV
+ TPL_MV_REF *tpl_mvs;
+#endif
MV_REF *mvs;
int mi_rows;
int mi_cols;
+ // Width and height give the size of the buffer (before any upscaling, unlike
+ // the sizes that can be derived from the buf structure)
+ int width;
+ int height;
#if CONFIG_GLOBAL_MOTION
WarpedMotionParams global_motion[TOTAL_REFS_PER_FRAME];
#endif // CONFIG_GLOBAL_MOTION
aom_codec_frame_buffer_t raw_frame_buffer;
YV12_BUFFER_CONFIG buf;
+#if CONFIG_HASH_ME
+ hash_table hash_table;
+#endif
#if CONFIG_TEMPMV_SIGNALING
uint8_t intra_only;
#endif
@@ -150,13 +195,29 @@ typedef struct BufferPool {
InternalFrameBufferList int_frame_buffers;
} BufferPool;
+#if CONFIG_LV_MAP
+typedef struct {
+ int base_ctx_table[2 /*row*/][2 /*col*/][2 /*sig_map*/]
+ [BASE_CONTEXT_POSITION_NUM + 1];
+} LV_MAP_CTX_TABLE;
+typedef int BASE_CTX_TABLE[2 /*col*/][2 /*sig_map*/]
+ [BASE_CONTEXT_POSITION_NUM + 1];
+#endif
+
+#if CONFIG_REFERENCE_BUFFER
+/* Initial version of sequence header structure */
+typedef struct SequenceHeader {
+ int frame_id_numbers_present_flag;
+ int frame_id_length_minus7;
+ int delta_frame_id_length_minus2;
+} SequenceHeader;
+#endif // CONFIG_REFERENCE_BUFFER
+
typedef struct AV1Common {
struct aom_internal_error_info error;
aom_color_space_t color_space;
-#if CONFIG_COLORSPACE_HEADERS
aom_transfer_function_t transfer_function;
aom_chroma_sample_position_t chroma_sample_position;
-#endif
int color_range;
int width;
int height;
@@ -211,21 +272,24 @@ typedef struct AV1Common {
uint8_t last_intra_only;
int allow_high_precision_mv;
+#if CONFIG_AMVR
+  int seq_mv_precision_level;  // 0: the AOM default, 1: integer only,
+                               // 2: adaptive
+  int cur_frame_mv_precision_level;  // 0: the AOM default, 1: integer only
+#endif
-#if CONFIG_PALETTE || CONFIG_INTRABC
int allow_screen_content_tools;
-#endif // CONFIG_PALETTE || CONFIG_INTRABC
-#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
int allow_interintra_compound;
#endif // CONFIG_INTERINTRA
#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
int allow_masked_compound;
#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
-#endif // CONFIG_EXT_INTER
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
// Flag signaling which frame contexts should be reset to default values.
RESET_FRAME_CONTEXT_MODE reset_frame_context;
+#endif
// MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in
// MODE_INFO (8-pixel) units.
@@ -304,9 +368,8 @@ typedef struct AV1Common {
loop_filter_info_n lf_info;
#if CONFIG_FRAME_SUPERRES
- // The numerator of the superres scale; the denominator is fixed.
- uint8_t superres_scale_numerator;
- uint8_t superres_kf_scale_numerator;
+ // The denominator of the superres scale; the numerator is fixed.
+ uint8_t superres_scale_denominator;
int superres_upscaled_width;
int superres_upscaled_height;
#endif // CONFIG_FRAME_SUPERRES
@@ -343,9 +406,15 @@ typedef struct AV1Common {
FRAME_CONTEXT *fc; /* this frame entropy */
FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS
FRAME_CONTEXT *pre_fc; // Context referenced in this frame
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
unsigned int frame_context_idx; /* Context to use/update */
+#endif
FRAME_COUNTS counts;
+#if CONFIG_FRAME_MARKER
+ unsigned int frame_offset;
+#endif
+
unsigned int current_video_frame;
BITSTREAM_PROFILE profile;
@@ -355,9 +424,30 @@ typedef struct AV1Common {
int error_resilient_mode;
- int log2_tile_cols, log2_tile_rows; // Used in non-large_scale_tile_coding.
int tile_cols, tile_rows;
- int tile_width, tile_height; // In MI units
+ int last_tile_cols, last_tile_rows;
+
+#if CONFIG_MAX_TILE
+ int min_log2_tile_cols;
+ int max_log2_tile_cols;
+ int max_log2_tile_rows;
+ int min_log2_tile_rows;
+ int min_log2_tiles;
+ int max_tile_width_sb;
+ int max_tile_height_sb;
+ int uniform_tile_spacing_flag;
+ int log2_tile_cols; // only valid for uniform tiles
+ int log2_tile_rows; // only valid for uniform tiles
+ int tile_col_start_sb[MAX_TILE_COLS + 1]; // valid for 0 <= i <= tile_cols
+ int tile_row_start_sb[MAX_TILE_ROWS + 1]; // valid for 0 <= i <= tile_rows
+#if CONFIG_DEPENDENT_HORZTILES
+ int tile_row_independent[MAX_TILE_ROWS]; // valid for 0 <= i < tile_rows
+#endif
+#else
+ int log2_tile_cols, log2_tile_rows; // Used in non-large_scale_tile_coding.
+ int tile_width, tile_height; // In MI units
+#endif // CONFIG_MAX_TILE
+
#if CONFIG_EXT_TILE
unsigned int large_scale_tile;
unsigned int single_tile_decoding;
@@ -407,15 +497,14 @@ typedef struct AV1Common {
int mib_size; // Size of the superblock in units of MI blocks
int mib_size_log2; // Log 2 of above.
#if CONFIG_CDEF
- int cdef_dering_damping;
- int cdef_clpf_damping;
+ int cdef_pri_damping;
+ int cdef_sec_damping;
int nb_cdef_strengths;
int cdef_strengths[CDEF_MAX_STRENGTHS];
int cdef_uv_strengths[CDEF_MAX_STRENGTHS];
int cdef_bits;
#endif
-#if CONFIG_DELTA_Q
int delta_q_present_flag;
// Resolution of delta quant
int delta_q_res;
@@ -423,29 +512,39 @@ typedef struct AV1Common {
int delta_lf_present_flag;
// Resolution of delta lf level
int delta_lf_res;
-#endif
+#if CONFIG_LOOPFILTER_LEVEL
+ // This is a flag for number of deltas of loop filter level
+ // 0: use 1 delta, for y_vertical, y_horizontal, u, and v
+ // 1: use separate deltas for each filter level
+ int delta_lf_multi;
+#endif // CONFIG_LOOPFILTER_LEVEL
#endif
int num_tg;
#if CONFIG_REFERENCE_BUFFER
+ SequenceHeader seq_params;
int current_frame_id;
int ref_frame_id[REF_FRAMES];
int valid_for_referencing[REF_FRAMES];
int refresh_mask;
int invalid_delta_frame_id_minus1;
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
#if CONFIG_ANS && ANS_MAX_SYMBOLS
int ans_window_size_log2;
#endif
-} AV1_COMMON;
-
-#if CONFIG_REFERENCE_BUFFER
-/* Initial version of sequence header structure */
-typedef struct SequenceHeader {
- int frame_id_numbers_present_flag;
- int frame_id_length_minus7;
- int delta_frame_id_length_minus2;
-} SequenceHeader;
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ NCOBMC_KERNELS ncobmc_kernels[ADAPT_OVERLAP_BLOCKS][ALL_NCOBMC_MODES];
+ uint8_t *ncobmcaw_buf[4];
+#endif
+#if CONFIG_LV_MAP
+ LV_MAP_CTX_TABLE coeff_ctx_table;
+#endif
+#if CONFIG_LPF_SB
+ int final_lpf_encode;
#endif
+#if CONFIG_ADAPT_SCAN
+ int use_adapt_scan;
+#endif
+} AV1_COMMON;
// TODO(hkuang): Don't need to lock the whole pool after implementing atomic
// frame reference count.
@@ -507,15 +606,57 @@ static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
bufs[new_idx].ref_count++;
}
+#if CONFIG_TEMPMV_SIGNALING
+// Returns 1 if this frame might use mvs from some previous frame. This
+// function doesn't consider whether prev_frame is actually suitable (see
+// frame_can_use_prev_frame_mvs for that).
+static INLINE int frame_might_use_prev_frame_mvs(const AV1_COMMON *cm) {
+ return !cm->error_resilient_mode && !cm->intra_only;
+}
+
+// Returns 1 if this frame really can use MVs from some previous frame.
+static INLINE int frame_can_use_prev_frame_mvs(const AV1_COMMON *cm) {
+ return (frame_might_use_prev_frame_mvs(cm) && cm->last_show_frame &&
+ cm->prev_frame && !cm->prev_frame->intra_only &&
+ cm->width == cm->prev_frame->width &&
+ cm->height == cm->prev_frame->height);
+}
+#endif
+
+static INLINE void ensure_mv_buffer(RefCntBuffer *buf, AV1_COMMON *cm) {
+ if (buf->mvs == NULL || buf->mi_rows < cm->mi_rows ||
+ buf->mi_cols < cm->mi_cols) {
+ aom_free(buf->mvs);
+ buf->mi_rows = cm->mi_rows;
+ buf->mi_cols = cm->mi_cols;
+#if CONFIG_TMV
+ CHECK_MEM_ERROR(cm, buf->mvs,
+ (MV_REF *)aom_calloc(
+ ((cm->mi_rows + 1) >> 1) * ((cm->mi_cols + 1) >> 1),
+ sizeof(*buf->mvs)));
+#else
+ CHECK_MEM_ERROR(
+ cm, buf->mvs,
+ (MV_REF *)aom_calloc(cm->mi_rows * cm->mi_cols, sizeof(*buf->mvs)));
+#endif // CONFIG_TMV
+
+#if CONFIG_MFMV
+ aom_free(buf->tpl_mvs);
+ CHECK_MEM_ERROR(
+ cm, buf->tpl_mvs,
+ (TPL_MV_REF *)aom_calloc((cm->mi_rows + MAX_MIB_SIZE) * cm->mi_stride,
+ sizeof(*buf->tpl_mvs)));
+#endif
+ }
+}
+
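ensure_mv_buffer reallocates only when the frame grows past the stored mi_rows/mi_cols. Under CONFIG_TMV one MV_REF covers a 2x2 group of MI units, hence ((cm->mi_rows + 1) >> 1) * ((cm->mi_cols + 1) >> 1) entries with odd dimensions rounded up: for a 1920x1080 frame at 8-pixel MI units (mi_cols = 240, mi_rows = 135), the TMV path allocates 120 * 68 = 8160 entries against 240 * 135 = 32400 for the full-resolution path.
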
#if CONFIG_VAR_REFS
#define LAST_IS_VALID(cm) ((cm)->frame_refs[LAST_FRAME - 1].is_valid)
#define LAST2_IS_VALID(cm) ((cm)->frame_refs[LAST2_FRAME - 1].is_valid)
#define LAST3_IS_VALID(cm) ((cm)->frame_refs[LAST3_FRAME - 1].is_valid)
#define GOLDEN_IS_VALID(cm) ((cm)->frame_refs[GOLDEN_FRAME - 1].is_valid)
#define BWDREF_IS_VALID(cm) ((cm)->frame_refs[BWDREF_FRAME - 1].is_valid)
-#if CONFIG_ALTREF2
#define ALTREF2_IS_VALID(cm) ((cm)->frame_refs[ALTREF2_FRAME - 1].is_valid)
-#endif // CONFIG_ALTREF2
#define ALTREF_IS_VALID(cm) ((cm)->frame_refs[ALTREF_FRAME - 1].is_valid)
#define L_OR_L2(cm) (LAST_IS_VALID(cm) || LAST2_IS_VALID(cm))
@@ -526,10 +667,8 @@ static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
#define L3_OR_G(cm) (LAST3_IS_VALID(cm) || GOLDEN_IS_VALID(cm))
#define L3_AND_G(cm) (LAST3_IS_VALID(cm) && GOLDEN_IS_VALID(cm))
-#if CONFIG_ALTREF2
#define BWD_OR_ALT2(cm) (BWDREF_IS_VALID(cm) || ALTREF2_IS_VALID(cm))
#define BWD_AND_ALT2(cm) (BWDREF_IS_VALID(cm) && ALTREF2_IS_VALID(cm))
-#endif // CONFIG_ALTREF2
#define BWD_OR_ALT(cm) (BWDREF_IS_VALID(cm) || ALTREF_IS_VALID(cm))
#define BWD_AND_ALT(cm) (BWDREF_IS_VALID(cm) && ALTREF_IS_VALID(cm))
#endif // CONFIG_VAR_REFS
@@ -546,6 +685,15 @@ static INLINE int frame_is_intra_only(const AV1_COMMON *const cm) {
return cm->frame_type == KEY_FRAME || cm->intra_only;
}
+#if CONFIG_CFL
+#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+static INLINE void cfl_clear_sub8x8_val(CFL_CTX *cfl) {
+ memset(cfl->sub8x8_val, 0, sizeof(cfl->sub8x8_val));
+}
+#endif // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
+#endif // CONFIG_CFL
+
static INLINE void av1_init_macroblockd(AV1_COMMON *cm, MACROBLOCKD *xd,
#if CONFIG_PVQ
tran_low_t *pvq_ref_coeff,
@@ -602,11 +750,12 @@ static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
for (i = 0; i < MAX_MB_PLANE; ++i) {
struct macroblockd_plane *const pd = &xd->plane[i];
#if CONFIG_CHROMA_SUB8X8
- if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
- // Offset the buffer pointer
- if (pd->subsampling_y && (mi_row & 0x01)) row_offset = mi_row - 1;
- if (pd->subsampling_x && (mi_col & 0x01)) col_offset = mi_col - 1;
- }
+ // Offset the buffer pointer
+ const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ if (pd->subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1))
+ row_offset = mi_row - 1;
+ if (pd->subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1))
+ col_offset = mi_col - 1;
#endif
int above_idx = col_offset << (MI_SIZE_LOG2 - tx_size_wide_log2[0]);
int left_idx = (row_offset & MAX_MIB_MASK)
@@ -713,7 +862,14 @@ static INLINE aom_cdf_prob *get_y_mode_cdf(FRAME_CONTEXT *tile_ctx,
int block) {
const PREDICTION_MODE above = av1_above_block_mode(mi, above_mi, block);
const PREDICTION_MODE left = av1_left_block_mode(mi, left_mi, block);
+
+#if CONFIG_KF_CTX
+ int above_ctx = intra_mode_context[above];
+ int left_ctx = intra_mode_context[left];
+ return tile_ctx->kf_y_cdf[above_ctx][left_ctx];
+#else
return tile_ctx->kf_y_cdf[above][left];
+#endif
}
static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row,
@@ -796,14 +952,54 @@ static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
}
#endif
+static INLINE aom_cdf_prob cdf_element_prob(const aom_cdf_prob *cdf,
+ size_t element) {
+ assert(cdf != NULL);
+#if !CONFIG_ANS
+ return (element > 0 ? cdf[element - 1] : CDF_PROB_TOP) - cdf[element];
+#else
+ return cdf[element] - (element > 0 ? cdf[element - 1] : 0);
+#endif
+}
+
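cdf_element_prob recovers a single symbol probability from the cumulative array under either representation. In the default (!CONFIG_ANS) inverse-CDF convention the stored values count down from CDF_PROB_TOP, so a 3-symbol alphabet stored as {20000, 5000, 0} yields probabilities 32768 - 20000 = 12768, 20000 - 5000 = 15000 and 5000 - 0 = 5000 (assuming the usual CDF_PROB_TOP of 1 << 15).
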
+static INLINE void partition_gather_horz_alike(aom_cdf_prob *out,
+ const aom_cdf_prob *const in) {
+ out[0] = CDF_PROB_TOP;
+ out[0] -= cdf_element_prob(in, PARTITION_HORZ);
+ out[0] -= cdf_element_prob(in, PARTITION_SPLIT);
+#if CONFIG_EXT_PARTITION_TYPES
+ out[0] -= cdf_element_prob(in, PARTITION_HORZ_A);
+ out[0] -= cdf_element_prob(in, PARTITION_HORZ_B);
+ out[0] -= cdf_element_prob(in, PARTITION_VERT_A);
+#endif
+ out[0] = AOM_ICDF(out[0]);
+ out[1] = AOM_ICDF(CDF_PROB_TOP);
+}
+
+static INLINE void partition_gather_vert_alike(aom_cdf_prob *out,
+ const aom_cdf_prob *const in) {
+ out[0] = CDF_PROB_TOP;
+ out[0] -= cdf_element_prob(in, PARTITION_VERT);
+ out[0] -= cdf_element_prob(in, PARTITION_SPLIT);
+#if CONFIG_EXT_PARTITION_TYPES
+ out[0] -= cdf_element_prob(in, PARTITION_HORZ_A);
+ out[0] -= cdf_element_prob(in, PARTITION_VERT_A);
+ out[0] -= cdf_element_prob(in, PARTITION_VERT_B);
+#endif
+ out[0] = AOM_ICDF(out[0]);
+ out[1] = AOM_ICDF(CDF_PROB_TOP);
+}
+
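These gather helpers fold every partition whose top (respectively left) edge behaves like a horizontal (vertical) split into a two-symbol CDF: out[0] starts from certainty and subtracts the probability mass of each "alike" partition, leaving a binary distribution in which the gathered partitions share one outcome and everything else the other. A hedged usage sketch for the horizontal case:

    aom_cdf_prob binary_cdf[2];
    /* in: the full partition CDF for the current context. */
    partition_gather_horz_alike(binary_cdf, in);
    /* binary_cdf can now code a single horz-alike-or-not decision. */
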
#if CONFIG_EXT_PARTITION_TYPES
static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row,
int mi_col, BLOCK_SIZE subsize,
BLOCK_SIZE bsize,
PARTITION_TYPE partition) {
if (bsize >= BLOCK_8X8) {
+#if !CONFIG_EXT_PARTITION_TYPES_AB
const int hbs = mi_size_wide[bsize] / 2;
BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
switch (partition) {
case PARTITION_SPLIT:
if (bsize != BLOCK_8X8) break;
@@ -814,6 +1010,30 @@ static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row,
case PARTITION_VERT_4:
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
break;
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ case PARTITION_HORZ_A:
+ update_partition_context(xd, mi_row, mi_col,
+ get_subsize(bsize, PARTITION_HORZ_4), subsize);
+ update_partition_context(xd, mi_row + mi_size_high[bsize] / 2, mi_col,
+ subsize, subsize);
+ break;
+ case PARTITION_HORZ_B:
+ update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+ update_partition_context(xd, mi_row + mi_size_high[bsize] / 2, mi_col,
+ get_subsize(bsize, PARTITION_HORZ_4), subsize);
+ break;
+ case PARTITION_VERT_A:
+ update_partition_context(xd, mi_row, mi_col,
+ get_subsize(bsize, PARTITION_VERT_4), subsize);
+ update_partition_context(xd, mi_row, mi_col + mi_size_wide[bsize] / 2,
+ subsize, subsize);
+ break;
+ case PARTITION_VERT_B:
+ update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+ update_partition_context(xd, mi_row, mi_col + mi_size_wide[bsize] / 2,
+ get_subsize(bsize, PARTITION_VERT_4), subsize);
+ break;
+#else
case PARTITION_HORZ_A:
update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
update_partition_context(xd, mi_row + hbs, mi_col, subsize, subsize);
@@ -830,6 +1050,7 @@ static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row,
update_partition_context(xd, mi_row, mi_col, subsize, subsize);
update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize);
break;
+#endif
default: assert(0 && "Invalid partition type");
}
}
@@ -842,7 +1063,6 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row,
int has_rows, int has_cols,
#endif
BLOCK_SIZE bsize) {
-#if CONFIG_UNPOISON_PARTITION_CTX
const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col;
const PARTITION_CONTEXT *left_ctx =
xd->left_seg_context + (mi_row & MAX_MIB_MASK);
@@ -853,6 +1073,7 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row,
assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
assert(bsl >= 0);
+#if CONFIG_UNPOISON_PARTITION_CTX
if (has_rows && has_cols)
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
else if (has_rows && !has_cols)
@@ -860,18 +1081,8 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row,
else if (!has_rows && has_cols)
return PARTITION_CONTEXTS_PRIMARY + PARTITION_BLOCK_SIZES + bsl;
else
- return PARTITION_CONTEXTS; // Bogus context, forced SPLIT
+ return INVALID_PARTITION_CTX; // Bogus context, forced SPLIT
#else
- const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col;
- const PARTITION_CONTEXT *left_ctx =
- xd->left_seg_context + (mi_row & MAX_MIB_MASK);
- // Minimum partition point is 8x8. Offset the bsl accordingly.
- const int bsl = mi_width_log2_lookup[bsize] - mi_width_log2_lookup[BLOCK_8X8];
- int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1;
-
- assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
- assert(bsl >= 0);
-
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
#endif
}
@@ -997,18 +1208,22 @@ static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx,
}
static INLINE TX_SIZE get_sqr_tx_size(int tx_dim) {
- TX_SIZE tx_size;
switch (tx_dim) {
#if CONFIG_EXT_PARTITION
case 128:
-#endif
+#endif // CONFIG_EXT_PARTITION
case 64:
- case 32: tx_size = TX_32X32; break;
- case 16: tx_size = TX_16X16; break;
- case 8: tx_size = TX_8X8; break;
- default: tx_size = TX_4X4;
+#if CONFIG_TX64X64
+ return TX_64X64;
+#else
+ return TX_32X32;
+#endif // CONFIG_TX64X64
+    case 32: return TX_32X32;
+    case 16: return TX_16X16;
+    case 8: return TX_8X8;
+ default: return TX_4X4;
}
- return tx_size;
}
static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
@@ -1035,49 +1250,114 @@ static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
}
#endif
+// Compute the next partition in the direction of the sb_type stored in the mi
+// array, starting with bsize.
static INLINE PARTITION_TYPE get_partition(const AV1_COMMON *const cm,
int mi_row, int mi_col,
BLOCK_SIZE bsize) {
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
- return PARTITION_INVALID;
- } else {
- const int offset = mi_row * cm->mi_stride + mi_col;
- MODE_INFO **mi = cm->mi_grid_visible + offset;
- const MB_MODE_INFO *const mbmi = &mi[0]->mbmi;
- const int bsl = b_width_log2_lookup[bsize];
- const PARTITION_TYPE partition = partition_lookup[bsl][mbmi->sb_type];
-#if !CONFIG_EXT_PARTITION_TYPES
- return partition;
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return PARTITION_INVALID;
+
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ MODE_INFO **mi = cm->mi_grid_visible + offset;
+ const BLOCK_SIZE subsize = mi[0]->mbmi.sb_type;
+
+ if (subsize == bsize) return PARTITION_NONE;
+
+ const int bhigh = mi_size_high[bsize];
+ const int bwide = mi_size_wide[bsize];
+ const int sshigh = mi_size_high[subsize];
+ const int sswide = mi_size_wide[subsize];
+
+#if CONFIG_EXT_PARTITION_TYPES
+  if (bsize > BLOCK_8X8 && mi_row + bhigh / 2 < cm->mi_rows &&
+      mi_col + bwide / 2 < cm->mi_cols) {
+ // In this case, the block might be using an extended partition
+ // type.
+ const MB_MODE_INFO *const mbmi_right = &mi[bwide / 2]->mbmi;
+ const MB_MODE_INFO *const mbmi_below = &mi[bhigh / 2 * cm->mi_stride]->mbmi;
+
+ if (sswide == bwide) {
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ // Smaller height but same width. Is PARTITION_HORZ, PARTITION_HORZ_4,
+ // PARTITION_HORZ_A or PARTITION_HORZ_B.
+ if (sshigh * 2 == bhigh)
+ return (mbmi_below->sb_type == subsize) ? PARTITION_HORZ
+ : PARTITION_HORZ_B;
+ assert(sshigh * 4 == bhigh);
+ return (mbmi_below->sb_type == subsize) ? PARTITION_HORZ_4
+ : PARTITION_HORZ_A;
#else
- const int hbs = mi_size_wide[bsize] / 2;
+ // Smaller height but same width. Is PARTITION_HORZ_4, PARTITION_HORZ or
+ // PARTITION_HORZ_B. To distinguish the latter two, check if the lower
+ // half was split.
+ if (sshigh * 4 == bhigh) return PARTITION_HORZ_4;
+ assert(sshigh * 2 == bhigh);
+
+ if (mbmi_below->sb_type == subsize)
+ return PARTITION_HORZ;
+ else
+ return PARTITION_HORZ_B;
+#endif
+ } else if (sshigh == bhigh) {
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ // Smaller width but same height. Is PARTITION_VERT, PARTITION_VERT_4,
+ // PARTITION_VERT_A or PARTITION_VERT_B.
+ if (sswide * 2 == bwide)
+ return (mbmi_right->sb_type == subsize) ? PARTITION_VERT
+ : PARTITION_VERT_B;
+ assert(sswide * 4 == bwide);
+ return (mbmi_right->sb_type == subsize) ? PARTITION_VERT_4
+ : PARTITION_VERT_A;
+#else
+ // Smaller width but same height. Is PARTITION_VERT_4, PARTITION_VERT or
+ // PARTITION_VERT_B. To distinguish the latter two, check if the right
+ // half was split.
+ if (sswide * 4 == bwide) return PARTITION_VERT_4;
+      assert(sswide * 2 == bwide);
+
+ if (mbmi_right->sb_type == subsize)
+ return PARTITION_VERT;
+ else
+ return PARTITION_VERT_B;
+#endif
+ } else {
+#if !CONFIG_EXT_PARTITION_TYPES_AB
+ // Smaller width and smaller height. Might be PARTITION_SPLIT or could be
+ // PARTITION_HORZ_A or PARTITION_VERT_A. If subsize isn't halved in both
+ // dimensions, we immediately know this is a split (which will recurse to
+ // get to subsize). Otherwise look down and to the right. With
+ // PARTITION_VERT_A, the right block will have height bhigh; with
+      // PARTITION_HORZ_A, the lower block will have width bwide. Otherwise
+ // it's PARTITION_SPLIT.
+ if (sswide * 2 != bwide || sshigh * 2 != bhigh) return PARTITION_SPLIT;
+
+ if (mi_size_wide[mbmi_below->sb_type] == bwide) return PARTITION_HORZ_A;
+ if (mi_size_high[mbmi_right->sb_type] == bhigh) return PARTITION_VERT_A;
+#endif
- assert(cm->mi_grid_visible[offset] == &cm->mi[offset]);
-
- if (partition == PARTITION_HORZ_4 || partition == PARTITION_VERT_4)
- return partition;
-
- if (partition != PARTITION_NONE && bsize > BLOCK_8X8 &&
- mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
- const BLOCK_SIZE h = get_subsize(bsize, PARTITION_HORZ_A);
- const BLOCK_SIZE v = get_subsize(bsize, PARTITION_VERT_A);
- const MB_MODE_INFO *const mbmi_right = &mi[hbs]->mbmi;
- const MB_MODE_INFO *const mbmi_below = &mi[hbs * cm->mi_stride]->mbmi;
- if (mbmi->sb_type == h) {
- return mbmi_below->sb_type == h ? PARTITION_HORZ : PARTITION_HORZ_B;
- } else if (mbmi->sb_type == v) {
- return mbmi_right->sb_type == v ? PARTITION_VERT : PARTITION_VERT_B;
- } else if (mbmi_below->sb_type == h) {
- return PARTITION_HORZ_A;
- } else if (mbmi_right->sb_type == v) {
- return PARTITION_VERT_A;
- } else {
- return PARTITION_SPLIT;
- }
+ return PARTITION_SPLIT;
}
-
- return partition;
-#endif // !CONFIG_EXT_PARTITION_TYPES
}
+#endif
+ const int vert_split = sswide < bwide;
+ const int horz_split = sshigh < bhigh;
+ const int split_idx = (vert_split << 1) | horz_split;
+ assert(split_idx != 0);
+
+ static const PARTITION_TYPE base_partitions[4] = {
+ PARTITION_INVALID, PARTITION_HORZ, PARTITION_VERT, PARTITION_SPLIT
+ };
+
+ return base_partitions[split_idx];
+}
+
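After the extended-type disambiguation, the fall-through classifies by which dimensions shrank: split_idx = (vert_split << 1) | horz_split, so index 1 (height shrank) maps to PARTITION_HORZ, index 2 (width shrank) to PARTITION_VERT, and index 3 to PARTITION_SPLIT; index 0 is impossible because subsize == bsize already returned PARTITION_NONE above, which is what the assert guards.
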
+static INLINE void set_use_reference_buffer(AV1_COMMON *const cm, int use) {
+#if CONFIG_REFERENCE_BUFFER
+ cm->seq_params.frame_id_numbers_present_flag = use;
+#else
+ (void)cm;
+ (void)use;
+#endif
}
static INLINE void set_sb_size(AV1_COMMON *const cm, BLOCK_SIZE sb_size) {
@@ -1106,6 +1386,17 @@ static INLINE int all_lossless(const AV1_COMMON *cm, const MACROBLOCKD *xd) {
return all_lossless;
}
+static INLINE int use_compressed_header(const AV1_COMMON *cm) {
+ (void)cm;
+#if CONFIG_RESTRICT_COMPRESSED_HDR && CONFIG_NEW_MULTISYMBOL
+ return 0;
+#elif CONFIG_RESTRICT_COMPRESSED_HDR
+ return cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_FORWARD;
+#else
+ return 1;
+#endif // CONFIG_RESTRICT_COMPRESSED_HDR && CONFIG_NEW_MULTISYMBOL
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/pred_common.c b/third_party/aom/av1/common/pred_common.c
index 0417a67f8..51fd0389e 100644
--- a/third_party/aom/av1/common/pred_common.c
+++ b/third_party/aom/av1/common/pred_common.c
@@ -22,19 +22,16 @@
static InterpFilter get_ref_filter_type(const MODE_INFO *mi,
const MACROBLOCKD *xd, int dir,
MV_REFERENCE_FRAME ref_frame) {
- InterpFilter ref_type = SWITCHABLE_FILTERS;
const MB_MODE_INFO *ref_mbmi = &mi->mbmi;
int use_subpel[2] = {
has_subpel_mv_component(mi, xd, dir),
has_subpel_mv_component(mi, xd, dir + 2),
};
- if (ref_mbmi->ref_frame[0] == ref_frame && use_subpel[0])
- ref_type = ref_mbmi->interp_filter[(dir & 0x01)];
- else if (ref_mbmi->ref_frame[1] == ref_frame && use_subpel[1])
- ref_type = ref_mbmi->interp_filter[(dir & 0x01) + 2];
-
- return ref_type;
+ return (((ref_mbmi->ref_frame[0] == ref_frame && use_subpel[0]) ||
+ (ref_mbmi->ref_frame[1] == ref_frame && use_subpel[1]))
+ ? av1_extract_interp_filter(ref_mbmi->interp_filters, dir & 0x01)
+ : SWITCHABLE_FILTERS);
}
int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir) {
@@ -79,13 +76,15 @@ int av1_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int left_type = xd->left_available && is_inter_block(left_mbmi)
- ? left_mbmi->interp_filter
- : SWITCHABLE_FILTERS;
+ const int left_type =
+ xd->left_available && is_inter_block(left_mbmi)
+ ? av1_extract_interp_filter(left_mbmi->interp_filters, 0)
+ : SWITCHABLE_FILTERS;
const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const int above_type = xd->up_available && is_inter_block(above_mbmi)
- ? above_mbmi->interp_filter
- : SWITCHABLE_FILTERS;
+ const int above_type =
+ xd->up_available && is_inter_block(above_mbmi)
+ ? av1_extract_interp_filter(above_mbmi->interp_filters, 0)
+ : SWITCHABLE_FILTERS;
if (left_type == above_type) {
return left_type;
@@ -110,11 +109,7 @@ static INTRA_FILTER get_ref_intra_filter(const MB_MODE_INFO *ref_mbmi) {
if (ref_mbmi->sb_type >= BLOCK_8X8) {
const PREDICTION_MODE mode = ref_mbmi->mode;
if (is_inter_block(ref_mbmi)) {
-#if CONFIG_DUAL_FILTER
- switch (ref_mbmi->interp_filter[0]) {
-#else
- switch (ref_mbmi->interp_filter) {
-#endif
+ switch (av1_extract_interp_filter(ref_mbmi->interp_filters, 0)) {
case EIGHTTAP_REGULAR: ref_type = INTRA_FILTER_8TAP; break;
case EIGHTTAP_SMOOTH: ref_type = INTRA_FILTER_8TAP_SMOOTH; break;
case MULTITAP_SHARP: ref_type = INTRA_FILTER_8TAP_SHARP; break;
@@ -153,9 +148,14 @@ int av1_get_pred_context_intra_interp(const MACROBLOCKD *xd) {
#endif // CONFIG_INTRA_INTERP
#endif // CONFIG_EXT_INTRA
-#if CONFIG_PALETTE && CONFIG_PALETTE_DELTA_ENCODING
-int av1_get_palette_cache(const MODE_INFO *above_mi, const MODE_INFO *left_mi,
- int plane, uint16_t *cache) {
+#if CONFIG_PALETTE_DELTA_ENCODING
+int av1_get_palette_cache(const MACROBLOCKD *const xd, int plane,
+ uint16_t *cache) {
+ const int row = -xd->mb_to_top_edge >> 3;
+ // Do not refer to above SB row when on SB boundary.
+ const MODE_INFO *const above_mi =
+ (row % (1 << MIN_SB_SIZE_LOG2)) ? xd->above_mi : NULL;
+ const MODE_INFO *const left_mi = xd->left_mi;
int above_n = 0, left_n = 0;
if (above_mi)
above_n = above_mi->mbmi.palette_mode_info.palette_size[plane != 0];
@@ -166,8 +166,9 @@ int av1_get_palette_cache(const MODE_INFO *above_mi, const MODE_INFO *left_mi,
int left_idx = plane * PALETTE_MAX_SIZE;
int n = 0;
const uint16_t *above_colors =
- above_mi->mbmi.palette_mode_info.palette_colors;
- const uint16_t *left_colors = left_mi->mbmi.palette_mode_info.palette_colors;
+ above_mi ? above_mi->mbmi.palette_mode_info.palette_colors : NULL;
+ const uint16_t *left_colors =
+ left_mi ? left_mi->mbmi.palette_mode_info.palette_colors : NULL;
// Merge the sorted lists of base colors from above and left to get
// combined sorted color cache.
while (above_n > 0 && left_n > 0) {
@@ -193,7 +194,7 @@ int av1_get_palette_cache(const MODE_INFO *above_mi, const MODE_INFO *left_mi,
assert(n <= 2 * PALETTE_MAX_SIZE);
return n;
}
-#endif // CONFIG_PALETTE && CONFIG_PALETTE_DELTA_ENCODING
+#endif // CONFIG_PALETTE_DELTA_ENCODING
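
The merge loop elided from this hunk combines the above and left palettes, each sorted ascending, into one sorted, duplicate-free cache. A self-contained sketch of that merge under the same conventions (names here are illustrative, not the library's):

    static int merge_sorted_u16(const uint16_t *a, int an,
                                const uint16_t *b, int bn, uint16_t *out) {
      int i = 0, j = 0, n = 0;
      while (i < an && j < bn) {
        if (a[i] < b[j]) out[n++] = a[i++];
        else if (a[i] > b[j]) out[n++] = b[j++];
        else { out[n++] = a[i++]; j++; }  /* equal: keep one copy */
      }
      while (i < an) out[n++] = a[i++];
      while (j < bn) out[n++] = b[j++];
      return n;
    }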
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
@@ -219,7 +220,7 @@ int av1_get_intra_inter_context(const MACROBLOCKD *xd) {
}
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// The compound/single mode info data structure has one element border above and
// to the left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
@@ -253,7 +254,7 @@ int av1_get_inter_mode_context(const MACROBLOCKD *xd) {
return 2;
}
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_EXT_REFS
#define CHECK_BACKWARD_REFS(ref_frame) \
@@ -314,8 +315,6 @@ int av1_get_reference_mode_context(const AV1_COMMON *cm,
}
#if CONFIG_EXT_COMP_REFS
-#define CHECK_BWDREF_OR_ALTREF(ref_frame) \
- ((ref_frame) == BWDREF_FRAME || (ref_frame) == ALTREF_FRAME)
// TODO(zoeliu): To try on the design of 3 contexts, instead of 5:
// COMP_REF_TYPE_CONTEXTS = 3
int av1_get_comp_reference_type_context(const MACROBLOCKD *xd) {
@@ -345,9 +344,9 @@ int av1_get_comp_reference_type_context(const MACROBLOCKD *xd) {
const MV_REFERENCE_FRAME frfl = left_mbmi->ref_frame[0];
if (a_sg && l_sg) { // single/single
- pred_context = 1 +
- 2 * (!(CHECK_BWDREF_OR_ALTREF(frfa) ^
- CHECK_BWDREF_OR_ALTREF(frfl)));
+ pred_context =
+ 1 +
+ 2 * (!(IS_BACKWARD_REF_FRAME(frfa) ^ IS_BACKWARD_REF_FRAME(frfl)));
} else if (l_sg || a_sg) { // single/comp
const int uni_rfc =
a_sg ? has_uni_comp_refs(left_mbmi) : has_uni_comp_refs(above_mbmi);
@@ -355,8 +354,8 @@ int av1_get_comp_reference_type_context(const MACROBLOCKD *xd) {
if (!uni_rfc) // comp bidir
pred_context = 1;
else // comp unidir
- pred_context = 3 + (!(CHECK_BWDREF_OR_ALTREF(frfa) ^
- CHECK_BWDREF_OR_ALTREF(frfl)));
+ pred_context = 3 + (!(IS_BACKWARD_REF_FRAME(frfa) ^
+ IS_BACKWARD_REF_FRAME(frfl)));
} else { // comp/comp
const int a_uni_rfc = has_uni_comp_refs(above_mbmi);
const int l_uni_rfc = has_uni_comp_refs(left_mbmi);
@@ -580,12 +579,12 @@ int av1_get_pred_context_comp_ref_p(const AV1_COMMON *cm,
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialised to 0.
-#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS // No change to bitstream
+#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
// Code seems to assume that signbias of cm->comp_bwd_ref[0] is always 1
const int bwd_ref_sign_idx = 1;
#else
const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
-#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS
+#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
(void)cm;
@@ -690,12 +689,12 @@ int av1_get_pred_context_comp_ref_p1(const AV1_COMMON *cm,
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialised to 0.
-#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS // No change to bitstream
+#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
// Code seems to assume that signbias of cm->comp_bwd_ref[0] is always 1
const int bwd_ref_sign_idx = 1;
#else
const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
-#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS
+#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
(void)cm;
@@ -798,12 +797,11 @@ int av1_get_pred_context_comp_ref_p2(const AV1_COMMON *cm,
// The mode info data structure has a one element border above and to the
// left of the entries correpsonding to real macroblocks.
// The prediction flags in these dummy entries are initialised to 0.
-#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS // No change to bitstream
- // Code seems to assume that signbias of cm->comp_bwd_ref[0] is always 1
+#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
const int bwd_ref_sign_idx = 1;
#else
const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
-#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS
+#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
(void)cm;
@@ -887,8 +885,6 @@ int av1_get_pred_context_comp_ref_p2(const AV1_COMMON *cm,
return pred_context;
}
-#if CONFIG_ALTREF2
-
// Obtain contexts to signal a reference frame be either BWDREF/ALTREF2, or
// ALTREF.
int av1_get_pred_context_brfarf2_or_arf(const MACROBLOCKD *xd) {
@@ -989,132 +985,6 @@ int av1_get_pred_context_comp_bwdref_p1(const AV1_COMMON *cm,
return av1_get_pred_context_brf_or_arf2(xd);
}
-#else // !CONFIG_ALTREF2
-
-// Returns a context number for the given MB prediction signal
-int av1_get_pred_context_comp_bwdref_p(const AV1_COMMON *cm,
- const MACROBLOCKD *xd) {
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
-
-// Note:
-// The mode info data structure has a one element border above and to the
-// left of the entries corresponding to real macroblocks.
-// The prediction flags in these dummy entries are initialized to 0.
-#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS // No change to bitstream
- // Code seems to assume that signbias of cm->comp_bwd_ref[0] is always 1
- const int bwd_ref_sign_idx = 1;
-#else
- const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
-#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS
- const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
-
- (void)cm;
-
- if (above_in_image && left_in_image) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra (2)
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
-
- if (!has_second_ref(edge_mbmi)) // single pred (1/3)
- pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_bwd_ref[1]);
- else // comp pred (1/3)
- pred_context =
- 1 +
- 2 * (edge_mbmi->ref_frame[bwd_ref_sign_idx] != cm->comp_bwd_ref[1]);
- } else { // inter/inter
- const int l_comp = has_second_ref(left_mbmi);
- const int a_comp = has_second_ref(above_mbmi);
-
- const MV_REFERENCE_FRAME l_brf =
- l_comp ? left_mbmi->ref_frame[bwd_ref_sign_idx] : NONE_FRAME;
- const MV_REFERENCE_FRAME a_brf =
- a_comp ? above_mbmi->ref_frame[bwd_ref_sign_idx] : NONE_FRAME;
-
- const MV_REFERENCE_FRAME l_frf =
- !l_comp ? left_mbmi->ref_frame[0]
- : left_mbmi->ref_frame[fwd_ref_sign_idx];
- const MV_REFERENCE_FRAME a_frf =
- !a_comp ? above_mbmi->ref_frame[0]
- : above_mbmi->ref_frame[fwd_ref_sign_idx];
-
- if (l_comp && a_comp) { // comp/comp
- if (l_brf == a_brf && l_brf == cm->comp_bwd_ref[1]) {
- pred_context = 0;
- } else if (l_brf == cm->comp_bwd_ref[1] ||
- a_brf == cm->comp_bwd_ref[1]) {
- pred_context = 1;
- } else {
-// NOTE: Backward ref should be either BWDREF or ALTREF.
-#if !USE_UNI_COMP_REFS
- // TODO(zoeliu): To further study the UNIDIR scenario
- assert(l_brf == a_brf && l_brf != cm->comp_bwd_ref[1]);
-#endif // !USE_UNI_COMP_REFS
- pred_context = 3;
- }
- } else if (!l_comp && !a_comp) { // single/single
- if (l_frf == a_frf && l_frf == cm->comp_bwd_ref[1]) {
- pred_context = 0;
- } else if (l_frf == cm->comp_bwd_ref[1] ||
- a_frf == cm->comp_bwd_ref[1]) {
- pred_context = 1;
- } else if (l_frf == a_frf) {
- pred_context = 3;
- } else {
-#if !USE_UNI_COMP_REFS
- // TODO(zoeliu): To further study the UNIDIR scenario
- assert(l_frf != a_frf && l_frf != cm->comp_bwd_ref[1] &&
- a_frf != cm->comp_bwd_ref[1]);
-#endif // !USE_UNI_COMP_REFS
- pred_context = 4;
- }
- } else { // comp/single
- assert((l_comp && !a_comp) || (!l_comp && a_comp));
-
- if ((l_comp && l_brf == cm->comp_bwd_ref[1] &&
- a_frf == cm->comp_bwd_ref[1]) ||
- (a_comp && a_brf == cm->comp_bwd_ref[1] &&
- l_frf == cm->comp_bwd_ref[1])) {
- pred_context = 1;
- } else if ((l_comp && l_brf == cm->comp_bwd_ref[1]) ||
- (a_comp && a_brf == cm->comp_bwd_ref[1]) ||
- (!l_comp && l_frf == cm->comp_bwd_ref[1]) ||
- (!a_comp && a_frf == cm->comp_bwd_ref[1])) {
- pred_context = 2;
- } else {
- pred_context = 4;
- }
- }
- }
- } else if (above_in_image || left_in_image) { // one edge available
- const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
-
- if (!is_inter_block(edge_mbmi)) {
- pred_context = 2;
- } else {
- if (has_second_ref(edge_mbmi)) {
- pred_context =
- 4 * (edge_mbmi->ref_frame[bwd_ref_sign_idx] != cm->comp_bwd_ref[1]);
- } else {
- pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_bwd_ref[1]);
- }
- }
- } else { // no edges available (2)
- pred_context = 2;
- }
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
-
- return pred_context;
-}
-#endif // CONFIG_ALTREF2
-
#else // !CONFIG_EXT_REFS
// Returns a context number for the given MB prediction signal
@@ -1270,96 +1140,7 @@ int av1_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
// non-ALTREF backward reference frame, knowing that it shall be either of
// these 2 choices.
int av1_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
-#if CONFIG_ALTREF2
return av1_get_pred_context_brfarf2_or_arf(xd);
-#else // !CONFIG_ALTREF2
- int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- // Note:
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
- if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
-
- if (above_intra && left_intra) { // intra/intra
- pred_context = 2;
- } else if (above_intra || left_intra) { // intra/inter or inter/intra
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
- if (!has_second_ref(edge_mbmi)) { // single
- if (!CHECK_BACKWARD_REFS(edge_mbmi->ref_frame[0]))
- pred_context = 3;
- else
- pred_context = 4 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME);
- } else { // comp
- pred_context = 1 +
- 2 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME ||
- edge_mbmi->ref_frame[1] == BWDREF_FRAME);
- }
- } else { // inter/inter
- const int above_has_second = has_second_ref(above_mbmi);
- const int left_has_second = has_second_ref(left_mbmi);
- const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
- const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
-
- if (above_has_second && left_has_second) { // comp/comp
- if (above0 == left0 && above1 == left1)
- pred_context =
- 3 * (above0 == BWDREF_FRAME || above1 == BWDREF_FRAME ||
- left0 == BWDREF_FRAME || left1 == BWDREF_FRAME);
- else
- pred_context = 2;
- } else if (above_has_second || left_has_second) { // single/comp
- const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
- const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
-
- if (rfs == BWDREF_FRAME)
- pred_context = 3 + (crf1 == BWDREF_FRAME || crf2 == BWDREF_FRAME);
- else if (rfs == ALTREF_FRAME)
- pred_context = (crf1 == BWDREF_FRAME || crf2 == BWDREF_FRAME);
- else
- pred_context = 1 + 2 * (crf1 == BWDREF_FRAME || crf2 == BWDREF_FRAME);
- } else { // single/single
- if (!CHECK_BACKWARD_REFS(above0) && !CHECK_BACKWARD_REFS(left0)) {
- pred_context = 2 + (above0 == left0);
- } else if (!CHECK_BACKWARD_REFS(above0) ||
- !CHECK_BACKWARD_REFS(left0)) {
- const MV_REFERENCE_FRAME edge0 =
- !CHECK_BACKWARD_REFS(above0) ? left0 : above0;
- pred_context = 4 * (edge0 == BWDREF_FRAME);
- } else {
- pred_context =
- 2 * (above0 == BWDREF_FRAME) + 2 * (left0 == BWDREF_FRAME);
- }
- }
- }
- } else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
-
- if (!is_inter_block(edge_mbmi) ||
- (!CHECK_BACKWARD_REFS(edge_mbmi->ref_frame[0]) &&
- !has_second_ref(edge_mbmi)))
- pred_context = 2;
- else if (!has_second_ref(edge_mbmi)) // single
- pred_context = 4 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME);
- else // comp
- pred_context = 3 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME ||
- edge_mbmi->ref_frame[1] == BWDREF_FRAME);
- } else { // no edges available (2)
- pred_context = 2;
- }
-
- assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
- return pred_context;
-#endif // CONFIG_ALTREF2
}
// For the bit to signal whether the single reference is LAST3/GOLDEN or
@@ -1640,13 +1421,11 @@ int av1_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) {
return pred_context;
}
-#if CONFIG_ALTREF2
// For the bit to signal whether the single reference is ALTREF2_FRAME or
// BWDREF_FRAME, knowing that it shall be either of these 2 choices.
int av1_get_pred_context_single_ref_p6(const MACROBLOCKD *xd) {
return av1_get_pred_context_brf_or_arf2(xd);
}
-#endif // CONFIG_ALTREF2
#else // !CONFIG_EXT_REFS
diff --git a/third_party/aom/av1/common/pred_common.h b/third_party/aom/av1/common/pred_common.h
index aab026eeb..db4618a59 100644
--- a/third_party/aom/av1/common/pred_common.h
+++ b/third_party/aom/av1/common/pred_common.h
@@ -86,14 +86,14 @@ int av1_get_pred_context_intra_interp(const MACROBLOCKD *xd);
#endif // CONFIG_INTRA_INTERP
#endif // CONFIG_EXT_INTRA
-#if CONFIG_PALETTE && CONFIG_PALETTE_DELTA_ENCODING
+#if CONFIG_PALETTE_DELTA_ENCODING
// Get a list of palette base colors that are used in the above and left blocks,
// referred to as "color cache". The return value is the number of colors in the
// cache (<= 2 * PALETTE_MAX_SIZE). The color values are stored in "cache"
// in ascending order.
-int av1_get_palette_cache(const MODE_INFO *above_mi, const MODE_INFO *left_mi,
- int plane, uint16_t *cache);
-#endif // CONFIG_PALETTE && CONFIG_PALETTE_DELTA_ENCODING
+int av1_get_palette_cache(const MACROBLOCKD *const xd, int plane,
+ uint16_t *cache);
+#endif // CONFIG_PALETTE_DELTA_ENCODING
int av1_get_intra_inter_context(const MACROBLOCKD *xd);
@@ -243,17 +243,22 @@ static INLINE aom_prob av1_get_pred_prob_comp_bwdref_p(const AV1_COMMON *cm,
return cm->fc->comp_bwdref_prob[pred_context][0];
}
-#if CONFIG_ALTREF2
-// TODO(zoeliu): ALTREF2 to work with NEW_MULTISYMBOL
int av1_get_pred_context_comp_bwdref_p1(const AV1_COMMON *cm,
const MACROBLOCKD *xd);
+#if CONFIG_NEW_MULTISYMBOL
+static INLINE aom_cdf_prob *av1_get_pred_cdf_comp_bwdref_p1(
+ const AV1_COMMON *cm, const MACROBLOCKD *xd) {
+ const int pred_context = av1_get_pred_context_comp_bwdref_p1(cm, xd);
+ return xd->tile_ctx->comp_bwdref_cdf[pred_context][1];
+}
+#endif // CONFIG_NEW_MULTISYMBOL
+
static INLINE aom_prob av1_get_pred_prob_comp_bwdref_p1(const AV1_COMMON *cm,
const MACROBLOCKD *xd) {
const int pred_context = av1_get_pred_context_comp_bwdref_p1(cm, xd);
return cm->fc->comp_bwdref_prob[pred_context][1];
}
-#endif // CONFIG_ALTREF2
#endif // CONFIG_EXT_REFS
int av1_get_pred_context_single_ref_p1(const MACROBLOCKD *xd);
@@ -292,14 +297,12 @@ static INLINE aom_prob av1_get_pred_prob_single_ref_p5(const AV1_COMMON *cm,
return cm->fc->single_ref_prob[av1_get_pred_context_single_ref_p5(xd)][4];
}
-#if CONFIG_ALTREF2
int av1_get_pred_context_single_ref_p6(const MACROBLOCKD *xd);
static INLINE aom_prob av1_get_pred_prob_single_ref_p6(const AV1_COMMON *cm,
const MACROBLOCKD *xd) {
return cm->fc->single_ref_prob[av1_get_pred_context_single_ref_p6(xd)][5];
}
-#endif // CONFIG_ALTREF2
#endif // CONFIG_EXT_REFS
#if CONFIG_NEW_MULTISYMBOL
@@ -334,17 +337,23 @@ static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p5(
return xd->tile_ctx
->single_ref_cdf[av1_get_pred_context_single_ref_p5(xd)][4];
}
+static INLINE aom_cdf_prob *av1_get_pred_cdf_single_ref_p6(
+ const AV1_COMMON *cm, const MACROBLOCKD *xd) {
+ (void)cm;
+ return xd->tile_ctx
+ ->single_ref_cdf[av1_get_pred_context_single_ref_p6(xd)][5];
+}
#endif // CONFIG_EXT_REFS
#endif // CONFIG_NEW_MULTISYMBOL
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
int av1_get_inter_mode_context(const MACROBLOCKD *xd);
static INLINE aom_prob av1_get_inter_mode_prob(const AV1_COMMON *cm,
const MACROBLOCKD *xd) {
return cm->fc->comp_inter_mode_prob[av1_get_inter_mode_context(xd)];
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
// Returns a context number for the given MB prediction signal
// The mode info data structure has a one element border above and to the
diff --git a/third_party/aom/av1/common/pvq.c b/third_party/aom/av1/common/pvq.c
index 75fe761d7..221c90c04 100644
--- a/third_party/aom/av1/common/pvq.c
+++ b/third_party/aom/av1/common/pvq.c
@@ -591,7 +591,7 @@ static int32_t od_pow(int32_t x, od_val16 beta)
/*log2(g/OD_COMPAND_SCALE) = log2(x) - OD_COMPAND_SHIFT in
Q(OD_LOG2_OUTSHIFT).*/
logr = od_log2(t) + (log2_x - OD_COMPAND_SHIFT)*OD_LOG2_OUTSCALE;
- logr = OD_MULT16_32_QBETA(beta, logr);
+ logr = (od_val32)OD_MULT16_32_QBETA(beta, logr);
return od_exp2(logr);
}
#endif
@@ -974,7 +974,7 @@ void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse,
od_val32 x;
/* This multiply doesn't round, so it introduces some bias.
It would be nice (but not critical) to fix this. */
- x = OD_MULT16_32_Q16(ypulse[i], scale);
+ x = (od_val32)OD_MULT16_32_Q16(ypulse[i], scale);
#if defined(OD_FLOAT_PVQ)
xcoeff[i] = (od_coeff)floor(.5
+ x*(qm_inv[i]*OD_QM_INV_SCALE_1));
diff --git a/third_party/aom/av1/common/pvq.h b/third_party/aom/av1/common/pvq.h
index 17e54d4c5..4adf22f02 100644
--- a/third_party/aom/av1/common/pvq.h
+++ b/third_party/aom/av1/common/pvq.h
@@ -19,11 +19,7 @@
extern const uint16_t EXP_CDF_TABLE[][16];
extern const uint16_t LAPLACE_OFFSET[];
-#if CONFIG_DAALA_DIST
-#define AV1_PVQ_ENABLE_ACTIVITY_MASKING (1)
-#else
#define AV1_PVQ_ENABLE_ACTIVITY_MASKING (0)
-#endif
# define PVQ_MAX_PARTITIONS (1 + 3*(OD_TXSIZES-1))
diff --git a/third_party/aom/av1/common/quant_common.c b/third_party/aom/av1/common/quant_common.c
index 5210ba812..ea7140cdc 100644
--- a/third_party/aom/av1/common/quant_common.c
+++ b/third_party/aom/av1/common/quant_common.c
@@ -360,21 +360,28 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE];
static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE];
void aom_qm_init(AV1_COMMON *cm) {
- int q, c, f, t, size;
+ int q, c, f, t;
int current;
for (q = 0; q < NUM_QM_LEVELS; ++q) {
for (c = 0; c < 2; ++c) {
for (f = 0; f < 2; ++f) {
current = 0;
for (t = 0; t < TX_SIZES_ALL; ++t) {
- size = tx_size_2d[t];
- cm->gqmatrix[q][c][f][t] = &wt_matrix_ref[AOMMIN(
- NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
- [current];
- cm->giqmatrix[q][c][f][t] = &iwt_matrix_ref[AOMMIN(
- NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
+ const int size = tx_size_2d[t];
+ // Don't use QM for sizes > 32x32
+ if (q == NUM_QM_LEVELS - 1 || size > 1024) {
+ cm->gqmatrix[q][c][f][t] = NULL;
+ cm->giqmatrix[q][c][f][t] = NULL;
+ } else {
+ assert(current + size <= QM_TOTAL_SIZE);
+ cm->gqmatrix[q][c][f][t] = &wt_matrix_ref[AOMMIN(
+ NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
[current];
- current += size;
+ cm->giqmatrix[q][c][f][t] = &iwt_matrix_ref[AOMMIN(
+ NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
+ [current];
+ current += size;
+ }
}
}
}
@@ -14039,7 +14046,7 @@ static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][QM_TOTAL_SIZE] = {
};
#endif
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ
/* Quantization matrices for 8x8. For other block sizes, we currently just do
resampling. */
/* Flat quantization, i.e. optimize for PSNR. */
diff --git a/third_party/aom/av1/common/quant_common.h b/third_party/aom/av1/common/quant_common.h
index f28ffe7ac..92843fe4d 100644
--- a/third_party/aom/av1/common/quant_common.h
+++ b/third_party/aom/av1/common/quant_common.h
@@ -48,9 +48,7 @@ int av1_get_qindex(const struct segmentation *seg, int segment_id,
// Reduce the large number of quantizers to a smaller number of levels for which
// different matrices may be defined
static INLINE int aom_get_qmlevel(int qindex, int first, int last) {
- int qmlevel = (qindex * (last + 1 - first) + QINDEX_RANGE / 2) / QINDEX_RANGE;
- qmlevel = AOMMIN(qmlevel + first, NUM_QM_LEVELS - 1);
- return qmlevel;
+ return first + (qindex * (last + 1 - first)) / QINDEX_RANGE;
}
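
The rewritten mapping is a plain linear rescale of qindex from [0, QINDEX_RANGE) onto [first, last]. For example, with first = 5, last = 9 and QINDEX_RANGE = 256, qindex = 128 gives 5 + (128 * 5) / 256 = 7 and qindex = 255 gives 5 + (255 * 5) / 256 = 9; the result can never exceed last, so the old AOMMIN clamp against NUM_QM_LEVELS - 1 is only safe to drop while callers keep last below NUM_QM_LEVELS.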
void aom_qm_init(struct AV1Common *cm);
qm_val_t *aom_iqmatrix(struct AV1Common *cm, int qindex, int comp,
@@ -99,7 +97,7 @@ static INLINE int get_dq_profile_from_ctx(int qindex, int q_ctx, int is_inter,
}
#endif // CONFIG_NEW_QUANT
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ
extern const int OD_QM8_Q4_FLAT[];
extern const int OD_QM8_Q4_HVS[];
#endif
diff --git a/third_party/aom/av1/common/reconinter.c b/third_party/aom/av1/common/reconinter.c
index a1b5c1f67..d7e39b45c 100644
--- a/third_party/aom/av1/common/reconinter.c
+++ b/third_party/aom/av1/common/reconinter.c
@@ -10,6 +10,8 @@
*/
#include <assert.h>
+#include <stdio.h>
+#include <limits.h>
#include "./aom_scale_rtcd.h"
#include "./aom_dsp_rtcd.h"
@@ -23,9 +25,186 @@
#include "av1/common/reconintra.h"
#if CONFIG_MOTION_VAR
#include "av1/common/onyxc_int.h"
+#include "av1/common/obmc.h"
#endif // CONFIG_MOTION_VAR
-#if CONFIG_EXT_INTER
+#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+// This function will determine whether or not to create a warped
+// prediction and return the appropriate motion model depending
+// on the configuration. Behavior will change with different
+// combinations of GLOBAL_MOTION, WARPED_MOTION and MOTION_VAR.
+static INLINE int allow_warp(const MODE_INFO *const mi,
+ const WarpTypesAllowed *const warp_types,
+#if CONFIG_GLOBAL_MOTION
+ const WarpedMotionParams *const gm_params,
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_MOTION_VAR
+ int build_for_obmc,
+#endif // CONFIG_MOTION_VAR
+ WarpedMotionParams *final_warp_params) {
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ *final_warp_params = default_warp_params;
+
+// Only global motion configured
+#if CONFIG_GLOBAL_MOTION && !CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
+ (void)mbmi;
+ if (warp_types->global_warp_allowed) {
+ memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
+ return 1;
+ }
+#endif // CONFIG_GLOBAL_MOTION && !CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
+
+// Only warped motion configured
+#if CONFIG_WARPED_MOTION && !CONFIG_GLOBAL_MOTION && !CONFIG_MOTION_VAR
+ if (warp_types->local_warp_allowed) {
+ memcpy(final_warp_params, &mbmi->wm_params[0], sizeof(*final_warp_params));
+ return 1;
+ }
+#endif // CONFIG_WARPED_MOTION && !CONFIG_GLOBAL_MOTION && !CONFIG_MOTION_VAR
+
+// Warped and global motion configured
+#if CONFIG_GLOBAL_MOTION && CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
+ // When both are enabled, warped will take priority. The global parameters
+ // will only be used to compute projection samples to find the warped model.
+ // Note that when a block chooses global, it will not be possible to
+ // select WARPED_CAUSAL.
+ if (warp_types->local_warp_allowed) {
+ memcpy(final_warp_params, &mbmi->wm_params[0], sizeof(*final_warp_params));
+ return 1;
+ } else if (warp_types->global_warp_allowed) {
+ memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
+ return 1;
+ }
+#endif // CONFIG_GLOBAL_MOTION && CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
+
+// Motion var and global motion configured
+#if CONFIG_GLOBAL_MOTION && CONFIG_MOTION_VAR && !CONFIG_WARPED_MOTION
+ // We warp if either case is true:
+ // 1.) We are predicting a block which uses global motion
+ // 2.) We are predicting a neighboring block of a block using OBMC,
+ // the neighboring block uses global motion, and we have enabled
+ // WARP_GM_NEIGHBORS_WITH_OBMC
+ (void)mbmi;
+ if (warp_types->global_warp_allowed &&
+ (WARP_GM_NEIGHBORS_WITH_OBMC || !build_for_obmc)) {
+ memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
+ return 1;
+ }
+#endif // CONFIG_GLOBAL_MOTION && CONFIG_MOTION_VAR && !CONFIG_WARPED_MOTION
+
+// Motion var and warped motion configured
+#if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && !CONFIG_GLOBAL_MOTION
+ // We warp if either case is true:
+ // 1.) We are predicting a block with motion mode WARPED_CAUSAL
+ // 2.) We are predicting a neighboring block of a block using OBMC,
+ // the neighboring block has mode WARPED_CAUSAL, and we have enabled
+ // WARP_WM_NEIGHBORS_WITH_OBMC
+ if (warp_types->local_warp_allowed) {
+ if ((build_for_obmc && WARP_WM_NEIGHBORS_WITH_OBMC) || (!build_for_obmc)) {
+ memcpy(final_warp_params, &mbmi->wm_params[0],
+ sizeof(*final_warp_params));
+ return 1;
+ }
+ }
+#endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && !CONFIG_GLOBAL_MOTION
+
+// Motion var, warped motion and global motion all configured
+#if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && CONFIG_GLOBAL_MOTION
+ if (warp_types->local_warp_allowed) {
+ if ((build_for_obmc && WARP_WM_NEIGHBORS_WITH_OBMC) || (!build_for_obmc)) {
+ memcpy(final_warp_params, &mbmi->wm_params[0],
+ sizeof(*final_warp_params));
+ return 1;
+ }
+ } else if (warp_types->global_warp_allowed &&
+ (WARP_GM_NEIGHBORS_WITH_OBMC || !build_for_obmc)) {
+ memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
+ return 1;
+ }
+#endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && CONFIG_GLOBAL_MOTION
+
+ return 0;
+}
+#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
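/* Decision table for allow_warp() when all three experiments are enabled
   (a sketch derived from the branches above; WARP_WM_NEIGHBORS_WITH_OBMC
   and WARP_GM_NEIGHBORS_WITH_OBMC are compile-time flags):

   local_warp  global_warp  build_for_obmc   model chosen
   ----------  -----------  --------------   -----------------------------------
       1            x             0          mbmi->wm_params[0]
       1            x             1          wm_params[0] iff WARP_WM_NEIGHBORS_WITH_OBMC
       0            1             0          gm_params
       0            1             1          gm_params    iff WARP_GM_NEIGHBORS_WITH_OBMC
       0            0             x          none (returns 0)

   Note the local branch does not fall through: a local-warp block built
   for OBMC without WARP_WM_NEIGHBORS_WITH_OBMC is not warped at all. */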
+
+static INLINE void av1_make_inter_predictor(
+ const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
+ const int subpel_x, const int subpel_y, const struct scale_factors *sf,
+ int w, int h, ConvolveParams *conv_params, InterpFilters interp_filters,
+#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+ const WarpTypesAllowed *warp_types, int p_col, int p_row, int plane,
+ int ref,
+#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+#if CONFIG_MOTION_VAR
+ const MODE_INFO *mi, int build_for_obmc,
+#endif
+ int xs, int ys, const MACROBLOCKD *xd) {
+ (void)xd;
+
+#if !CONFIG_MOTION_VAR
+ const MODE_INFO *mi = xd->mi[0];
+ (void)mi;
+#endif // CONFIG_MOTION_VAR
+
+// Make sure the selected motion mode is valid for this configuration
+#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ assert_motion_mode_valid(mi->mbmi.motion_mode,
+#if CONFIG_GLOBAL_MOTION
+ 0, xd->global_motion,
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
+ mi);
+#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+
+#if CONFIG_WARPED_MOTION || CONFIG_GLOBAL_MOTION
+ WarpedMotionParams final_warp_params;
+ const int do_warp = allow_warp(
+ mi, warp_types,
+#if CONFIG_GLOBAL_MOTION
+#if CONFIG_COMPOUND_SINGLEREF
+      // TODO(zoeliu): Further check that single-ref comp mode works
+      // together with global motion.
+ has_second_ref(&mi->mbmi) ? &xd->global_motion[mi->mbmi.ref_frame[ref]]
+ : &xd->global_motion[mi->mbmi.ref_frame[0]],
+#else // !(CONFIG_COMPOUND_SINGLEREF)
+ &xd->global_motion[mi->mbmi.ref_frame[ref]],
+#endif // CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_MOTION_VAR
+ build_for_obmc,
+#endif // CONFIG_MOTION_VAR
+ &final_warp_params);
+ if (do_warp
+#if CONFIG_AMVR
+ && xd->cur_frame_mv_precision_level == 0
+#endif
+ ) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const struct buf_2d *const pre_buf = &pd->pre[ref];
+ av1_warp_plane(&final_warp_params,
+#if CONFIG_HIGHBITDEPTH
+ xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
+#endif // CONFIG_HIGHBITDEPTH
+ pre_buf->buf0, pre_buf->width, pre_buf->height,
+ pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
+ pd->subsampling_x, pd->subsampling_y, xs, ys, conv_params);
+ return;
+ }
+#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+ sf, w, h, conv_params, interp_filters, xs, ys,
+ xd->bd);
+ return;
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
+ h, conv_params, interp_filters, xs, ys);
+}
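// Dispatch summary for av1_make_inter_predictor() (a sketch of the code
// above): when allow_warp() selects a model (and, under CONFIG_AMVR, the
// frame is at full mv precision), the block is produced by
// av1_warp_plane(); otherwise high-bitdepth frames take
// highbd_inter_predictor() and everything else takes inter_predictor().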
#define NSMOOTHERS 1
@@ -183,12 +362,6 @@ const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
wedge_masks[BLOCK_32X16] },
{ 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
wedge_masks[BLOCK_32X32] },
- { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0,
- wedge_masks[BLOCK_32X64] },
- { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0,
- wedge_masks[BLOCK_64X32] },
- { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0,
- wedge_masks[BLOCK_64X64] },
#else
{ 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
wedge_masks[BLOCK_8X8] },
@@ -204,26 +377,40 @@ const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
wedge_masks[BLOCK_32X16] },
{ 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
wedge_masks[BLOCK_32X32] },
- { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0,
- wedge_masks[BLOCK_32X64] },
- { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0,
- wedge_masks[BLOCK_64X32] },
- { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0,
- wedge_masks[BLOCK_64X64] },
#endif // CONFIG_WEDGE
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
#if CONFIG_EXT_PARTITION
{ 0, NULL, NULL, 0, NULL },
{ 0, NULL, NULL, 0, NULL },
{ 0, NULL, NULL, 0, NULL },
#endif // CONFIG_EXT_PARTITION
- { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_4X16], 0,
+#if CONFIG_WEDGE
+ { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_4X16], 0,
wedge_masks[BLOCK_4X16] },
- { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X4], 0,
+ { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X4], 0,
wedge_masks[BLOCK_16X4] },
{ 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], 0,
wedge_masks[BLOCK_8X32] },
{ 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], 0,
wedge_masks[BLOCK_32X8] },
+#else
+ { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_4X16], 0,
+ wedge_masks[BLOCK_4X16] },
+ { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X4], 0,
+ wedge_masks[BLOCK_16X4] },
+ { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], 0,
+ wedge_masks[BLOCK_8X32] },
+ { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], 0,
+ wedge_masks[BLOCK_32X8] },
+#endif // CONFIG_WEDGE
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+#if CONFIG_EXT_PARTITION
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+#endif // CONFIG_EXT_PARTITION
};
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
@@ -457,12 +644,12 @@ void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
BLOCK_SIZE sb_type, int h, int w, int bd) {
switch (mask_type) {
case DIFFWTD_38:
- diffwtd_mask_highbd(mask, 0, 42, CONVERT_TO_SHORTPTR(src0), src0_stride,
+ diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
bd);
break;
case DIFFWTD_38_INV:
- diffwtd_mask_highbd(mask, 1, 42, CONVERT_TO_SHORTPTR(src0), src0_stride,
+ diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
bd);
break;
@@ -754,26 +941,19 @@ static void build_masked_compound_highbd(
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_SUPERTX
-void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
- uint8_t *dst, int dst_stride,
- const int subpel_x, const int subpel_y,
- const struct scale_factors *sf, int w,
- int h, ConvolveParams *conv_params,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
- int xs, int ys,
+void av1_make_masked_inter_predictor(
+ const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
+ const int subpel_x, const int subpel_y, const struct scale_factors *sf,
+ int w, int h, ConvolveParams *conv_params, InterpFilters interp_filters,
+ int xs, int ys,
#if CONFIG_SUPERTX
- int wedge_offset_x, int wedge_offset_y,
+ int wedge_offset_x, int wedge_offset_y,
#endif // CONFIG_SUPERTX
- int plane,
+ int plane,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- const WarpTypesAllowed *warp_types,
- int p_col, int p_row, int ref,
+ const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- MACROBLOCKD *xd) {
+ MACROBLOCKD *xd) {
const MODE_INFO *mi = xd->mi[0];
const INTERINTER_COMPOUND_DATA comp_data = {
@@ -788,52 +968,81 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
mi->mbmi.interinter_compound_type
};
+// We're going to call av1_make_inter_predictor to generate a prediction into
+// a temporary buffer, and then blend that buffer with the prediction from
+// the other reference.
+//
+// With CONFIG_CONVOLVE_ROUND, if the rounding mode is CONVOLVE_OPT_NO_ROUND
+// then the predictions are kept at 32 bits, so we need 32 bits per pixel.
+// Otherwise we need up to 16 bits per pixel with CONFIG_HIGHBITDEPTH, or
+// just 8 bits otherwise.
+#if CONFIG_CONVOLVE_ROUND
+#define INTER_PRED_BYTES_PER_PIXEL 4
+#elif CONFIG_HIGHBITDEPTH
+#define INTER_PRED_BYTES_PER_PIXEL 2
+#else
+#define INTER_PRED_BYTES_PER_PIXEL 1
+#endif
+ DECLARE_ALIGNED(16, uint8_t,
+ tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
+#undef INTER_PRED_BYTES_PER_PIXEL
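// Sizing sketch for tmp_buf, assuming MAX_SB_SIZE == 128 (so
// MAX_SB_SQUARE == 128 * 128 == 16384; builds without CONFIG_EXT_PARTITION
// use 64):
//   CONFIG_CONVOLVE_ROUND:  4 * 16384 bytes — one 32-bit CONV_BUF_TYPE/pixel
//   CONFIG_HIGHBITDEPTH:    2 * 16384 bytes — one uint16_t sample/pixel
//   otherwise:              1 * 16384 bytes — one uint8_t sample/pixel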
+
#if CONFIG_HIGHBITDEPTH
+ uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ ? CONVERT_TO_BYTEPTR(tmp_buf)
+ : tmp_buf;
+ const int bd = xd->bd;
+#else
+ uint8_t *tmp_dst = tmp_buf;
+ const int bd = 8;
+#endif
+
#if CONFIG_CONVOLVE_ROUND
- DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_dst2[MAX_SB_SQUARE]);
- int tmp_dst2_stride = MAX_SB_SIZE;
+ const int tmp_buf_stride = MAX_SB_SIZE;
+ const int is_conv_no_round = conv_params->round == CONVOLVE_OPT_NO_ROUND;
CONV_BUF_TYPE *org_dst = conv_params->dst;
int org_dst_stride = conv_params->dst_stride;
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
- memset(tmp_dst2, 0, sizeof(tmp_dst2));
- conv_params->dst = tmp_dst2;
- conv_params->dst_stride = tmp_dst2_stride;
- // mask compound has its own average mechanism
- conv_params->do_average = 0;
+ CONV_BUF_TYPE *tmp_buf32 = (CONV_BUF_TYPE *)tmp_buf;
+ if (is_conv_no_round) {
+ conv_params->dst = tmp_buf32;
+ conv_params->dst_stride = tmp_buf_stride;
+ assert(conv_params->do_average == 0);
}
#endif // CONFIG_CONVOLVE_ROUND
- DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
- uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- ? CONVERT_TO_BYTEPTR(tmp_dst_)
- : tmp_dst_;
+
+ // This will generate a prediction in tmp_buf for the second reference
av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
- subpel_y, sf, w, h, conv_params, interp_filter,
+ subpel_y, sf, w, h, conv_params, interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
warp_types, p_col, p_row, plane, ref,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR
- 0, 0,
+ mi, 0,
#endif
xs, ys, xd);
+
#if CONFIG_COMPOUND_SEGMENT
if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
#if CONFIG_CONVOLVE_ROUND
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
- build_compound_seg_mask_d32(comp_data.seg_mask, comp_data.mask_type,
- org_dst, org_dst_stride, tmp_dst2,
- tmp_dst2_stride, mi->mbmi.sb_type, h, w,
- conv_params, xd->bd);
+ if (is_conv_no_round) {
+ build_compound_seg_mask_d32(
+ comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
+ tmp_buf32, tmp_buf_stride, mi->mbmi.sb_type, h, w, conv_params, bd);
} else {
#endif // CONFIG_CONVOLVE_ROUND
+#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
- mi->mbmi.sb_type, h, w, xd->bd);
+ mi->mbmi.sb_type, h, w, bd);
} else {
+#endif
build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.sb_type, h, w);
+#if CONFIG_HIGHBITDEPTH
}
+#endif
#if CONFIG_CONVOLVE_ROUND
}
#endif
@@ -841,116 +1050,56 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
#endif // CONFIG_COMPOUND_SEGMENT
#if CONFIG_SUPERTX
+#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_wedge_extend_highbd(
dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
else
+#endif // CONFIG_HIGHBITDEPTH
build_masked_compound_wedge_extend(
dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
#else
#if CONFIG_CONVOLVE_ROUND
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+ if (is_conv_no_round) {
build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
- org_dst_stride, tmp_dst2, tmp_dst2_stride,
+ org_dst_stride, tmp_buf32, tmp_buf_stride,
&comp_data, mi->mbmi.sb_type, h, w);
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- av1_highbd_convolve_rounding(
- org_dst, org_dst_stride, dst, dst_stride, w, h,
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1,
- xd->bd);
- } else {
- av1_convolve_rounding(
- org_dst, org_dst_stride, dst, dst_stride, w, h,
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1);
- }
+
+ const int convolve_rounding_bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ av1_highbd_convolve_rounding(org_dst, org_dst_stride, dst, dst_stride, w,
+ h, convolve_rounding_bits, xd->bd);
+ else
+#endif
+ av1_convolve_rounding(org_dst, org_dst_stride, dst, dst_stride, w, h,
+ convolve_rounding_bits);
+
conv_params->do_post_rounding = 0;
} else {
#endif // CONFIG_CONVOLVE_ROUND
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h,
w, xd->bd);
- } else {
+ else
+#endif // CONFIG_HIGHBITDEPTH
build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
- }
#if CONFIG_CONVOLVE_ROUND
}
#endif // CONFIG_CONVOLVE_ROUND
#endif // CONFIG_SUPERTX
-#else // CONFIG_HIGHBITDEPTH
-
-#if CONFIG_CONVOLVE_ROUND
- DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_dst2[MAX_SB_SQUARE]);
- int tmp_dst2_stride = MAX_SB_SIZE;
- CONV_BUF_TYPE *org_dst = conv_params->dst;
- int org_dst_stride = conv_params->dst_stride;
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
- memset(tmp_dst2, 0, sizeof(tmp_dst2));
- conv_params->dst = tmp_dst2;
- conv_params->dst_stride = tmp_dst2_stride;
- // mask compound has its own average mechanism
- conv_params->do_average = 0;
- }
-#endif
- DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
- av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
- subpel_y, sf, w, h, conv_params, interp_filter,
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- warp_types, p_col, p_row, plane, ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR
- 0, 0,
-#endif
- xs, ys, xd);
-#if CONFIG_COMPOUND_SEGMENT
- if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
-#if CONFIG_CONVOLVE_ROUND
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
- build_compound_seg_mask_d32(
- comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
- tmp_dst2, tmp_dst2_stride, mi->mbmi.sb_type, h, w, conv_params, 8);
- } else {
-#endif // CONFIG_CONVOLVE_ROUND
- build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
- dst_stride, tmp_dst, MAX_SB_SIZE,
- mi->mbmi.sb_type, h, w);
-#if CONFIG_CONVOLVE_ROUND
- }
-#endif
- }
-#endif // CONFIG_COMPOUND_SEGMENT
-#if CONFIG_SUPERTX
- build_masked_compound_wedge_extend(dst, dst_stride, dst, dst_stride, tmp_dst,
- MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type,
- wedge_offset_x, wedge_offset_y, h, w);
-#else
-#if CONFIG_CONVOLVE_ROUND
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
- build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
- org_dst_stride, tmp_dst2, tmp_dst2_stride,
- &comp_data, mi->mbmi.sb_type, h, w);
- av1_convolve_rounding(
- org_dst, org_dst_stride, dst, dst_stride, w, h,
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1);
- conv_params->do_post_rounding = 0;
- } else {
-#endif // CONFIG_CONVOLVE_ROUND
- build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
- MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
-#if CONFIG_CONVOLVE_ROUND
- }
-#endif // CONFIG_CONVOLVE_ROUND
-#endif // CONFIG_SUPERTX
-#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_COMPOUND_SEGMENT
(void)plane;
#endif // CONFIG_COMPOUND_SEGMENT
}
-#endif // CONFIG_EXT_INTER
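// Flow sketch for av1_make_masked_inter_predictor() as restructured above:
// (1) predict the current reference into tmp_buf via
// av1_make_inter_predictor(); (2) for COMPOUND_SEG on the luma plane,
// derive the per-pixel seg mask from the difference between the two
// predictions; (3) blend the two predictions under the wedge/seg mask
// (build_masked_compound*); with CONVOLVE_OPT_NO_ROUND the blend runs on
// 32-bit data and a final convolve-rounding pass produces output pixels.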
// TODO(sarahparker) av1_highbd_build_inter_predictor and
// av1_build_inter_predictor should be combined with
@@ -959,11 +1108,7 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
void av1_highbd_build_inter_predictor(
const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ InterpFilters interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
const WarpTypesAllowed *warp_types, int p_col, int p_row,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -983,12 +1128,12 @@ void av1_highbd_build_inter_predictor(
(mv.col >> SCALE_SUBPEL_BITS);
av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, &conv_params, interp_filter,
+ sf, w, h, &conv_params, interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
warp_types, p_col, p_row, plane, ref,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR
- 0, 0,
+ xd->mi[0], 0,
#endif
sf->x_step_q4, sf->y_step_q4, xd);
}
@@ -998,11 +1143,7 @@ void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, const MV *src_mv,
const struct scale_factors *sf, int w, int h,
ConvolveParams *conv_params,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ InterpFilters interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
const WarpTypesAllowed *warp_types, int p_col,
int p_row, int plane, int ref,
@@ -1022,12 +1163,12 @@ void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
(mv.col >> SCALE_SUBPEL_BITS);
av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, conv_params, interp_filter,
+ sf, w, h, conv_params, interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
warp_types, p_col, p_row, plane, ref,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR
- 0, 0,
+ xd->mi[0], 0,
#endif
sf->x_step_q4, sf->y_step_q4, xd);
}
@@ -1039,27 +1180,25 @@ typedef struct SubpelParams {
int subpel_y;
} SubpelParams;
-void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
+static INLINE void build_inter_predictors(
+ const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
#if CONFIG_MOTION_VAR
- int mi_col_offset, int mi_row_offset,
+ const MODE_INFO *mi, int build_for_obmc,
#endif // CONFIG_MOTION_VAR
- int block, int bw, int bh, int x, int y, int w,
- int h,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
- int mi_x, int mi_y) {
+ int block, int bw, int bh, int x, int y, int w, int h,
+#if CONFIG_SUPERTX
+ int wedge_offset_x, int wedge_offset_y,
+#endif // CONFIG_SUPERTX
+ int mi_x, int mi_y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
-#if CONFIG_MOTION_VAR
- const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset];
-#else
+#if !CONFIG_MOTION_VAR
const MODE_INFO *mi = xd->mi[0];
#endif // CONFIG_MOTION_VAR
int is_compound = has_second_ref(&mi->mbmi);
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
int is_comp_mode_pred =
is_compound || is_inter_singleref_comp_mode(mi->mbmi.mode);
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
int ref;
#if CONFIG_INTRABC
const int is_intrabc = is_intrabc_block(&mi->mbmi);
@@ -1071,9 +1210,9 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
is_global[ref] = is_global_mv_block(mi, block, wm->wmtype);
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!is_compound && is_comp_mode_pred) is_global[1] = is_global[0];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
#endif // CONFIG_GLOBAL_MOTION
#if CONFIG_CB4X4
@@ -1081,34 +1220,32 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
(void)cm;
#endif
-#if CONFIG_MOTION_VAR && (CONFIG_CHROMA_SUB8X8 || !CONFIG_CB4X4)
- const int build_for_obmc = !(mi_col_offset == 0 && mi_row_offset == 0);
-#endif // CONFIG_MOTION_VAR && (CONFIG_CHROMA_SUB8X8 || !CONFIG_CB4X4)
-
#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE bsize = mi->mbmi.sb_type;
const int ss_x = pd->subsampling_x;
const int ss_y = pd->subsampling_y;
int sub8x8_inter = bsize < BLOCK_8X8 && (ss_x || ss_y);
+
+#if CONFIG_INTRABC
+ if (is_intrabc) {
+ sub8x8_inter = 0;
+ }
+#endif
+
+#if CONFIG_MOTION_VAR
+ sub8x8_inter = sub8x8_inter && !build_for_obmc;
+#endif // CONFIG_MOTION_VAR
const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
-#if CONFIG_MOTION_VAR
- if (!build_for_obmc && sub8x8_inter) {
-#else
if (sub8x8_inter) {
-#endif // CONFIG_MOTION_VAR
for (int row = row_start; row <= 0 && sub8x8_inter; ++row)
for (int col = col_start; col <= 0; ++col)
if (!is_inter_block(&xd->mi[row * xd->mi_stride + col]->mbmi))
sub8x8_inter = 0;
}
-#if CONFIG_MOTION_VAR
- if (!build_for_obmc && sub8x8_inter) {
-#else
if (sub8x8_inter) {
-#endif // CONFIG_MOTION_VAR
// block size
const int b4_w = block_size_wide[bsize] >> ss_x;
const int b4_h = block_size_high[bsize] >> ss_y;
@@ -1128,11 +1265,25 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
for (idx = 0; idx < b8_w; idx += b4_w) {
MB_MODE_INFO *this_mbmi = &xd->mi[row * xd->mi_stride + col]->mbmi;
is_compound = has_second_ref(this_mbmi);
+#if CONFIG_CONVOLVE_ROUND
+ DECLARE_ALIGNED(16, int32_t, tmp_dst[8 * 8]);
+ int tmp_dst_stride = 8;
+ assert(w <= 8 && h <= 8);
+#endif // CONFIG_CONVOLVE_ROUND
+#if CONFIG_CONVOLVE_ROUND
+ ConvolveParams conv_params =
+ get_conv_params_no_round(0, 0, plane, tmp_dst, tmp_dst_stride);
+#else
+ ConvolveParams conv_params = get_conv_params(0, 0, plane);
+#endif
+ struct buf_2d *const dst_buf = &pd->dst;
+ x = x_base + idx;
+ y = y_base + idy;
+ uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
+
+        // TODO(zoeliu): Considering single-ref comp modes here caused a
+        // mismatch; needs further investigation.
for (ref = 0; ref < 1 + is_compound; ++ref) {
- struct buf_2d *const dst_buf = &pd->dst;
-
const RefBuffer *ref_buf =
&cm->frame_refs[this_mbmi->ref_frame[ref] - LAST_FRAME];
@@ -1156,7 +1307,6 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
const struct scale_factors *const sf = &ref_buf->sf;
struct buf_2d *const pre_buf = &pd->pre[ref];
#endif // CONFIG_INTRABC
- uint8_t *dst = dst_buf->buf;
const MV mv = this_mbmi->mv[ref].as_mv;
@@ -1174,11 +1324,6 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
#endif // CONFIG_WARPED_MOTION
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- x = x_base + idx;
- y = y_base + idy;
-
- dst += dst_buf->stride * y + x;
-
if (is_scaled) {
int ssx = pd->subsampling_x;
int ssy = pd->subsampling_y;
@@ -1218,17 +1363,21 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
(x + (mv_q4.col >> SUBPEL_BITS));
}
- ConvolveParams conv_params = get_conv_params(ref, ref, plane);
-#if CONFIG_EXT_INTER
+ conv_params.ref = ref;
+ conv_params.do_average = ref;
if (is_masked_compound_type(mi->mbmi.interinter_compound_type)) {
- // TODO(angiebird): use get_conv_params_no_round() here
// masked compound type has its own average mechanism
+ conv_params.do_average = 0;
+#if CONFIG_CONVOLVE_ROUND && CONFIG_COMPOUND_SEGMENT && CONFIG_SUPERTX
+ // TODO(angiebird): convolve_round does not support compound_segment
+ // when supertx is on
conv_params = get_conv_params(ref, 0, plane);
+#endif
}
if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_type))
av1_make_masked_inter_predictor(
pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
- sf, w, h, &conv_params, mi->mbmi.interp_filter, xs, ys,
+ sf, b4_w, b4_h, &conv_params, mi->mbmi.interp_filters, xs, ys,
#if CONFIG_SUPERTX
wedge_offset_x, wedge_offset_y,
#endif // CONFIG_SUPERTX
@@ -1239,19 +1388,42 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
xd);
else
-#endif // CONFIG_EXT_INTER
av1_make_inter_predictor(
pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
- sf, b4_w, b4_h, &conv_params, this_mbmi->interp_filter,
+ sf, b4_w, b4_h, &conv_params, this_mbmi->interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, (mi_x >> pd->subsampling_x) + x,
(mi_y >> pd->subsampling_y) + y, plane, ref,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR
- mi_col_offset, mi_row_offset,
+ mi, build_for_obmc,
#endif // CONFIG_MOTION_VAR
xs, ys, xd);
+ } // for (ref = 0; ref < 1 + is_compound; ++ref)
+#if CONFIG_CONVOLVE_ROUND
+ if (conv_params.do_post_rounding) {
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ av1_highbd_convolve_rounding(
+ tmp_dst, tmp_dst_stride, dst, dst_buf->stride, b4_w, b4_h,
+ FILTER_BITS * 2 + is_compound - conv_params.round_0 -
+ conv_params.round_1,
+ xd->bd);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+#if CONFIG_COMPOUND_SINGLEREF
+ av1_convolve_rounding(
+ tmp_dst, tmp_dst_stride, dst, dst_buf->stride, b4_w, b4_h,
+ FILTER_BITS * 2 + is_comp_mode_pred - conv_params.round_0 -
+ conv_params.round_1);
+#else // !(CONFIG_COMPOUND_SINGLEREF)
+ av1_convolve_rounding(tmp_dst, tmp_dst_stride, dst, dst_buf->stride,
+ b4_w, b4_h,
+ FILTER_BITS * 2 + is_compound -
+ conv_params.round_0 - conv_params.round_1);
+#endif // CONFIG_COMPOUND_SINGLEREF
}
+#endif // CONFIG_CONVOLVE_ROUND
++col;
}
++row;
@@ -1271,14 +1443,14 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
SubpelParams subpel_params[2];
#if CONFIG_CONVOLVE_ROUND
DECLARE_ALIGNED(16, int32_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
- av1_zero(tmp_dst);
#endif // CONFIG_CONVOLVE_ROUND
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + is_comp_mode_pred; ++ref) {
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + is_comp_mode_pred; ++ref)
#else
- for (ref = 0; ref < 1 + is_compound; ++ref) {
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + is_compound; ++ref)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
#if CONFIG_INTRABC
const struct scale_factors *const sf =
is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
@@ -1360,11 +1532,12 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
ConvolveParams conv_params = get_conv_params(ref, ref, plane);
#endif // CONFIG_CONVOLVE_ROUND
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + is_comp_mode_pred; ++ref) {
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + is_comp_mode_pred; ++ref)
#else
- for (ref = 0; ref < 1 + is_compound; ++ref) {
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + is_compound; ++ref)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
#if CONFIG_INTRABC
const struct scale_factors *const sf =
is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
@@ -1384,7 +1557,6 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
conv_params.ref = ref;
conv_params.do_average = ref;
-#if CONFIG_EXT_INTER
if (is_masked_compound_type(mi->mbmi.interinter_compound_type)) {
// masked compound type has its own average mechanism
conv_params.do_average = 0;
@@ -1399,7 +1571,7 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
av1_make_masked_inter_predictor(
pre[ref], pre_buf->stride, dst, dst_buf->stride,
subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h,
- &conv_params, mi->mbmi.interp_filter, subpel_params[ref].xs,
+ &conv_params, mi->mbmi.interp_filters, subpel_params[ref].xs,
subpel_params[ref].ys,
#if CONFIG_SUPERTX
wedge_offset_x, wedge_offset_y,
@@ -1411,17 +1583,16 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
xd);
else
-#endif // CONFIG_EXT_INTER
av1_make_inter_predictor(
pre[ref], pre_buf->stride, dst, dst_buf->stride,
subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h,
- &conv_params, mi->mbmi.interp_filter,
+ &conv_params, mi->mbmi.interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, (mi_x >> pd->subsampling_x) + x,
(mi_y >> pd->subsampling_y) + y, plane, ref,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR
- mi_col_offset, mi_row_offset,
+ mi, build_for_obmc,
#endif // CONFIG_MOTION_VAR
subpel_params[ref].xs, subpel_params[ref].ys, xd);
}
@@ -1431,22 +1602,22 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
if (conv_params.do_post_rounding) {
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- av1_highbd_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride,
- w, h, FILTER_BITS * 2 + is_compound -
- conv_params.round_0 -
- conv_params.round_1,
- xd->bd);
+ av1_highbd_convolve_rounding(
+ tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
+ FILTER_BITS * 2 + is_compound - conv_params.round_0 -
+ conv_params.round_1,
+ xd->bd);
else
#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
FILTER_BITS * 2 + is_comp_mode_pred -
conv_params.round_0 - conv_params.round_1);
-#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
+#else // !(CONFIG_COMPOUND_SINGLEREF)
av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
FILTER_BITS * 2 + is_compound -
conv_params.round_0 - conv_params.round_1);
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
}
#endif // CONFIG_CONVOLVE_ROUND
}
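// Worked example of the post-rounding shift used above (a sketch; assumes
// the usual FILTER_BITS == 7 and, say, round_0 == 5, round_1 == 0 from
// get_conv_params_no_round()):
//   bits = FILTER_BITS * 2 + is_compound - round_0 - round_1
//        = 14 + 1 - 5 - 0 = 10   // compound: the two preds are summed
//        = 14 + 0 - 5 - 0 = 9    // single reference
// av1_convolve_rounding() shifts the 32-bit accumulator in tmp_dst right
// by that many bits (with rounding) to produce the final pixels.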
@@ -1491,22 +1662,22 @@ static void build_inter_predictors_for_planes(const AV1_COMMON *cm,
for (x = 0; x < num_4x4_w; ++x)
build_inter_predictors(cm, xd, plane,
#if CONFIG_MOTION_VAR
- 0, 0,
+ xd->mi[0], 0,
#endif // CONFIG_MOTION_VAR
y * 2 + x, bw, bh, 4 * x, 4 * y, pw, ph,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+#if CONFIG_SUPERTX
0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+#endif // CONFIG_SUPERTX
mi_x, mi_y);
} else {
build_inter_predictors(cm, xd, plane,
#if CONFIG_MOTION_VAR
- 0, 0,
+ xd->mi[0], 0,
#endif // CONFIG_MOTION_VAR
0, bw, bh, 0, 0, bw, bh,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+#if CONFIG_SUPERTX
0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+#endif // CONFIG_SUPERTX
mi_x, mi_y);
}
}
@@ -1516,17 +1687,17 @@ void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BUFFER_SET *ctx,
BLOCK_SIZE bsize) {
build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0, 0);
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_INTERINTRA
if (is_interintra_pred(&xd->mi[0]->mbmi)) {
BUFFER_SET default_ctx = { { xd->plane[0].dst.buf, NULL, NULL },
{ xd->plane[0].dst.stride, 0, 0 } };
if (!ctx) ctx = &default_ctx;
- av1_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf,
+ av1_build_interintra_predictors_sby(cm, xd, xd->plane[0].dst.buf,
xd->plane[0].dst.stride, ctx, bsize);
}
#else
(void)ctx;
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
}
void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
@@ -1534,7 +1705,7 @@ void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
BLOCK_SIZE bsize) {
build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 1,
MAX_MB_PLANE - 1);
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_INTERINTRA
if (is_interintra_pred(&xd->mi[0]->mbmi)) {
BUFFER_SET default_ctx = {
{ NULL, xd->plane[1].dst.buf, xd->plane[2].dst.buf },
@@ -1542,12 +1713,12 @@ void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
};
if (!ctx) ctx = &default_ctx;
av1_build_interintra_predictors_sbuv(
- xd, xd->plane[1].dst.buf, xd->plane[2].dst.buf, xd->plane[1].dst.stride,
- xd->plane[2].dst.stride, ctx, bsize);
+ cm, xd, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
+ xd->plane[1].dst.stride, xd->plane[2].dst.stride, ctx, bsize);
}
#else
(void)ctx;
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
}
void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
@@ -1560,8 +1731,6 @@ void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
BLOCK_SIZE bsize, const YV12_BUFFER_CONFIG *src,
int mi_row, int mi_col) {
- uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,
- src->v_buffer };
const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
src->uv_crop_width };
const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
@@ -1572,7 +1741,7 @@ void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
for (i = 0; i < MAX_MB_PLANE; ++i) {
struct macroblockd_plane *const pd = &planes[i];
- setup_pred_plane(&pd->dst, bsize, buffers[i], widths[i], heights[i],
+ setup_pred_plane(&pd->dst, bsize, src->buffers[i], widths[i], heights[i],
strides[i], mi_row, mi_col, NULL, pd->subsampling_x,
pd->subsampling_y);
}
@@ -1733,13 +1902,10 @@ void av1_build_masked_inter_predictor_complex(
}
void av1_build_inter_predictor_sb_sub8x8_extend(const AV1_COMMON *cm,
- MACROBLOCKD *xd,
-#if CONFIG_EXT_INTER
- int mi_row_ori, int mi_col_ori,
-#endif // CONFIG_EXT_INTER
- int mi_row, int mi_col,
- int plane, BLOCK_SIZE bsize,
- int block) {
+ MACROBLOCKD *xd, int mi_row_ori,
+ int mi_col_ori, int mi_row,
+ int mi_col, int plane,
+ BLOCK_SIZE bsize, int block) {
// Prediction function used in supertx:
// Use the mv at current block (which is less than 8x8)
// to get prediction of a block located at (mi_row, mi_col) at size of bsize
@@ -1747,10 +1913,8 @@ void av1_build_inter_predictor_sb_sub8x8_extend(const AV1_COMMON *cm,
// block (0-3): the sub8x8 location of current block
const int mi_x = mi_col * MI_SIZE;
const int mi_y = mi_row * MI_SIZE;
-#if CONFIG_EXT_INTER
const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
-#endif // CONFIG_EXT_INTER
// For sub8x8 uv:
// Skip uv prediction in supertx except the first block (block = 0)
@@ -1765,40 +1929,30 @@ void av1_build_inter_predictor_sb_sub8x8_extend(const AV1_COMMON *cm,
build_inter_predictors(cm, xd, plane,
#if CONFIG_MOTION_VAR
- 0, 0,
+ xd->mi[0], 0,
#endif // CONFIG_MOTION_VAR
- block, bw, bh, 0, 0, bw, bh,
-#if CONFIG_EXT_INTER
- wedge_offset_x, wedge_offset_y,
-#endif // CONFIG_EXT_INTER
- mi_x, mi_y);
+ block, bw, bh, 0, 0, bw, bh, wedge_offset_x,
+ wedge_offset_y, mi_x, mi_y);
}
void av1_build_inter_predictor_sb_extend(const AV1_COMMON *cm, MACROBLOCKD *xd,
-#if CONFIG_EXT_INTER
int mi_row_ori, int mi_col_ori,
-#endif // CONFIG_EXT_INTER
int mi_row, int mi_col, int plane,
BLOCK_SIZE bsize) {
const int mi_x = mi_col * MI_SIZE;
const int mi_y = mi_row * MI_SIZE;
-#if CONFIG_EXT_INTER
const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
-#endif // CONFIG_EXT_INTER
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
const int bw = block_size_wide[plane_bsize];
const int bh = block_size_high[plane_bsize];
build_inter_predictors(cm, xd, plane,
#if CONFIG_MOTION_VAR
- 0, 0,
+ xd->mi[0], 0,
#endif // CONFIG_MOTION_VAR
- 0, bw, bh, 0, 0, bw, bh,
-#if CONFIG_EXT_INTER
- wedge_offset_x, wedge_offset_y,
-#endif // CONFIG_EXT_INTER
- mi_x, mi_y);
+ 0, bw, bh, 0, 0, bw, bh, wedge_offset_x,
+ wedge_offset_y, mi_x, mi_y);
}
#endif // CONFIG_SUPERTX
@@ -1887,62 +2041,29 @@ const uint8_t *av1_get_obmc_mask_flipped(int length) {
}
#endif // CONFIG_NCOBMC
+static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_rc,
+ uint8_t mi_hw, MODE_INFO *mi,
+ void *fun_ctxt) {
+ (void)xd;
+ (void)rel_mi_rc;
+ (void)mi_hw;
+ (void)mi;
+ ++*(int *)fun_ctxt;
+}
+
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col) {
- int i, mi_step;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- xd->mi[0]->mbmi.overlappable_neighbors[0] = 0;
- xd->mi[0]->mbmi.overlappable_neighbors[1] = 0;
+ mbmi->overlappable_neighbors[0] = 0;
+ mbmi->overlappable_neighbors[1] = 0;
if (!is_motion_variation_allowed_bsize(mbmi->sb_type)) return;
- if (xd->up_available) {
- const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
- for (i = 0; i < ilimit; i += mi_step) {
- int mi_row_offset = -1;
- int mi_col_offset = i;
- MODE_INFO *above_mi =
- xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
-#if CONFIG_CHROMA_SUB8X8
- if (above_mbmi->sb_type < BLOCK_8X8) {
- ++mi_col_offset;
- above_mbmi =
- &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
- }
-#endif
- BLOCK_SIZE above_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
- mi_step = AOMMIN(xd->n8_w, mi_size_wide[above_bsize]);
-
- if (is_neighbor_overlappable(above_mbmi))
- xd->mi[0]->mbmi.overlappable_neighbors[0]++;
- }
- }
-
- if (xd->left_available) {
- const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
- for (i = 0; i < ilimit; i += mi_step) {
- int mi_row_offset = i;
- int mi_col_offset = -1;
- MODE_INFO *left_mi =
- xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
-
-#if CONFIG_CHROMA_SUB8X8
- if (left_mbmi->sb_type < BLOCK_8X8) {
- ++mi_row_offset;
- left_mbmi =
- &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
- }
-#endif
- BLOCK_SIZE left_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
- mi_step = AOMMIN(xd->n8_h, mi_size_high[left_bsize]);
-
- if (is_neighbor_overlappable(left_mbmi))
- xd->mi[0]->mbmi.overlappable_neighbors[1]++;
- }
- }
+ foreach_overlappable_nb_above(cm, xd, mi_col, INT_MAX, increment_int_ptr,
+ &mbmi->overlappable_neighbors[0]);
+ foreach_overlappable_nb_left(cm, xd, mi_row, INT_MAX, increment_int_ptr,
+ &mbmi->overlappable_neighbors[1]);
}
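// The two foreach walks rely on a visitor interface along these lines (a
// sketch of what av1/common/obmc.h is assumed to declare; the typedef name
// is illustrative):
//
//   typedef void (*overlappable_nb_visitor_t)(MACROBLOCKD *xd,
//                                             int rel_mi_pos,
//                                             uint8_t nb_mi_size,
//                                             MODE_INFO *nb_mi,
//                                             void *fun_ctxt);
//
// foreach_overlappable_nb_above() scans the mi row above the current block
// and invokes the visitor once per overlappable neighbor with its column
// offset and width, stopping after nb_max neighbors;
// foreach_overlappable_nb_left() does the same down the left column with
// row offset and height. increment_int_ptr() simply counts the visits.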
// HW does not support < 4x4 prediction. To limit the bandwidth requirement, for
@@ -1974,146 +2095,113 @@ int skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize, const struct macroblockd_plane *pd,
}
}
-// This function combines motion compensated predictions that is generated by
-// top/left neighboring blocks' inter predictors with the regular inter
-// prediction. We assume the original prediction (bmc) is stored in
-// xd->plane[].dst.buf
-void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- uint8_t *above[MAX_MB_PLANE],
- int above_stride[MAX_MB_PLANE],
- uint8_t *left[MAX_MB_PLANE],
- int left_stride[MAX_MB_PLANE]) {
+struct obmc_inter_pred_ctxt {
+ uint8_t **adjacent;
+ int *adjacent_stride;
+};
+
+static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col,
+ uint8_t above_mi_width,
+ MODE_INFO *above_mi,
+ void *fun_ctxt) {
+ (void)above_mi;
+ struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- int plane, i;
#if CONFIG_HIGHBITDEPTH
const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#endif // CONFIG_HIGHBITDEPTH
+ const int overlap =
+ AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
- // handle above row
- if (xd->up_available) {
- const int overlap =
- AOMMIN(block_size_high[bsize] >> 1, block_size_high[BLOCK_64X64] >> 1);
- const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
- const int mi_row_offset = -1;
- const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
- int neighbor_count = 0;
-
- assert(miw > 0);
-
- i = 0;
- do { // for each mi in the above row
- int mi_col_offset = i;
- MB_MODE_INFO *above_mbmi =
- &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
-#if CONFIG_CHROMA_SUB8X8
- if (above_mbmi->sb_type < BLOCK_8X8) {
- ++mi_col_offset;
- above_mbmi =
- &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
- }
-#endif
-
- const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
- const int above_step =
- AOMMIN(mi_size_wide[a_bsize], mi_size_wide[BLOCK_64X64]);
- const int mi_step = AOMMIN(xd->n8_w, above_step);
-
- if (is_neighbor_overlappable(above_mbmi)) {
- neighbor_count++;
- if (neighbor_count > neighbor_limit) break;
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
- const int bh = overlap >> pd->subsampling_y;
+ for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *pd = &xd->plane[plane];
+ const int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
+ const int bh = overlap >> pd->subsampling_y;
+ const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
- if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
+ if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
- const int dst_stride = pd->dst.stride;
- uint8_t *const dst = &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];
- const int tmp_stride = above_stride[plane];
- const uint8_t *const tmp =
- &above[plane][(i * MI_SIZE) >> pd->subsampling_x];
- const uint8_t *const mask = av1_get_obmc_mask(bh);
+ const int dst_stride = pd->dst.stride;
+ uint8_t *const dst = &pd->dst.buf[plane_col];
+ const int tmp_stride = ctxt->adjacent_stride[plane];
+ const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
+ const uint8_t *const mask = av1_get_obmc_mask(bh);
#if CONFIG_HIGHBITDEPTH
- if (is_hbd)
- aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
- tmp_stride, mask, bh, bw, xd->bd);
- else
+ if (is_hbd)
+ aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
+ tmp_stride, mask, bh, bw, xd->bd);
+ else
#endif // CONFIG_HIGHBITDEPTH
- aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
- tmp_stride, mask, bh, bw);
- }
- }
- i += mi_step;
- } while (i < miw);
+ aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
+ mask, bh, bw);
}
+}
- // handle left column
- if (xd->left_available) {
- const int overlap =
- AOMMIN(block_size_wide[bsize] >> 1, block_size_wide[BLOCK_64X64] >> 1);
- const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
- const int mi_col_offset = -1;
- const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
- int neighbor_count = 0;
-
- assert(mih > 0);
-
- i = 0;
- do { // for each mi in the left column
- int mi_row_offset = i;
- MB_MODE_INFO *left_mbmi =
- &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
-#if CONFIG_CHROMA_SUB8X8
- if (left_mbmi->sb_type < BLOCK_8X8) {
- ++mi_row_offset;
- left_mbmi =
- &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
- }
-#endif
+static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row,
+ uint8_t left_mi_height,
+ MODE_INFO *left_mi,
+ void *fun_ctxt) {
+ (void)left_mi;
+ struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
+ const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ const int overlap =
+ AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
+#if CONFIG_HIGHBITDEPTH
+ const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#endif // CONFIG_HIGHBITDEPTH
+
+ for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *pd = &xd->plane[plane];
+ const int bw = overlap >> pd->subsampling_x;
+ const int bh = (left_mi_height * MI_SIZE) >> pd->subsampling_y;
+ const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
+
+ if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
- const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
- const int left_step =
- AOMMIN(mi_size_high[l_bsize], mi_size_high[BLOCK_64X64]);
- const int mi_step = AOMMIN(xd->n8_h, left_step);
-
- if (is_neighbor_overlappable(left_mbmi)) {
- neighbor_count++;
- if (neighbor_count > neighbor_limit) break;
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const int bw = overlap >> pd->subsampling_x;
- const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
-
- if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
-
- const int dst_stride = pd->dst.stride;
- uint8_t *const dst =
- &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];
- const int tmp_stride = left_stride[plane];
- const uint8_t *const tmp =
- &left[plane][(i * MI_SIZE * tmp_stride) >> pd->subsampling_y];
- const uint8_t *const mask = av1_get_obmc_mask(bw);
+ const int dst_stride = pd->dst.stride;
+ uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
+ const int tmp_stride = ctxt->adjacent_stride[plane];
+ const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
+ const uint8_t *const mask = av1_get_obmc_mask(bw);
#if CONFIG_HIGHBITDEPTH
- if (is_hbd)
- aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
- tmp_stride, mask, bh, bw, xd->bd);
- else
+ if (is_hbd)
+ aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
+ tmp_stride, mask, bh, bw, xd->bd);
+ else
#endif // CONFIG_HIGHBITDEPTH
- aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
- tmp_stride, mask, bh, bw);
- }
- }
- i += mi_step;
- } while (i < mih);
+ aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
+ mask, bh, bw);
}
}
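// Per pixel, the a64 blends above compute (aom_blend_a64_* semantics; the
// two weights always sum to 64):
//   dst = (mask[i] * dst + (64 - mask[i]) * tmp + 32) >> 6
// with mask indexed by row (vmask, above neighbor) or by column (hmask,
// left neighbor). av1_get_obmc_mask() weights the current prediction, so
// the neighbor's contribution is largest at the shared edge (roughly half)
// and fades to zero across the overlap region.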
+// This function combines motion compensated predictions that are generated by
+// top/left neighboring blocks' inter predictors with the regular inter
+// prediction. We assume the original prediction (bmc) is stored in
+// xd->plane[].dst.buf
+void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *above[MAX_MB_PLANE],
+ int above_stride[MAX_MB_PLANE],
+ uint8_t *left[MAX_MB_PLANE],
+ int left_stride[MAX_MB_PLANE]) {
+ const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+
+ // handle above row
+ struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
+ foreach_overlappable_nb_above(cm, xd, mi_col,
+ max_neighbor_obmc[b_width_log2_lookup[bsize]],
+ build_obmc_inter_pred_above, &ctxt_above);
+
+ // handle left column
+ struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
+ foreach_overlappable_nb_left(cm, xd, mi_row,
+ max_neighbor_obmc[b_height_log2_lookup[bsize]],
+ build_obmc_inter_pred_left, &ctxt_left);
+}
+
void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
-#if CONFIG_EXT_INTER
if (is_interintra_pred(mbmi)) {
mbmi->ref_frame[1] = NONE_FRAME;
} else if (has_second_ref(mbmi) &&
@@ -2129,109 +2217,190 @@ void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
mbmi->mv[0].as_int = mbmi->mv[1].as_int;
#endif // CONFIG_COMPOUND_SINGLEREF
}
-#endif // CONFIG_EXT_INTER
if (has_second_ref(mbmi)) mbmi->ref_frame[1] = NONE_FRAME;
return;
}
+struct build_prediction_ctxt {
+ const AV1_COMMON *cm;
+ int mi_row;
+ int mi_col;
+ uint8_t **tmp_buf;
+ int *tmp_width;
+ int *tmp_height;
+ int *tmp_stride;
+ int mb_to_far_edge;
+};
+
+static INLINE void build_prediction_by_above_pred(MACROBLOCKD *xd,
+ int rel_mi_col,
+ uint8_t above_mi_width,
+ MODE_INFO *above_mi,
+ void *fun_ctxt) {
+ MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+ const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
+ struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
+ const int above_mi_col = ctxt->mi_col + rel_mi_col;
+
+ MB_MODE_INFO backup_mbmi = *above_mbmi;
+ modify_neighbor_predictor_for_obmc(above_mbmi);
+
+ for (int j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
+ ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
+ NULL, pd->subsampling_x, pd->subsampling_y);
+ }
+
+#if CONFIG_COMPOUND_SINGLEREF
+ const int num_refs = 1 + is_inter_anyref_comp_mode(above_mbmi->mode);
+#else
+ const int num_refs = 1 + has_second_ref(above_mbmi);
+#endif
+
+ for (int ref = 0; ref < num_refs; ++ref) {
+#if CONFIG_COMPOUND_SINGLEREF
+ const MV_REFERENCE_FRAME frame = has_second_ref(above_mbmi)
+ ? above_mbmi->ref_frame[ref]
+ : above_mbmi->ref_frame[0];
+#else
+ const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
+#endif // CONFIG_COMPOUND_SINGLEREF
+
+ const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+    if (!av1_is_valid_scale(&ref_buf->sf))
+ aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ av1_setup_pre_planes(xd, ref, ref_buf->buf, ctxt->mi_row, above_mi_col,
+ &ref_buf->sf);
+ }
+
+ xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
+ xd->mb_to_right_edge = ctxt->mb_to_far_edge +
+ (xd->n8_w - rel_mi_col - above_mi_width) * MI_SIZE * 8;
+
+ int mi_x = above_mi_col << MI_SIZE_LOG2;
+ int mi_y = ctxt->mi_row << MI_SIZE_LOG2;
+
+ const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+
+ for (int j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+ int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
+ int bh = clamp(block_size_high[bsize] >> (pd->subsampling_y + 1), 4,
+ block_size_high[BLOCK_64X64] >> (pd->subsampling_y + 1));
+
+ if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
+ build_inter_predictors(ctxt->cm, xd, j, above_mi, 1, 0, bw, bh, 0, 0, bw,
+ bh,
+#if CONFIG_SUPERTX
+ 0, 0,
+#endif // CONFIG_SUPERTX
+ mi_x, mi_y);
+ }
+ *above_mbmi = backup_mbmi;
+}
+
void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *tmp_buf[MAX_MB_PLANE],
int tmp_width[MAX_MB_PLANE],
int tmp_height[MAX_MB_PLANE],
int tmp_stride[MAX_MB_PLANE]) {
- const TileInfo *const tile = &xd->tile;
+ if (!xd->up_available) return;
+
+ // Adjust mb_to_bottom_edge to have the correct value for the OBMC
+ // prediction block. This is half the height of the original block,
+  // except for 128-tall blocks, where we only use a height of 32.
+ int this_height = xd->n8_h * MI_SIZE;
+ int pred_height = AOMMIN(this_height / 2, 32);
+ xd->mb_to_bottom_edge += (this_height - pred_height) * 8;
+
+ struct build_prediction_ctxt ctxt = { cm, mi_row,
+ mi_col, tmp_buf,
+ tmp_width, tmp_height,
+ tmp_stride, xd->mb_to_right_edge };
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- int i, j, mi_step, ref;
- const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
- int mb_to_right_edge_base = xd->mb_to_right_edge;
- const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
- int neighbor_count = 0;
+ foreach_overlappable_nb_above(cm, xd, mi_col,
+ max_neighbor_obmc[b_width_log2_lookup[bsize]],
+ build_prediction_by_above_pred, &ctxt);
- if (mi_row <= tile->mi_row_start) return;
+ xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+ xd->mb_to_right_edge = ctxt.mb_to_far_edge;
+ xd->mb_to_bottom_edge -= (this_height - pred_height) * 8;
+}
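// Edge-arithmetic sketch for a 64x64 block (this_height == 64):
// pred_height = AOMMIN(64 / 2, 32) == 32, and mb_to_bottom_edge grows by
// (64 - 32) * 8 == 256 — the mb_to_* edges are kept in 1/8-pel units — so
// the borrowed above-predictions cover only the top 32 rows; the
// adjustment is undone after the foreach walk returns.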
- xd->mb_to_bottom_edge += xd->n8_h * 32;
- for (i = 0; i < ilimit; i += mi_step) {
- int mi_row_offset = -1;
- int mi_col_offset = i;
- int mi_x, mi_y, bw, bh;
- MODE_INFO *above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+static INLINE void build_prediction_by_left_pred(MACROBLOCKD *xd,
+ int rel_mi_row,
+ uint8_t left_mi_height,
+ MODE_INFO *left_mi,
+ void *fun_ctxt) {
+ MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+ const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
+ struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt;
+ const int left_mi_row = ctxt->mi_row + rel_mi_row;
+
+ MB_MODE_INFO backup_mbmi = *left_mbmi;
+ modify_neighbor_predictor_for_obmc(left_mbmi);
+
+ for (int j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
+ ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
+ NULL, pd->subsampling_x, pd->subsampling_y);
+ }
-#if CONFIG_CHROMA_SUB8X8
- if (above_mbmi->sb_type < BLOCK_8X8) {
- ++mi_col_offset;
- above_mbmi = &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
- }
+#if CONFIG_COMPOUND_SINGLEREF
+ const int num_refs = 1 + is_inter_anyref_comp_mode(left_mbmi->mode);
+#else
+ const int num_refs = 1 + has_second_ref(left_mbmi);
#endif
- const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
- MB_MODE_INFO backup_mbmi;
-
- const int above_step =
- AOMMIN(mi_size_wide[a_bsize], mi_size_wide[BLOCK_64X64]);
- mi_step = AOMMIN(xd->n8_w, above_step);
+ for (int ref = 0; ref < num_refs; ++ref) {
+#if CONFIG_COMPOUND_SINGLEREF
+ const MV_REFERENCE_FRAME frame = has_second_ref(left_mbmi)
+ ? left_mbmi->ref_frame[ref]
+ : left_mbmi->ref_frame[0];
+#else
+ const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
+#endif // CONFIG_COMPOUND_SINGLEREF
- if (!is_neighbor_overlappable(above_mbmi)) continue;
+ const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME];
- neighbor_count++;
- if (neighbor_count > neighbor_limit) break;
+ xd->block_refs[ref] = ref_buf;
+    if (!av1_is_valid_scale(&ref_buf->sf))
+ aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ av1_setup_pre_planes(xd, ref, ref_buf->buf, left_mi_row, ctxt->mi_col,
+ &ref_buf->sf);
+ }
- backup_mbmi = *above_mbmi;
- modify_neighbor_predictor_for_obmc(above_mbmi);
+ xd->mb_to_top_edge = 8 * MI_SIZE * (-left_mi_row);
+ xd->mb_to_bottom_edge =
+ ctxt->mb_to_far_edge +
+ (xd->n8_h - rel_mi_row - left_mi_height) * MI_SIZE * 8;
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, a_bsize, tmp_buf[j], tmp_width[j],
- tmp_height[j], tmp_stride[j], 0, i, NULL,
- pd->subsampling_x, pd->subsampling_y);
- }
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(above_mbmi->mode));
- ++ref) {
- const MV_REFERENCE_FRAME frame = has_second_ref(above_mbmi)
- ? above_mbmi->ref_frame[ref]
- : above_mbmi->ref_frame[0];
-#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
- for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+ int mi_x = ctxt->mi_col << MI_SIZE_LOG2;
+ int mi_y = left_mi_row << MI_SIZE_LOG2;
- xd->block_refs[ref] = ref_buf;
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
- &ref_buf->sf);
- }
+ const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
- xd->mb_to_right_edge =
- mb_to_right_edge_base + (xd->n8_w - i - mi_step) * 64;
- mi_x = (mi_col + i) << MI_SIZE_LOG2;
- mi_y = mi_row << MI_SIZE_LOG2;
+ for (int j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+ int bw = clamp(block_size_wide[bsize] >> (pd->subsampling_x + 1), 4,
+ block_size_wide[BLOCK_64X64] >> (pd->subsampling_x + 1));
+ int bh = (left_mi_height << MI_SIZE_LOG2) >> pd->subsampling_y;
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
- bh = AOMMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
- 4);
- bh = AOMMIN(bh, block_size_high[BLOCK_64X64] >> (pd->subsampling_y + 1));
-
- if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
- build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
- 0, 0, bw, bh,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
- 0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
- mi_x, mi_y);
- }
- *above_mbmi = backup_mbmi;
+ if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
+ build_inter_predictors(ctxt->cm, xd, j, left_mi, 1, 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX
+ 0, 0,
+#endif // CONFIG_SUPERTX
+ mi_x, mi_y);
}
- xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
- xd->mb_to_right_edge = mb_to_right_edge_base;
- xd->mb_to_bottom_edge -= xd->n8_h * 32;
+ *left_mbmi = backup_mbmi;
}
void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
@@ -2240,97 +2409,27 @@ void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int tmp_width[MAX_MB_PLANE],
int tmp_height[MAX_MB_PLANE],
int tmp_stride[MAX_MB_PLANE]) {
- const TileInfo *const tile = &xd->tile;
+ if (!xd->left_available) return;
+
+ // Adjust mb_to_right_edge to have the correct value for the OBMC
+ // prediction block. This is half the width of the original block,
+ // except for 128-wide blocks, where we only use a width of 32.
+ int this_width = xd->n8_w * MI_SIZE;
+ int pred_width = AOMMIN(this_width / 2, 32);
+ xd->mb_to_right_edge += (this_width - pred_width) * 8;
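+  // The mb_to_* edges are kept in 1/8-pel units. For example, a 64-pixel-wide
+  // block gets pred_width = 32, so mb_to_right_edge grows by
+  // (64 - 32) * 8 = 256, as if the block were only pred_width pixels wide
+  // while the left-neighbor predictors are built.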
+
+ struct build_prediction_ctxt ctxt = { cm, mi_row,
+ mi_col, tmp_buf,
+ tmp_width, tmp_height,
+ tmp_stride, xd->mb_to_bottom_edge };
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- int i, j, mi_step, ref;
- const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
- int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
- const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
- int neighbor_count = 0;
-
- if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start)) return;
-
- xd->mb_to_right_edge += xd->n8_w * 32;
- for (i = 0; i < ilimit; i += mi_step) {
- int mi_row_offset = i;
- int mi_col_offset = -1;
- int mi_x, mi_y, bw, bh;
- MODE_INFO *left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
- MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
-
-#if CONFIG_CHROMA_SUB8X8
- if (left_mbmi->sb_type < BLOCK_8X8) {
- ++mi_row_offset;
- left_mbmi = &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
- }
-#endif
-
- const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
- MB_MODE_INFO backup_mbmi;
- const int left_step =
- AOMMIN(mi_size_high[l_bsize], mi_size_high[BLOCK_64X64]);
- mi_step = AOMMIN(xd->n8_h, left_step);
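+  // foreach_overlappable_nb_left() (not shown in this hunk) is assumed to
+  // walk the overlappable left neighbors, up to the given limit, and invoke
+  // build_prediction_by_left_pred() once per neighbor, passing &ctxt through
+  // as the opaque callback argument.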
+ foreach_overlappable_nb_left(cm, xd, mi_row,
+ max_neighbor_obmc[b_height_log2_lookup[bsize]],
+ build_prediction_by_left_pred, &ctxt);
- if (!is_neighbor_overlappable(left_mbmi)) continue;
-
- neighbor_count++;
- if (neighbor_count > neighbor_limit) break;
-
- backup_mbmi = *left_mbmi;
- modify_neighbor_predictor_for_obmc(left_mbmi);
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- struct macroblockd_plane *const pd = &xd->plane[j];
- setup_pred_plane(&pd->dst, l_bsize, tmp_buf[j], tmp_width[j],
- tmp_height[j], tmp_stride[j], i, 0, NULL,
- pd->subsampling_x, pd->subsampling_y);
- }
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(left_mbmi->mode));
- ++ref) {
- const MV_REFERENCE_FRAME frame = has_second_ref(left_mbmi)
- ? left_mbmi->ref_frame[ref]
- : left_mbmi->ref_frame[0];
-#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
- for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!av1_is_valid_scale(&ref_buf->sf)))
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
- &ref_buf->sf);
- }
-
- xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
- xd->mb_to_bottom_edge =
- mb_to_bottom_edge_base + (xd->n8_h - i - mi_step) * 64;
- mi_x = mi_col << MI_SIZE_LOG2;
- mi_y = (mi_row + i) << MI_SIZE_LOG2;
-
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- const struct macroblockd_plane *pd = &xd->plane[j];
- bw = AOMMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
- 4);
- bw = AOMMIN(bw, block_size_wide[BLOCK_64X64] >> (pd->subsampling_x + 1));
- bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
-
- if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
- build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
- 0, 0, bw, bh,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
- 0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
- mi_x, mi_y);
- }
- *left_mbmi = backup_mbmi;
- }
xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
- xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
- xd->mb_to_right_edge -= xd->n8_w * 32;
+ xd->mb_to_right_edge -= (this_width - pred_width) * 8;
+ xd->mb_to_bottom_edge = ctxt.mb_to_far_edge;
}
void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
@@ -2388,7 +2487,9 @@ void av1_build_prediction_by_bottom_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int tmp_height[MAX_MB_PLANE],
int tmp_stride[MAX_MB_PLANE]) {
const TileInfo *const tile = &xd->tile;
+#if CONFIG_DEBUG
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+#endif
int i, j, mi_step, ref;
const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
int mb_to_right_edge_base = xd->mb_to_right_edge;
@@ -2405,18 +2506,14 @@ void av1_build_prediction_by_bottom_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int mi_x, mi_y, bw, bh;
MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
MB_MODE_INFO *mbmi = &mi->mbmi;
-#if CONFIG_EXT_INTER
MB_MODE_INFO backup_mbmi;
-#endif // CONFIG_EXT_INTER
mi_step = AOMMIN(xd->n8_w, mi_size_wide[mbmi->sb_type]);
if (!is_neighbor_overlappable(mbmi)) continue;
-#if CONFIG_EXT_INTER
backup_mbmi = *mbmi;
modify_neighbor_predictor_for_obmc(mbmi);
-#endif // CONFIG_EXT_INTER
for (j = 0; j < MAX_MB_PLANE; ++j) {
struct macroblockd_plane *const pd = &xd->plane[j];
@@ -2441,12 +2538,12 @@ void av1_build_prediction_by_bottom_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
xd->mb_to_right_edge =
mb_to_right_edge_base + (xd->n8_w - i - mi_step) * 64;
mi_x = (mi_col + i) << MI_SIZE_LOG2;
- mi_y = (mi_row << MI_SIZE_LOG2) + xd->n8_h * 4;
+ mi_y = (mi_row << MI_SIZE_LOG2) + xd->n8_h * (MI_SIZE >> 1);
for (j = 0; j < MAX_MB_PLANE; ++j) {
const struct macroblockd_plane *pd = &xd->plane[j];
bw = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_x;
- bh = (num_4x4_blocks_high_lookup[bsize] << 1) >> pd->subsampling_y;
+ bh = (xd->n8_h << (MI_SIZE_LOG2 - 1)) >> pd->subsampling_y;
if (mbmi->sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
const PARTITION_TYPE bp = BLOCK_8X8 - mbmi->sb_type;
@@ -2462,28 +2559,26 @@ void av1_build_prediction_by_bottom_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT) && y != 0)
continue;
- build_inter_predictors(
- cm, xd, j, mi_col_offset, mi_row_offset, y * 2 + x, bw, bh,
- (4 * x) >> pd->subsampling_x,
- xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0, pw, bh,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
- 0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
- mi_x, mi_y);
+ build_inter_predictors(cm, xd, j, mi, 1, y * 2 + x, bw, bh,
+ (4 * x) >> pd->subsampling_x,
+ xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0,
+ pw, bh,
+#if CONFIG_SUPERTX
+ 0, 0,
+#endif // CONFIG_SUPERTX
+ mi_x, mi_y);
}
} else {
- build_inter_predictors(
- cm, xd, j, mi_col_offset, mi_row_offset, 0, bw, bh, 0,
- xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0, bw, bh,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
- 0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
- mi_x, mi_y);
+ build_inter_predictors(cm, xd, j, mi, 1, 0, bw, bh, 0,
+ xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0, bw,
+ bh,
+#if CONFIG_SUPERTX
+ 0, 0,
+#endif // CONFIG_SUPERTX
+ mi_x, mi_y);
}
}
-#if CONFIG_EXT_INTER
*mbmi = backup_mbmi;
-#endif // CONFIG_EXT_INTER
}
xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
xd->mb_to_right_edge = mb_to_right_edge_base;
@@ -2497,7 +2592,9 @@ void av1_build_prediction_by_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
int tmp_height[MAX_MB_PLANE],
const int tmp_stride[MAX_MB_PLANE]) {
const TileInfo *const tile = &xd->tile;
+#if CONFIG_DEBUG
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+#endif
int i, j, mi_step, ref;
const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
@@ -2506,25 +2603,23 @@ void av1_build_prediction_by_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
(mi_col + xd->n8_w) % MI_SIZE == 0 || (mi_col + xd->n8_w) >= cm->mi_cols)
return;
- xd->mb_to_left_edge -= xd->n8_w * 32;
+ assert(bsize >= BLOCK_8X8);
+
+ xd->mb_to_left_edge -= xd->n8_w / 2 * MI_SIZE * 8;
for (i = 0; i < ilimit; i += mi_step) {
int mi_row_offset = i;
int mi_col_offset = xd->n8_w;
int mi_x, mi_y, bw, bh;
MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
MB_MODE_INFO *mbmi = &mi->mbmi;
-#if CONFIG_EXT_INTER
MB_MODE_INFO backup_mbmi;
-#endif // CONFIG_EXT_INTER
mi_step = AOMMIN(xd->n8_h, mi_size_high[mbmi->sb_type]);
if (!is_neighbor_overlappable(mbmi)) continue;
-#if CONFIG_EXT_INTER
backup_mbmi = *mbmi;
modify_neighbor_predictor_for_obmc(mbmi);
-#endif // CONFIG_EXT_INTER
for (j = 0; j < MAX_MB_PLANE; ++j) {
struct macroblockd_plane *const pd = &xd->plane[j];
@@ -2547,13 +2642,13 @@ void av1_build_prediction_by_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
xd->mb_to_bottom_edge =
- mb_to_bottom_edge_base + (xd->n8_h - i - mi_step) * 64;
- mi_x = (mi_col << MI_SIZE_LOG2) + xd->n8_w * 4;
+ mb_to_bottom_edge_base + (xd->n8_h - i - mi_step) * MI_SIZE * 8;
+ mi_x = (mi_col << MI_SIZE_LOG2) + xd->n8_w * (MI_SIZE >> 1);
mi_y = (mi_row + i) << MI_SIZE_LOG2;
for (j = 0; j < MAX_MB_PLANE; ++j) {
const struct macroblockd_plane *pd = &xd->plane[j];
- bw = (num_4x4_blocks_wide_lookup[bsize] << 1) >> pd->subsampling_x;
+ bw = (xd->n8_w << (MI_SIZE_LOG2 - 1)) >> pd->subsampling_x;
bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
if (mbmi->sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
@@ -2570,32 +2665,29 @@ void av1_build_prediction_by_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT) && x != 0)
continue;
- build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset,
- y * 2 + x, bw, bh,
+ build_inter_predictors(cm, xd, j, mi, 1, y * 2 + x, bw, bh,
xd->n8_w == 1 ? 4 >> pd->subsampling_x : 0,
(4 * y) >> pd->subsampling_y, bw, ph,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+#if CONFIG_SUPERTX
0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+#endif // CONFIG_SUPERTX
mi_x, mi_y);
}
} else {
- build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset, 0, bw,
- bh, xd->n8_w == 1 ? 4 >> pd->subsampling_x : 0,
- 0, bw, bh,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ build_inter_predictors(cm, xd, j, mi, 1, 0, bw, bh,
+ xd->n8_w == 1 ? 4 >> pd->subsampling_x : 0, 0,
+ bw, bh,
+#if CONFIG_SUPERTX
0, 0,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+#endif // CONFIG_SUPERTX
mi_x, mi_y);
}
}
-#if CONFIG_EXT_INTER
*mbmi = backup_mbmi;
-#endif // CONFIG_EXT_INTER
}
xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
- xd->mb_to_left_edge += xd->n8_w * 32;
+ xd->mb_to_left_edge += xd->n8_w / 2 * MI_SIZE * 8;
}
// This function combines motion compensated predictions that are generated by
@@ -2764,9 +2856,33 @@ void av1_build_ncobmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
mi_col);
}
#endif // CONFIG_NCOBMC
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
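+// Reset the prediction boundaries for a bw x bh (in mi units) block at
+// (mi_row, mi_col). All mb_to_* distances are in 1/8-pel units; e.g. with
+// MI_SIZE == 8, a block at mi_row = 4 gets mb_to_top_edge = -(4 * 8) * 8.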
+void reset_xd_boundary(MACROBLOCKD *xd, int mi_row, int bh, int mi_col, int bw,
+ int mi_rows, int mi_cols) {
+ xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
+ xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8;
+ xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+ xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
+}
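+// Record the mi-unit bounds of the superblock containing (mi_row, mi_col).
+// The end coordinates are clamped to the frame: e.g. with cm->mi_cols == 100,
+// mi_col == 96 and num_mi_w == 32, mi_col_end becomes 99 rather than 127.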
+void set_sb_mi_boundaries(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
+ const int mi_row, const int mi_col) {
+ const BLOCK_SIZE sb = cm->sb_size;
+ const int num_mi_w = mi_size_wide[sb];
+ const int num_mi_h = mi_size_high[sb];
+
+ xd->sb_mi_bd.mi_col_begin = mi_col;
+ xd->sb_mi_bd.mi_row_begin = mi_row;
+  // point to the last mi inside the superblock (clamped to the frame)
+ xd->sb_mi_bd.mi_col_end =
+ mi_col + num_mi_w > cm->mi_cols ? cm->mi_cols - 1 : mi_col + num_mi_w - 1;
+ xd->sb_mi_bd.mi_row_end =
+ mi_row + num_mi_h > cm->mi_rows ? cm->mi_rows - 1 : mi_row + num_mi_h - 1;
+}
+#endif
+
#endif // CONFIG_MOTION_VAR
-#if CONFIG_EXT_INTER
/* clang-format off */
#if CONFIG_INTERINTRA
#if CONFIG_EXT_PARTITION
@@ -2785,7 +2901,7 @@ static int ii_size_scales[BLOCK_SIZES_ALL] = {
#endif
32, 16, 16, 16, 8, 8, 8, 4,
4, 4, 2, 2, 2, 1, 1, 1,
- 16, 16, 8, 8,
+ 16, 16, 8, 8, 4, 4, 2, 2
};
#else
static const int ii_weights1d[MAX_SB_SIZE] = {
@@ -2800,7 +2916,7 @@ static int ii_size_scales[BLOCK_SIZES_ALL] = {
#endif
16, 8, 8, 8, 4, 4, 4,
2, 2, 2, 1, 1, 1,
- 8, 8, 4, 4,
+ 8, 8, 4, 4, 2, 2,
};
/* clang-format on */
#endif // CONFIG_EXT_PARTITION
@@ -2852,7 +2968,6 @@ static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,
}
break;
-#if CONFIG_ALT_INTRA
case II_SMOOTH_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
@@ -2863,11 +2978,7 @@ static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,
}
}
break;
-#endif
-#if !CONFIG_ALT_INTRA
- case II_TM_PRED:
-#endif
case II_DC_PRED:
default:
for (i = 0; i < bh; ++i) {
@@ -2902,8 +3013,8 @@ static void combine_interintra_highbd(
const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
- interpred8, interstride, mask, bw, bh, bw, subh,
- subw, bd);
+ interpred8, interstride, mask,
+ block_size_wide[bsize], bh, bw, subh, subw, bd);
}
return;
}
@@ -2931,7 +3042,6 @@ static void combine_interintra_highbd(
}
break;
-#if CONFIG_ALT_INTRA
case II_SMOOTH_PRED:
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
@@ -2942,11 +3052,7 @@ static void combine_interintra_highbd(
}
}
break;
-#endif
-#if !CONFIG_ALT_INTRA
- case II_TM_PRED:
-#endif
case II_DC_PRED:
default:
for (i = 0; i < bh; ++i) {
@@ -2960,7 +3066,8 @@ static void combine_interintra_highbd(
}
#endif // CONFIG_HIGHBITDEPTH
-void av1_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
+void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
+ MACROBLOCKD *xd,
BLOCK_SIZE bsize, int plane,
BUFFER_SET *ctx, uint8_t *dst,
int dst_stride) {
@@ -2969,7 +3076,7 @@ void av1_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
PREDICTION_MODE mode =
interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode];
- av1_predict_intra_block(xd, pd->width, pd->height, plane_bsize, mode,
+ av1_predict_intra_block(cm, xd, pd->width, pd->height, plane_bsize, mode,
ctx->plane[plane], ctx->stride[plane], dst,
dst_stride, 0, 0, plane);
}
@@ -2997,14 +3104,14 @@ void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
inter_pred, inter_stride, intra_pred, intra_stride);
}
-void av1_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
- int ystride, BUFFER_SET *ctx,
- BLOCK_SIZE bsize) {
+void av1_build_interintra_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ uint8_t *ypred, int ystride,
+ BUFFER_SET *ctx, BLOCK_SIZE bsize) {
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
av1_build_intra_predictors_for_interintra(
- xd, bsize, 0, ctx, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
+ cm, xd, bsize, 0, ctx, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
av1_combine_interintra(xd, bsize, 0, ypred, ystride,
CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
return;
@@ -3012,21 +3119,22 @@ void av1_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
#endif // CONFIG_HIGHBITDEPTH
{
DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
- av1_build_intra_predictors_for_interintra(xd, bsize, 0, ctx, intrapredictor,
- MAX_SB_SIZE);
+ av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, ctx,
+ intrapredictor, MAX_SB_SIZE);
av1_combine_interintra(xd, bsize, 0, ypred, ystride, intrapredictor,
MAX_SB_SIZE);
}
}
-void av1_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
- int ustride, BUFFER_SET *ctx,
- int plane, BLOCK_SIZE bsize) {
+void av1_build_interintra_predictors_sbc(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ uint8_t *upred, int ustride,
+ BUFFER_SET *ctx, int plane,
+ BLOCK_SIZE bsize) {
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]);
av1_build_intra_predictors_for_interintra(
- xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(uintrapredictor),
+ cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(uintrapredictor),
MAX_SB_SIZE);
av1_combine_interintra(xd, bsize, plane, upred, ustride,
CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE);
@@ -3035,28 +3143,29 @@ void av1_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
#endif // CONFIG_HIGHBITDEPTH
{
DECLARE_ALIGNED(16, uint8_t, uintrapredictor[MAX_SB_SQUARE]);
- av1_build_intra_predictors_for_interintra(xd, bsize, plane, ctx,
+ av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
uintrapredictor, MAX_SB_SIZE);
av1_combine_interintra(xd, bsize, plane, upred, ustride, uintrapredictor,
MAX_SB_SIZE);
}
}
-void av1_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
- uint8_t *vpred, int ustride,
- int vstride, BUFFER_SET *ctx,
- BLOCK_SIZE bsize) {
- av1_build_interintra_predictors_sbc(xd, upred, ustride, ctx, 1, bsize);
- av1_build_interintra_predictors_sbc(xd, vpred, vstride, ctx, 2, bsize);
+void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ uint8_t *upred, uint8_t *vpred,
+ int ustride, int vstride,
+ BUFFER_SET *ctx, BLOCK_SIZE bsize) {
+ av1_build_interintra_predictors_sbc(cm, xd, upred, ustride, ctx, 1, bsize);
+ av1_build_interintra_predictors_sbc(cm, xd, vpred, vstride, ctx, 2, bsize);
}
-void av1_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred,
- uint8_t *upred, uint8_t *vpred,
- int ystride, int ustride, int vstride,
- BUFFER_SET *ctx, BLOCK_SIZE bsize) {
- av1_build_interintra_predictors_sby(xd, ypred, ystride, ctx, bsize);
- av1_build_interintra_predictors_sbuv(xd, upred, vpred, ustride, vstride, ctx,
- bsize);
+void av1_build_interintra_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ uint8_t *ypred, uint8_t *upred,
+ uint8_t *vpred, int ystride, int ustride,
+ int vstride, BUFFER_SET *ctx,
+ BLOCK_SIZE bsize) {
+ av1_build_interintra_predictors_sby(cm, xd, ypred, ystride, ctx, bsize);
+ av1_build_interintra_predictors_sbuv(cm, xd, upred, vpred, ustride, vstride,
+ ctx, bsize);
}
#endif // CONFIG_INTERINTRA
@@ -3092,13 +3201,13 @@ static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
WarpTypesAllowed warp_types;
#if CONFIG_GLOBAL_MOTION
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
WarpedMotionParams *const wm =
mi->mbmi.ref_frame[ref] > 0 ? &xd->global_motion[mi->mbmi.ref_frame[ref]]
: &xd->global_motion[mi->mbmi.ref_frame[0]];
-#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
+#else // !(CONFIG_COMPOUND_SINGLEREF)
WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
warp_types.global_warp_allowed = is_global_mv_block(mi, block, wm->wmtype);
#endif // CONFIG_GLOBAL_MOTION
#if CONFIG_WARPED_MOTION
@@ -3144,13 +3253,13 @@ static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, subpel_x,
subpel_y, sf, w, h, &conv_params,
- mi->mbmi.interp_filter,
+ mi->mbmi.interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, (mi_x >> pd->subsampling_x) + x,
(mi_y >> pd->subsampling_y) + y, plane, ref,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR
- 0, 0,
+ mi, 0,
#endif
xs, ys, xd);
}
@@ -3172,10 +3281,10 @@ void av1_build_inter_predictors_for_planes_single_buf(
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
assert(bsize == BLOCK_8X8);
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
assert(has_second_ref(&xd->mi[0]->mbmi) ||
!is_inter_singleref_comp_mode(xd->mi[0]->mbmi.mode));
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
for (y = 0; y < num_4x4_h; ++y)
for (x = 0; x < num_4x4_w; ++x)
build_inter_predictors_single_buf(
@@ -3215,10 +3324,11 @@ static void build_wedge_inter_predictor_from_buf(
#if CONFIG_COMPOUND_SINGLEREF
if ((is_compound || is_inter_singleref_comp_mode(mbmi->mode)) &&
- is_masked_compound_type(mbmi->interinter_compound_type)) {
+ is_masked_compound_type(mbmi->interinter_compound_type))
#else // !CONFIG_COMPOUND_SINGLEREF
- if (is_compound && is_masked_compound_type(mbmi->interinter_compound_type)) {
+ if (is_compound && is_masked_compound_type(mbmi->interinter_compound_type))
#endif // CONFIG_COMPOUND_SINGLEREF
+ {
#if CONFIG_COMPOUND_SEGMENT
if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
#if CONFIG_HIGHBITDEPTH
@@ -3314,4 +3424,709 @@ void av1_build_wedge_inter_predictor_from_buf(
}
}
}
-#endif // CONFIG_EXT_INTER
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+
+void alloc_ncobmc_pred_buffer(MACROBLOCKD *const xd) {
+ int i;
+ // allocate interpolated prediction buffer
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ xd->ncobmc_pred_buf[i] = (uint8_t *)malloc(sizeof(uint8_t) * MAX_SB_SQUARE);
+ av1_zero_array(xd->ncobmc_pred_buf[i], MAX_SB_SQUARE);
+ xd->ncobmc_pred_buf_stride[i] = MAX_SB_SIZE;
+ }
+}
+
+void free_ncobmc_pred_buffer(MACROBLOCKD *const xd) {
+ for (int i = 0; i < MAX_MB_PLANE; ++i) free(xd->ncobmc_pred_buf[i]);
+}
+
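+// Copy the interpolated (NCOBMC) prediction for the block at (mi_row, mi_col)
+// out of xd->ncobmc_pred_buf into the plane's current dst buffer.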
+void get_pred_from_intrpl_buf(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int plane) {
+ uint8_t *dst = xd->plane[plane].dst.buf;
+ int ds = xd->plane[plane].dst.stride;
+ int ss_x = xd->plane[plane].subsampling_x;
+ int ss_y = xd->plane[plane].subsampling_y;
+
+ const int ip_wide = mi_size_wide[bsize] * MI_SIZE >> ss_x;
+ const int ip_high = mi_size_high[bsize] * MI_SIZE >> ss_y;
+ // relative coordinates of this MI in the superblock
+ int row_rlt = (mi_row - xd->sb_mi_bd.mi_row_begin) * MI_SIZE >> ss_y;
+ int col_rlt = (mi_col - xd->sb_mi_bd.mi_col_begin) * MI_SIZE >> ss_x;
+ int s = xd->ncobmc_pred_buf_stride[plane];
+ int r, c;
+
+ for (r = 0; r < ip_high; ++r) {
+ for (c = 0; c < ip_wide; ++c) {
+ dst[r * ds + c] =
+ xd->ncobmc_pred_buf[plane][(r + row_rlt) * s + c + col_rlt];
+ }
+ }
+}
+// scaling factors for ncobmc kernels
+#define KERNEL_SCALE_LOG 14
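+// The kernels hold fixed-point weights with KERNEL_SCALE_LOG fractional
+// bits. Assuming the four directional kernels at each pixel sum to roughly
+// (1 << KERNEL_SCALE_LOG), the blend in build_ncobmc_intrpl_pred() is
+//   out = clip_pixel((K0*p0 + K1*p1 + K2*p2 + K3*p3 + (1 << 13)) >> 14)
+// via ROUND_POWER_OF_TWO().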
+
+void build_ncobmc_intrpl_pred(const AV1_COMMON *const cm, MACROBLOCKD *xd,
+ int plane, int pxl_row, int pxl_col,
+ BLOCK_SIZE bsize, uint8_t *preds[][MAX_MB_PLANE],
+ int stride[MAX_MB_PLANE], // pred buffer strides
+ int mode) {
+ const ADAPT_OVERLAP_BLOCK ao_block = adapt_overlap_block_lookup[bsize];
+ const NCOBMC_KERNELS *const knls = &cm->ncobmc_kernels[ao_block][mode];
+ const int wide = mi_size_wide[bsize] * MI_SIZE;
+ const int high = mi_size_high[bsize] * MI_SIZE;
+ const int s = stride[plane];
+ const int ss_x = xd->plane[plane].subsampling_x;
+ const int ss_y = xd->plane[plane].subsampling_y;
+ int row_offset = (pxl_row - xd->sb_mi_bd.mi_row_begin * MI_SIZE) >> ss_y;
+ int col_offset = (pxl_col - xd->sb_mi_bd.mi_col_begin * MI_SIZE) >> ss_x;
+ int dst_stride = xd->ncobmc_pred_buf_stride[plane];
+ int dst_offset = row_offset * dst_stride + col_offset;
+
+#if CONFIG_HIGHBITDEPTH
+ const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#else
+ const int is_hbd = 0;
+#endif // CONFIG_HIGHBITDEPTH
+
+ int r, c, k_r, k_c;
+ int64_t tmp;
+
+  for (r = 0; r < (high >> ss_y); ++r) {
+    for (c = 0; c < (wide >> ss_x); ++c) {
+ int pos = r * s + c;
+ int q_tmp;
+ uint8_t val;
+
+ // TODO(weitinglin): find out the optimal sub-sampling patterns for
+ // chroma
+ k_r = (r << ss_y) + ss_y;
+ k_c = (c << ss_x) + ss_x;
+ if (ss_y && k_r >= high) k_r -= 1;
+ if (ss_x && k_c >= wide) k_c -= 1;
+
+ if (!is_hbd) {
+ uint8_t *tmp_p[4];
+ int i;
+ for (i = 0; i < 4; ++i) tmp_p[i] = preds[i][plane];
+
+ tmp = 0;
+ for (i = 0; i < 4; ++i)
+ tmp += knls->KERNEL[i][k_r][k_c] * tmp_p[i][pos];
+
+ } else {
+ uint16_t *tmp_p[4];
+ int i;
+ for (i = 0; i < 4; ++i) tmp_p[i] = CONVERT_TO_SHORTPTR(preds[i][plane]);
+
+ tmp = 0;
+ for (i = 0; i < 4; ++i)
+ tmp += knls->KERNEL[i][k_r][k_c] * tmp_p[i][pos];
+ }
+
+ q_tmp = (tmp <= 0) ? 0 : ROUND_POWER_OF_TWO(tmp, KERNEL_SCALE_LOG);
+ val = clip_pixel(q_tmp);
+
+ xd->ncobmc_pred_buf[plane][r * dst_stride + c + dst_offset] = val;
+
+ assert(r * dst_stride + c + dst_offset < MAX_SB_SQUARE);
+ }
+ }
+}
+
+void get_pred_by_horz_neighbor(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
+ int mi_row, int mi_col,
+ uint8_t *dst_buf[MAX_MB_PLANE],
+ int dst_stride[MAX_MB_PLANE]) {
+ const TileInfo *const tile = &xd->tile;
+ const int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
+ const int mb_to_top_edge_base = xd->mb_to_top_edge;
+ const int mb_to_left_edge_base = xd->mb_to_left_edge;
+ const int mb_to_right_edge_base = xd->mb_to_right_edge;
+ int overlappable_offset = -1;
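+  // Most recent overlappable neighbor found so far; the searches below fall
+  // back to it when they run off the end of the block without finding one.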
+ const int mi_nums = AOMMIN(mi_size_high[bsize], cm->mi_rows - mi_row);
+
+ int i, j, mi_step, ref;
+
+ xd->mb_to_right_edge += mi_size_wide[bsize] * MI_SIZE * 4;
+
+ // build from left neighbors
+ for (i = 0; i < mi_nums; i += mi_step) {
+ int mi_row_offset = i;
+ int mi_col_offset = -1;
+ int mi_x, mi_y, bw, bh;
+ MODE_INFO *left_mi;
+ MB_MODE_INFO *left_mbmi, backup_mbmi;
+ BLOCK_SIZE l_bsize;
+
+ // create the original prediction if offset exceeds the boundary
+ if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start)) mi_col_offset = 0;
+
+ left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ left_mbmi = &left_mi->mbmi;
+ l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
+
+ mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
+
+    // reset the mi if it is not overlappable
+ if (!is_neighbor_overlappable(left_mbmi)) {
+ // use left_mbmi->sb_type instead of l_bsize to handle
+ // sub8x8 cases
+ int search_mi_step = mi_size_high[left_mbmi->sb_type];
+ while (!is_neighbor_overlappable(left_mbmi)) {
+ mi_row_offset += search_mi_step;
+ if (mi_row_offset < mi_nums) {
+ left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ left_mbmi = &left_mi->mbmi;
+ search_mi_step = mi_size_high[left_mbmi->sb_type];
+ } else {
+ if (overlappable_offset >= 0) {
+ mi_row_offset = overlappable_offset;
+ } else {
+ mi_row_offset = 0;
+ mi_col_offset = 0;
+ }
+ left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ left_mbmi = &left_mi->mbmi;
+ break;
+ }
+ }
+ } else {
+ // update the available overlappable mi
+ overlappable_offset = mi_row_offset;
+ }
+
+ backup_mbmi = *left_mbmi;
+ modify_neighbor_predictor_for_obmc(left_mbmi);
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst, l_bsize, dst_buf[j], MAX_SB_SIZE, MAX_SB_SIZE,
+ dst_stride[j], i, 0, NULL, pd->subsampling_x,
+ pd->subsampling_y);
+ }
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(left_mbmi->mode));
+ ++ref) {
+ const MV_REFERENCE_FRAME frame = has_second_ref(left_mbmi)
+ ? left_mbmi->ref_frame[ref]
+ : left_mbmi->ref_frame[0];
+#else // !(CONFIG_COMPOUND_SINGLEREF)
+ for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
+ const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
+#endif // CONFIG_COMPOUND_SINGLEREF
+ const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+ if ((!av1_is_valid_scale(&ref_buf->sf)))
+ aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
+ &ref_buf->sf);
+ }
+ xd->mb_to_top_edge = -((mi_row + i) * MI_SIZE * 8);
+ xd->mb_to_bottom_edge =
+ mb_to_bottom_edge_base + (mi_nums - i - mi_step) * MI_SIZE * 8;
+ mi_x = mi_col << MI_SIZE_LOG2;
+ mi_y = (mi_row + i) << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+ bw = mi_size_wide[bsize] << (MI_SIZE_LOG2 - 1) >> pd->subsampling_x;
+ bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
+
+ build_inter_predictors(cm, xd, j, left_mi, 1, 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX
+ 0, 0,
+#endif // CONFIG_SUPERTX
+ mi_x, mi_y);
+ }
+ *left_mbmi = backup_mbmi;
+ }
+
+ // build from right neighbors
+ xd->mb_to_right_edge = mb_to_right_edge_base;
+ xd->mb_to_left_edge -= mi_size_wide[bsize] * MI_SIZE * 4;
+
+ overlappable_offset = -1;
+
+ for (i = 0; i < mi_nums; i += mi_step) {
+ int mi_row_offset = i;
+ int mi_col_offset = mi_size_wide[bsize];
+ int mi_x, mi_y, bw, bh;
+ int mi_col_shift = mi_size_wide[bsize] >> 1;
+ MODE_INFO *right_mi;
+ MB_MODE_INFO *right_mbmi, backup_mbmi;
+ BLOCK_SIZE r_bsize;
+
+ // create the original prediction if offset exceeds the boundary
+ if (mi_col + mi_col_offset > xd->sb_mi_bd.mi_col_end) mi_col_offset = 0;
+
+ right_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ right_mbmi = &right_mi->mbmi;
+ r_bsize = AOMMAX(right_mbmi->sb_type, BLOCK_8X8);
+
+ mi_step = AOMMIN(mi_nums, mi_size_high[r_bsize]);
+
+ if (!is_neighbor_overlappable(right_mbmi)) {
+ int search_mi_step = mi_size_high[right_mbmi->sb_type];
+ while (!is_neighbor_overlappable(right_mbmi)) {
+ mi_row_offset += search_mi_step;
+ if (mi_row_offset < mi_nums) {
+ right_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ right_mbmi = &right_mi->mbmi;
+ search_mi_step = mi_size_high[right_mbmi->sb_type];
+ } else {
+ if (overlappable_offset >= 0) {
+ mi_row_offset = overlappable_offset;
+ } else {
+ mi_row_offset = 0;
+ mi_col_offset = 0;
+ }
+ right_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ right_mbmi = &right_mi->mbmi;
+ break;
+ }
+ }
+ } else {
+ overlappable_offset = mi_row_offset;
+ }
+
+ backup_mbmi = *right_mbmi;
+ modify_neighbor_predictor_for_obmc(right_mbmi);
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst, r_bsize, dst_buf[j], MAX_SB_SIZE, MAX_SB_SIZE,
+ dst_stride[j], i, mi_col_shift, NULL, pd->subsampling_x,
+ pd->subsampling_y);
+ }
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(right_mbmi->mode));
+ ++ref) {
+ const MV_REFERENCE_FRAME frame = has_second_ref(right_mbmi)
+ ? right_mbmi->ref_frame[ref]
+ : right_mbmi->ref_frame[0];
+#else // !(CONFIG_COMPOUND_SINGLEREF)
+ for (ref = 0; ref < 1 + has_second_ref(right_mbmi); ++ref) {
+ const MV_REFERENCE_FRAME frame = right_mbmi->ref_frame[ref];
+#endif // CONFIG_COMPOUND_SINGLEREF
+ const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+ xd->block_refs[ref] = ref_buf;
+ if ((!av1_is_valid_scale(&ref_buf->sf)))
+ aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i,
+ mi_col + mi_col_shift, &ref_buf->sf);
+ }
+ xd->mb_to_top_edge = -((mi_row + i) * MI_SIZE * 8);
+ xd->mb_to_bottom_edge =
+ mb_to_bottom_edge_base + (mi_nums - i - mi_step) * MI_SIZE * 8;
+ mi_x = (mi_col + mi_col_shift) << MI_SIZE_LOG2;
+ mi_y = (mi_row + i) << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+ bw = mi_size_wide[bsize] << (MI_SIZE_LOG2 - 1) >> pd->subsampling_x;
+ bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
+
+ build_inter_predictors(cm, xd, j, right_mi, 1, 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX
+ 0, 0,
+#endif // CONFIG_SUPERTX
+ mi_x, mi_y);
+ }
+
+ *right_mbmi = backup_mbmi;
+ }
+
+ // restore the boundaries
+ xd->mb_to_top_edge = mb_to_top_edge_base;
+ xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
+ xd->mb_to_left_edge = mb_to_left_edge_base;
+ xd->mb_to_right_edge = mb_to_right_edge_base;
+}
+
+void get_pred_by_vert_neighbor(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
+ int mi_row, int mi_col,
+ uint8_t *dst_buf[MAX_MB_PLANE],
+ int dst_stride[MAX_MB_PLANE]) {
+ const TileInfo *const tile = &xd->tile;
+ const int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
+ const int mb_to_top_edge_base = xd->mb_to_top_edge;
+ const int mb_to_left_edge_base = xd->mb_to_left_edge;
+ const int mb_to_right_edge_base = xd->mb_to_right_edge;
+ int overlappable_offset = -1;
+ const int mi_nums = AOMMIN(mi_size_wide[bsize], cm->mi_cols - mi_col);
+
+ int i, j, mi_step, ref;
+
+  xd->mb_to_bottom_edge += mi_size_high[bsize] * MI_SIZE * 4;
+
+ // build from above neighbors
+ for (i = 0; i < mi_nums; i += mi_step) {
+ int mi_row_offset = -1;
+ int mi_col_offset = i;
+ int mi_x, mi_y, bw, bh;
+ MODE_INFO *above_mi;
+ MB_MODE_INFO *above_mbmi, backup_mbmi;
+ BLOCK_SIZE a_bsize;
+
+ // create the original prediction if offset exceeds the boundary
+ if (mi_row <= tile->mi_row_start) mi_row_offset = 0;
+
+ above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ above_mbmi = &above_mi->mbmi;
+ a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
+
+    mi_step = AOMMIN(mi_nums, mi_size_wide[a_bsize]);
+
+    // reset the mi if it is not overlappable
+ if (!is_neighbor_overlappable(above_mbmi)) {
+      int search_mi_step = mi_size_wide[above_mbmi->sb_type];
+ // backward search
+ while (!is_neighbor_overlappable(above_mbmi)) {
+ mi_col_offset += search_mi_step;
+ if (mi_col_offset < mi_nums) {
+ above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ above_mbmi = &above_mi->mbmi;
+          search_mi_step = mi_size_wide[above_mbmi->sb_type];
+ } else {
+ if (overlappable_offset >= 0) {
+ mi_col_offset = overlappable_offset;
+ } else {
+ mi_row_offset = 0;
+ mi_col_offset = 0;
+ }
+ above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ above_mbmi = &above_mi->mbmi;
+ break;
+ }
+ }
+ } else {
+ // update the available overlappable mi
+ overlappable_offset = mi_col_offset;
+ }
+
+ backup_mbmi = *above_mbmi;
+ modify_neighbor_predictor_for_obmc(above_mbmi);
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst, a_bsize, dst_buf[j], MAX_SB_SIZE, MAX_SB_SIZE,
+ dst_stride[j], 0, i, NULL, pd->subsampling_x,
+ pd->subsampling_y);
+ }
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(above_mbmi->mode));
+ ++ref) {
+ const MV_REFERENCE_FRAME frame = has_second_ref(above_mbmi)
+ ? above_mbmi->ref_frame[ref]
+ : above_mbmi->ref_frame[0];
+#else // !(CONFIG_COMPOUND_SINGLEREF)
+ for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
+ const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
+#endif // CONFIG_COMPOUND_SINGLEREF
+ const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+ if ((!av1_is_valid_scale(&ref_buf->sf)))
+ aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
+ &ref_buf->sf);
+ }
+
+ xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
+ xd->mb_to_right_edge =
+ mb_to_right_edge_base + (mi_nums - i - mi_step) * MI_SIZE * 8;
+ mi_x = (mi_col + i) << MI_SIZE_LOG2;
+ mi_y = mi_row << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+
+      bh = mi_size_high[bsize] << (MI_SIZE_LOG2 - 1) >> pd->subsampling_y;
+      bw = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_x;
+
+ build_inter_predictors(cm, xd, j, above_mi, 1, 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX
+ 0, 0,
+#endif // CONFIG_SUPERTX
+ mi_x, mi_y);
+ }
+
+ *above_mbmi = backup_mbmi;
+ }
+
+ // build from bottom neighbors
+ xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
+ xd->mb_to_top_edge -= mi_size_high[bsize] * MI_SIZE * 4;
+
+ overlappable_offset = -1;
+
+ for (i = 0; i < mi_nums; i += mi_step) {
+ int mi_row_offset = mi_size_high[bsize];
+ int mi_col_offset = i;
+ int mi_x, mi_y, bw, bh;
+ int mi_row_shift = mi_size_high[bsize] >> 1;
+ MODE_INFO *bottom_mi;
+ MB_MODE_INFO *bottom_mbmi, backup_mbmi;
+ BLOCK_SIZE b_bsize;
+
+ // create the original prediction if offset exceeds the boundary
+ if (mi_row + mi_row_offset > xd->sb_mi_bd.mi_row_end) mi_row_offset = 0;
+
+ bottom_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ bottom_mbmi = &bottom_mi->mbmi;
+ b_bsize = AOMMAX(bottom_mbmi->sb_type, BLOCK_8X8);
+
+    mi_step = AOMMIN(mi_nums, mi_size_wide[b_bsize]);
+
+    // reset the mi if it is not overlappable
+ if (!is_neighbor_overlappable(bottom_mbmi)) {
+      int search_mi_step = mi_size_wide[bottom_mbmi->sb_type];
+ while (!is_neighbor_overlappable(bottom_mbmi)) {
+ mi_col_offset += search_mi_step;
+ if (mi_col_offset < mi_nums) {
+ bottom_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ bottom_mbmi = &bottom_mi->mbmi;
+          search_mi_step = mi_size_wide[bottom_mbmi->sb_type];
+ } else {
+ if (overlappable_offset >= 0) {
+ mi_col_offset = overlappable_offset;
+ } else {
+ mi_col_offset = 0;
+ mi_row_offset = 0;
+ }
+ bottom_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ bottom_mbmi = &bottom_mi->mbmi;
+ break;
+ }
+ }
+ } else {
+ // update the available overlappable mi
+ overlappable_offset = mi_col_offset;
+ }
+
+ backup_mbmi = *bottom_mbmi;
+ modify_neighbor_predictor_for_obmc(bottom_mbmi);
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst, b_bsize, dst_buf[j], MAX_SB_SIZE, MAX_SB_SIZE,
+ dst_stride[j], mi_row_shift, i, NULL, pd->subsampling_x,
+ pd->subsampling_y);
+ }
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(bottom_mbmi->mode));
+ ++ref) {
+ const MV_REFERENCE_FRAME frame = has_second_ref(bottom_mbmi)
+ ? bottom_mbmi->ref_frame[ref]
+ : bottom_mbmi->ref_frame[0];
+#else // !(CONFIG_COMPOUND_SINGLEREF)
+ for (ref = 0; ref < 1 + has_second_ref(bottom_mbmi); ++ref) {
+ const MV_REFERENCE_FRAME frame = bottom_mbmi->ref_frame[ref];
+#endif // CONFIG_COMPOUND_SINGLEREF
+ const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+ xd->block_refs[ref] = ref_buf;
+ if ((!av1_is_valid_scale(&ref_buf->sf)))
+ aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + mi_row_shift,
+ mi_col + i, &ref_buf->sf);
+ }
+
+ xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
+ xd->mb_to_right_edge =
+ mb_to_right_edge_base + (mi_nums - i - mi_step) * MI_SIZE * 8;
+ mi_x = (mi_col + i) << MI_SIZE_LOG2;
+ mi_y = (mi_row + mi_row_shift) << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+
+      bh = mi_size_high[bsize] << (MI_SIZE_LOG2 - 1) >> pd->subsampling_y;
+      bw = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_x;
+
+ build_inter_predictors(cm, xd, j, bottom_mi, 1, 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX
+ 0, 0,
+#endif // CONFIG_SUPERTX
+ mi_x, mi_y);
+ }
+
+ *bottom_mbmi = backup_mbmi;
+ }
+ // restore the boundaries
+ xd->mb_to_top_edge = mb_to_top_edge_base;
+ xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
+ xd->mb_to_left_edge = mb_to_left_edge_base;
+ xd->mb_to_right_edge = mb_to_right_edge_base;
+}
+
+void get_pred_by_corner_neighbor(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ int bsize, int mi_row, int mi_col,
+ uint8_t *dst_buf[MAX_MB_PLANE],
+ int dst_stride[MAX_MB_PLANE]) {
+ const TileInfo *const tile = &xd->tile;
+ const int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
+ const int mb_to_top_edge_base = xd->mb_to_top_edge;
+ const int mb_to_left_edge_base = xd->mb_to_left_edge;
+ const int mb_to_right_edge_base = xd->mb_to_right_edge;
+ const int mi_wide = mi_size_wide[bsize];
+ const int mi_high = mi_size_high[bsize];
+
+ // location of four mi sources
+ const int mi_row_offsets[4] = { -1, -1, mi_high, mi_high };
+ const int mi_col_offsets[4] = { -1, mi_wide, -1, mi_wide };
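+  // Neighbor i pairs mi_row_offsets[i] with mi_col_offsets[i]: i / 2 selects
+  // top (0) vs. bottom (1), and i % 2 selects left (0) vs. right (1),
+  // matching the (i / 2, i % 2) arithmetic used throughout the loop below.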
+
+ MB_MODE_INFO backup_mbmi;
+ int mi_x, mi_y, bh, bw;
+ int i, j, ref;
+
+ assert(bsize >= BLOCK_8X8);
+
+ for (i = 0; i < 4; ++i) {
+ int mi_row_offset = mi_row_offsets[i];
+ int mi_col_offset = mi_col_offsets[i];
+ MODE_INFO *corner_mi;
+ MB_MODE_INFO *corner_mbmi;
+
+ if (mi_col + mi_col_offset < tile->mi_col_start ||
+ mi_col + mi_col_offset > xd->sb_mi_bd.mi_col_end)
+ mi_col_offset = 0;
+
+ if (mi_row + mi_row_offset < tile->mi_row_start ||
+ mi_row + mi_row_offset > xd->sb_mi_bd.mi_row_end)
+ mi_row_offset = 0;
+
+ corner_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ corner_mbmi = &corner_mi->mbmi;
+
+    // reset the mi if it is not overlappable
+ if (!is_neighbor_overlappable(corner_mbmi)) {
+ mi_row_offset = 0;
+ mi_col_offset = 0;
+ corner_mi = xd->mi[0];
+ corner_mbmi = &corner_mi->mbmi;
+ }
+
+ backup_mbmi = *corner_mbmi;
+ modify_neighbor_predictor_for_obmc(corner_mbmi);
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst, BLOCK_8X8, dst_buf[j], MAX_SB_SIZE,
+ MAX_SB_SIZE, dst_stride[j], (i / 2) * (mi_high >> 1),
+ (i % 2) * (mi_wide >> 1), NULL, pd->subsampling_x,
+ pd->subsampling_y);
+ }
+
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + (is_inter_anyref_comp_mode(corner_mbmi->mode));
+ ++ref) {
+ const MV_REFERENCE_FRAME frame = has_second_ref(corner_mbmi)
+ ? corner_mbmi->ref_frame[ref]
+ : corner_mbmi->ref_frame[0];
+#else
+ for (ref = 0; ref < 1 + has_second_ref(corner_mbmi); ++ref) {
+ const MV_REFERENCE_FRAME frame = corner_mbmi->ref_frame[ref];
+#endif
+ const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+ xd->block_refs[ref] = ref_buf;
+
+ if ((!av1_is_valid_scale(&ref_buf->sf)))
+ aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ av1_setup_pre_planes(xd, ref, ref_buf->buf,
+ mi_row + (i / 2) * (mi_high >> 1),
+ mi_col + (i % 2) * (mi_wide >> 1), &ref_buf->sf);
+ }
+ // adjust mi boundaries of this block
+ xd->mb_to_bottom_edge =
+ mb_to_bottom_edge_base + (1 - (i / 2)) * mi_high * MI_SIZE * 4;
+ xd->mb_to_top_edge = mb_to_top_edge_base - (i / 2) * mi_high * MI_SIZE * 4;
+ xd->mb_to_right_edge =
+ mb_to_right_edge_base + (1 - (i % 2)) * mi_wide * MI_SIZE * 4;
+ xd->mb_to_left_edge =
+ mb_to_left_edge_base - (i % 2) * mi_wide * MI_SIZE * 4;
+
+ mi_x = (mi_col + (i % 2) * mi_wide / 2) << MI_SIZE_LOG2;
+ mi_y = (mi_row + (i / 2) * mi_high / 2) << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+      bh = mi_high << MI_SIZE_LOG2 >> (pd->subsampling_y + 1);
+      bw = mi_wide << MI_SIZE_LOG2 >> (pd->subsampling_x + 1);
+ build_inter_predictors(cm, xd, j, corner_mi, 1, 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX
+ 0, 0,
+#endif // CONFIG_SUPERTX
+ mi_x, mi_y);
+ }
+ *corner_mbmi = backup_mbmi;
+ }
+ // restore the boundaries
+ xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
+ xd->mb_to_top_edge = mb_to_top_edge_base;
+ xd->mb_to_right_edge = mb_to_right_edge_base;
+ xd->mb_to_left_edge = mb_to_left_edge_base;
+}
+
+// Get the stitched extra predictions for this block.
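+// dst_buf[0] receives the corner-neighbor prediction, dst_buf[1] the
+// vertical-neighbor prediction and dst_buf[2] the horizontal-neighbor one.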
+void av1_get_ext_blk_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
+ int mi_row, int mi_col,
+ uint8_t *dst_buf[][MAX_MB_PLANE],
+ int dst_stride[MAX_MB_PLANE]) {
+ get_pred_by_corner_neighbor(cm, xd, bsize, mi_row, mi_col, dst_buf[0],
+ dst_stride);
+ get_pred_by_vert_neighbor(cm, xd, bsize, mi_row, mi_col, dst_buf[1],
+ dst_stride);
+ get_pred_by_horz_neighbor(cm, xd, bsize, mi_row, mi_col, dst_buf[2],
+ dst_stride);
+}
+
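+// Rebuild the ordinary (non-OBMC) inter prediction for this block into the
+// caller-provided dst buffers.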
+void av1_get_ori_blk_pred(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
+ int mi_row, int mi_col,
+ uint8_t *dst_buf[MAX_MB_PLANE],
+ int dst_stride[MAX_MB_PLANE]) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ int mi_x = mi_col << MI_SIZE_LOG2;
+ int mi_y = mi_row << MI_SIZE_LOG2;
+ int bw = block_size_wide[bsize];
+ int bh = block_size_high[bsize];
+ int i, ref;
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ struct macroblockd_plane *const pd = &xd->plane[i];
+ setup_pred_plane(&pd->dst, BLOCK_8X8, dst_buf[i], MAX_SB_SIZE, MAX_SB_SIZE,
+ dst_stride[i], 0, 0, NULL, pd->subsampling_x,
+ pd->subsampling_y);
+ }
+
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+ const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+ const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+ xd->block_refs[ref] = ref_buf;
+
+ if (!av1_is_valid_scale(&ref_buf->sf))
+ aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+
+ av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, &ref_buf->sf);
+ }
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ const struct macroblockd_plane *pd = &xd->plane[i];
+ build_inter_predictors(cm, xd, i, mi, 1, 0, bw >> pd->subsampling_x,
+ bh >> pd->subsampling_y, 0, 0,
+ bw >> pd->subsampling_x, bh >> pd->subsampling_y,
+#if CONFIG_SUPERTX
+ 0, 0,
+#endif // CONFIG_SUPERTX
+ mi_x, mi_y);
+ }
+}
+
+#endif
diff --git a/third_party/aom/av1/common/reconinter.h b/third_party/aom/av1/common/reconinter.h
index 229646036..fd69f9db3 100644
--- a/third_party/aom/av1/common/reconinter.h
+++ b/third_party/aom/av1/common/reconinter.h
@@ -40,34 +40,27 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride, int subpel_x,
int subpel_y, const struct scale_factors *sf,
int w, int h, ConvolveParams *conv_params,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
- int xs, int ys) {
-#if CONFIG_DUAL_FILTER
- const InterpFilter filter_x = av1_get_plane_interp_filter(
- interp_filter[1 + 2 * conv_params->ref], conv_params->plane);
- const InterpFilter filter_y = av1_get_plane_interp_filter(
- interp_filter[0 + 2 * conv_params->ref], conv_params->plane);
- const InterpFilterParams interp_filter_params_x =
- av1_get_interp_filter_params(filter_x);
- const InterpFilterParams interp_filter_params_y =
- av1_get_interp_filter_params(filter_y);
-#else
- const InterpFilterParams interp_filter_params_x =
- av1_get_interp_filter_params(interp_filter);
- const InterpFilterParams interp_filter_params_y = interp_filter_params_x;
-#endif
-
+ InterpFilters interp_filters, int xs,
+ int ys) {
assert(conv_params->do_average == 0 || conv_params->do_average == 1);
assert(sf);
if (has_scale(xs, ys)) {
// TODO(afergs, debargha): Use a different scale convolve function
// that uses higher precision for subpel_x, subpel_y, xs, ys
- av1_convolve_scale(src, src_stride, dst, dst_stride, w, h, interp_filter,
- subpel_x, xs, subpel_y, ys, conv_params);
+ if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
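+      // No-round path: the convolve facade keeps the intermediate values at
+      // higher precision, and do_post_rounding tells the caller to apply the
+      // final rounding itself.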
+#if CONFIG_CONVOLVE_ROUND
+ av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
+ interp_filters, subpel_x, xs, subpel_y, ys, 1,
+ conv_params);
+ conv_params->do_post_rounding = 1;
+#else
+ assert(0);
+#endif // CONFIG_CONVOLVE_ROUND
+ } else {
+ assert(conv_params->round == CONVOLVE_OPT_ROUND);
+ av1_convolve_scale(src, src_stride, dst, dst_stride, w, h, interp_filters,
+ subpel_x, xs, subpel_y, ys, conv_params);
+ }
} else {
subpel_x >>= SCALE_EXTRA_BITS;
subpel_y >>= SCALE_EXTRA_BITS;
@@ -80,31 +73,32 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
#if CONFIG_CONVOLVE_ROUND
av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
-#if CONFIG_DUAL_FILTER
- interp_filter,
-#else // CONFIG_DUAL_FILTER
- &interp_filter,
-#endif // CONFIG_DUAL_FILTER
- subpel_x, xs, subpel_y, ys, conv_params);
+ interp_filters, subpel_x, xs, subpel_y, ys, 0,
+ conv_params);
conv_params->do_post_rounding = 1;
#else
assert(0);
#endif // CONFIG_CONVOLVE_ROUND
} else {
assert(conv_params->round == CONVOLVE_OPT_ROUND);
+
+ InterpFilterParams filter_params_x, filter_params_y;
+ av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
+ &filter_params_y);
+
if (w <= 2 || h <= 2) {
- av1_convolve_c(src, src_stride, dst, dst_stride, w, h, interp_filter,
+ av1_convolve_c(src, src_stride, dst, dst_stride, w, h, interp_filters,
subpel_x, xs, subpel_y, ys, conv_params);
- } else if (interp_filter_params_x.taps == SUBPEL_TAPS &&
- interp_filter_params_y.taps == SUBPEL_TAPS) {
- const int16_t *kernel_x = av1_get_interp_filter_subpel_kernel(
- interp_filter_params_x, subpel_x);
- const int16_t *kernel_y = av1_get_interp_filter_subpel_kernel(
- interp_filter_params_y, subpel_y);
+ } else if (filter_params_x.taps == SUBPEL_TAPS &&
+ filter_params_y.taps == SUBPEL_TAPS) {
+ const int16_t *kernel_x =
+ av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x);
+ const int16_t *kernel_y =
+ av1_get_interp_filter_subpel_kernel(filter_params_y, subpel_y);
sf->predict[subpel_x != 0][subpel_y != 0][conv_params->do_average](
src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
} else {
- av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
+ av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filters,
subpel_x, xs, subpel_y, ys, conv_params);
}
}
@@ -117,31 +111,26 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
int subpel_x, int subpel_y,
const struct scale_factors *sf, int w,
int h, ConvolveParams *conv_params,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
- int xs, int ys, int bd) {
+ InterpFilters interp_filters, int xs,
+ int ys, int bd) {
const int avg = conv_params->do_average;
assert(avg == 0 || avg == 1);
-#if CONFIG_DUAL_FILTER
- const int ref = conv_params->ref;
- const InterpFilterParams interp_filter_params_x =
- av1_get_interp_filter_params(interp_filter[1 + 2 * ref]);
- const InterpFilterParams interp_filter_params_y =
- av1_get_interp_filter_params(interp_filter[0 + 2 * ref]);
-#else
- const InterpFilterParams interp_filter_params_x =
- av1_get_interp_filter_params(interp_filter);
- const InterpFilterParams interp_filter_params_y = interp_filter_params_x;
-#endif
if (has_scale(xs, ys)) {
- av1_highbd_convolve_scale(
- src, src_stride, dst, dst_stride, w, h, interp_filter,
- subpel_x >> SCALE_EXTRA_BITS, xs >> SCALE_EXTRA_BITS,
- subpel_y >> SCALE_EXTRA_BITS, ys >> SCALE_EXTRA_BITS, avg, bd);
+ if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+#if CONFIG_CONVOLVE_ROUND
+ av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
+ interp_filters, subpel_x, xs, subpel_y, ys,
+ 1, conv_params, bd);
+ conv_params->do_post_rounding = 1;
+#else
+ assert(0);
+#endif // CONFIG_CONVOLVE_ROUND
+ } else {
+ av1_highbd_convolve_scale(src, src_stride, dst, dst_stride, w, h,
+ interp_filters, subpel_x, xs, subpel_y, ys, avg,
+ bd);
+ }
} else {
subpel_x >>= SCALE_EXTRA_BITS;
subpel_y >>= SCALE_EXTRA_BITS;
@@ -154,37 +143,36 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
#if CONFIG_CONVOLVE_ROUND
av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
-#if CONFIG_DUAL_FILTER
- interp_filter,
-#else // CONFIG_DUAL_FILTER
- &interp_filter,
-#endif // CONFIG_DUAL_FILTER
- subpel_x, xs, subpel_y, ys, conv_params,
- bd);
+ interp_filters, subpel_x, xs, subpel_y, ys,
+ 0, conv_params, bd);
conv_params->do_post_rounding = 1;
#else
assert(0);
#endif // CONFIG_CONVOLVE_ROUND
} else {
- if (interp_filter_params_x.taps == SUBPEL_TAPS &&
- interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
- const int16_t *kernel_x = av1_get_interp_filter_subpel_kernel(
- interp_filter_params_x, subpel_x);
- const int16_t *kernel_y = av1_get_interp_filter_subpel_kernel(
- interp_filter_params_y, subpel_y);
+ InterpFilterParams filter_params_x, filter_params_y;
+ av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
+ &filter_params_y);
+
+ if (filter_params_x.taps == SUBPEL_TAPS &&
+ filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
+ const int16_t *kernel_x =
+ av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x);
+ const int16_t *kernel_y =
+ av1_get_interp_filter_subpel_kernel(filter_params_y, subpel_y);
sf->highbd_predict[subpel_x != 0][subpel_y != 0][avg](
src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h,
bd);
} else {
av1_highbd_convolve(src, src_stride, dst, dst_stride, w, h,
- interp_filter, subpel_x, xs, subpel_y, ys, avg, bd);
+ interp_filters, subpel_x, xs, subpel_y, ys, avg,
+ bd);
}
}
}
}
#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_EXT_INTER
// Set to (1 << 5) if the 32-ary codebooks are used for any block size
#define MAX_WEDGE_TYPES (1 << 4)
@@ -239,7 +227,8 @@ static INLINE int is_interinter_compound_used(COMPOUND_TYPE type,
case COMPOUND_WEDGE: return wedge_params_lookup[sb_type].bits > 0;
#endif // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
- case COMPOUND_SEG: return sb_type >= BLOCK_8X8;
+ case COMPOUND_SEG:
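+      // Require both dimensions to be at least 8; rectangular sizes such as
+      // BLOCK_4X16 sit above BLOCK_8X8 in the enum but are still excluded.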
+ return AOMMIN(block_size_wide[sb_type], block_size_high[sb_type]) >= 8;
#endif // CONFIG_COMPOUND_SEGMENT
default: assert(0); return 0;
}
@@ -288,225 +277,20 @@ void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
BLOCK_SIZE sb_type, int h, int w, int bd);
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_COMPOUND_SEGMENT
-#endif // CONFIG_EXT_INTER
-
-void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
-#if CONFIG_MOTION_VAR
- int mi_col_offset, int mi_row_offset,
-#endif // CONFIG_MOTION_VAR
- int block, int bw, int bh, int x, int y, int w,
- int h,
-#if CONFIG_SUPERTX && CONFIG_EXT_INTER
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
- int mi_x, int mi_y);
-
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-// This function will determine whether or not to create a warped
-// prediction and return the appropriate motion model depending
-// on the configuration. Behavior will change with different
-// combinations of GLOBAL_MOTION, WARPED_MOTION and MOTION_VAR.
-static INLINE int allow_warp(const MODE_INFO *const mi,
- const WarpTypesAllowed *const warp_types,
-#if CONFIG_GLOBAL_MOTION
- const WarpedMotionParams *const gm_params,
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_MOTION_VAR
- int mi_col_offset, int mi_row_offset,
-#endif // CONFIG_MOTION_VAR
- WarpedMotionParams *final_warp_params) {
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
- set_default_warp_params(final_warp_params);
-
-// Only global motion configured
-#if CONFIG_GLOBAL_MOTION && !CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
- (void)mbmi;
- if (warp_types->global_warp_allowed) {
- memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
- return 1;
- }
-#endif // CONFIG_GLOBAL_MOTION && !CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
-
-// Only warped motion configured
-#if CONFIG_WARPED_MOTION && !CONFIG_GLOBAL_MOTION && !CONFIG_MOTION_VAR
- if (warp_types->local_warp_allowed) {
- memcpy(final_warp_params, &mbmi->wm_params[0], sizeof(*final_warp_params));
- return 1;
- }
-#endif // CONFIG_WARPED_MOTION && !CONFIG_GLOBAL_MOTION && !CONFIG_MOTION_VAR
-
-// Warped and global motion configured
-#if CONFIG_GLOBAL_MOTION && CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
- // When both are enabled, warped will take priority. The global parameters
- // will only be used to compute projection samples to find the warped model.
- // Note that when a block chooses global, it will not be possible to
- // select WARPED_CAUSAL.
- if (warp_types->local_warp_allowed) {
- memcpy(final_warp_params, &mbmi->wm_params[0], sizeof(*final_warp_params));
- return 1;
- } else if (warp_types->global_warp_allowed) {
- memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
- return 1;
- }
-#endif // CONFIG_GLOBAL_MOTION && CONFIG_WARPED_MOTION && !CONFIG_MOTION_VAR
-
-// Motion var and global motion configured
-#if CONFIG_GLOBAL_MOTION && CONFIG_MOTION_VAR && !CONFIG_WARPED_MOTION
- // We warp if either case is true:
- // 1.) We are predicting a block which uses global motion
- // 2.) We are predicting a neighboring block of a block using OBMC,
- // the neighboring block uses global motion, and we have enabled
- // WARP_GM_NEIGHBORS_WITH_OBMC
- const int build_for_obmc = !(mi_col_offset == 0 && mi_row_offset == 0);
- (void)mbmi;
- if (warp_types->global_warp_allowed &&
- (WARP_GM_NEIGHBORS_WITH_OBMC || !build_for_obmc)) {
- memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
- return 1;
- }
-#endif // CONFIG_GLOBAL_MOTION && CONFIG_MOTION_VAR && !CONFIG_WARPED_MOTION
-
-// Motion var and warped motion configured
-#if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && !CONFIG_GLOBAL_MOTION
- // We warp if either case is true:
- // 1.) We are predicting a block with motion mode WARPED_CAUSAL
- // 2.) We are predicting a neighboring block of a block using OBMC,
- // the neighboring block has mode WARPED_CAUSAL, and we have enabled
- // WARP_WM_NEIGHBORS_WITH_OBMC
- const int build_for_obmc = !(mi_col_offset == 0 && mi_row_offset == 0);
- if (warp_types->local_warp_allowed) {
- if ((build_for_obmc && WARP_WM_NEIGHBORS_WITH_OBMC) || (!build_for_obmc)) {
- memcpy(final_warp_params, &mbmi->wm_params[0],
- sizeof(*final_warp_params));
- return 1;
- }
- }
-#endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && !CONFIG_GLOBAL_MOTION
-
-// Motion var, warped motion and global motion all configured
-#if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && CONFIG_GLOBAL_MOTION
- const int build_for_obmc = !(mi_col_offset == 0 && mi_row_offset == 0);
- if (warp_types->local_warp_allowed) {
- if ((build_for_obmc && WARP_WM_NEIGHBORS_WITH_OBMC) || (!build_for_obmc)) {
- memcpy(final_warp_params, &mbmi->wm_params[0],
- sizeof(*final_warp_params));
- return 1;
- }
- } else if (warp_types->global_warp_allowed &&
- (WARP_GM_NEIGHBORS_WITH_OBMC || !build_for_obmc)) {
- memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
- return 1;
- }
-#endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR && CONFIG_GLOBAL_MOTION
- return 0;
-}
-#endif // CONFIG_GLOBAL_MOTION ||CONFIG_WARPED_MOTION
-
-static INLINE void av1_make_inter_predictor(
- const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
+void av1_make_masked_inter_predictor(
+ const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
const int subpel_x, const int subpel_y, const struct scale_factors *sf,
- int w, int h, ConvolveParams *conv_params,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
-#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- const WarpTypesAllowed *warp_types, int p_col, int p_row, int plane,
- int ref,
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR
- int mi_col_offset, int mi_row_offset,
-#endif
- int xs, int ys, const MACROBLOCKD *xd) {
- (void)xd;
-
-#if CONFIG_MOTION_VAR
- const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset];
-#else
- const MODE_INFO *mi = xd->mi[0];
- (void)mi;
-#endif // CONFIG_MOTION_VAR
-
-// Make sure the selected motion mode is valid for this configuration
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- assert_motion_mode_valid(mi->mbmi.motion_mode,
-#if CONFIG_GLOBAL_MOTION
- 0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
- xd,
-#endif
- mi);
-#endif // CONFIG MOTION_VAR || CONFIG_WARPED_MOTION
-
-#if CONFIG_WARPED_MOTION || CONFIG_GLOBAL_MOTION
- WarpedMotionParams final_warp_params;
- const int do_warp = allow_warp(
- mi, warp_types,
-#if CONFIG_GLOBAL_MOTION
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- // TODO(zoeliu): To further check the single
- // ref comp mode to work together with
- // global motion.
- has_second_ref(&mi->mbmi) ? &xd->global_motion[mi->mbmi.ref_frame[ref]]
- : &xd->global_motion[mi->mbmi.ref_frame[0]],
-#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
- &xd->global_motion[mi->mbmi.ref_frame[ref]],
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_MOTION_VAR
- mi_col_offset, mi_row_offset,
-#endif // CONFIG_MOTION_VAR
- &final_warp_params);
- if (do_warp) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const struct buf_2d *const pre_buf = &pd->pre[ref];
- av1_warp_plane(&final_warp_params,
-#if CONFIG_HIGHBITDEPTH
- xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
-#endif // CONFIG_HIGHBITDEPTH
- pre_buf->buf0, pre_buf->width, pre_buf->height,
- pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
- pd->subsampling_x, pd->subsampling_y, xs, ys, conv_params);
- return;
- }
-#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, conv_params, interp_filter, xs, ys,
- xd->bd);
- return;
- }
-#endif // CONFIG_HIGHBITDEPTH
- inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
- h, conv_params, interp_filter, xs, ys);
-}
-
-#if CONFIG_EXT_INTER
-void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
- uint8_t *dst, int dst_stride,
- const int subpel_x, const int subpel_y,
- const struct scale_factors *sf, int w,
- int h, ConvolveParams *conv_params,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
- int xs, int ys,
+ int w, int h, ConvolveParams *conv_params, InterpFilters interp_filters,
+ int xs, int ys,
#if CONFIG_SUPERTX
- int wedge_offset_x, int wedge_offset_y,
+ int wedge_offset_x, int wedge_offset_y,
#endif // CONFIG_SUPERTX
- int plane,
+ int plane,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- const WarpTypesAllowed *warp_types,
- int p_col, int p_row, int ref,
+ const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- MACROBLOCKD *xd);
-#endif // CONFIG_EXT_INTER
+ MACROBLOCKD *xd);
static INLINE int round_mv_comp_q4(int value) {
return (value < 0 ? value - 2 : value + 2) / 4;
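round_mv_comp_q4() rounds to the nearest integer with ties away from zero, relying on C division truncating toward zero; it is used when averaging four quarter-unit MV components. Worked values:

/*   round_mv_comp_q4(7)  == (7 + 2) / 4  ==  2   (1.75 -> 2)
 *   round_mv_comp_q4(6)  == (6 + 2) / 4  ==  2   (1.5 ties away from zero)
 *   round_mv_comp_q4(-6) == (-6 - 2) / 4 == -2   (-8 / 4)
 *   round_mv_comp_q4(-7) == (-7 - 2) / 4 == -2   (-9 / 4 truncates toward 0)
 */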
@@ -588,18 +372,13 @@ void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
#if CONFIG_SUPERTX
void av1_build_inter_predictor_sb_sub8x8_extend(const AV1_COMMON *cm,
- MACROBLOCKD *xd,
-#if CONFIG_EXT_INTER
- int mi_row_ori, int mi_col_ori,
-#endif // CONFIG_EXT_INTER
- int mi_row, int mi_col,
- int plane, BLOCK_SIZE bsize,
- int block);
+ MACROBLOCKD *xd, int mi_row_ori,
+ int mi_col_ori, int mi_row,
+ int mi_col, int plane,
+ BLOCK_SIZE bsize, int block);
void av1_build_inter_predictor_sb_extend(const AV1_COMMON *cm, MACROBLOCKD *xd,
-#if CONFIG_EXT_INTER
int mi_row_ori, int mi_col_ori,
-#endif // CONFIG_EXT_INTER
int mi_row, int mi_col, int plane,
BLOCK_SIZE bsize);
struct macroblockd_plane;
@@ -614,11 +393,7 @@ void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, const MV *src_mv,
const struct scale_factors *sf, int w, int h,
ConvolveParams *conv_params,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ InterpFilters interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
const WarpTypesAllowed *warp_types, int p_col,
int p_row, int plane, int ref,
@@ -630,11 +405,7 @@ void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
void av1_highbd_build_inter_predictor(
const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg,
-#if CONFIG_DUAL_FILTER
- const InterpFilter *interp_filter,
-#else
- const InterpFilter interp_filter,
-#endif
+ InterpFilters interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
const WarpTypesAllowed *warp_types, int p_col, int p_row,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -657,11 +428,11 @@ static INLINE void setup_pred_plane(struct buf_2d *dst, BLOCK_SIZE bsize,
const struct scale_factors *scale,
int subsampling_x, int subsampling_y) {
#if CONFIG_CHROMA_SUB8X8
- if (bsize < BLOCK_8X8) {
- // Offset the buffer pointer
- if (subsampling_y && (mi_row & 0x01)) mi_row -= 1;
- if (subsampling_x && (mi_col & 0x01)) mi_col -= 1;
- }
+ // Offset the buffer pointer
+ if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1))
+ mi_row -= 1;
+ if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1))
+ mi_col -= 1;
#else
(void)bsize;
#endif
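The rewritten guard keys on the block's MI-unit dimensions instead of bsize < BLOCK_8X8, so only a dimension that is a single MI unit gets snapped to even alignment. A hypothetical trace, assuming 4x4 MI units (CB4X4):

/* BLOCK_4X8 at (mi_row, mi_col) = (5, 7) with 4:2:0 subsampling:
 *   mi_size_high[BLOCK_4X8] == 2 -> row guard fails, mi_row stays 5;
 *   mi_size_wide[BLOCK_4X8] == 1 -> 7 is odd, mi_col snaps to 6.
 * The old bsize < BLOCK_8X8 test would have snapped mi_row to 4 as well. */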
@@ -740,16 +511,8 @@ static INLINE int has_subpel_mv_component(const MODE_INFO *const mi,
static INLINE void set_default_interp_filters(
MB_MODE_INFO *const mbmi, InterpFilter frame_interp_filter) {
-#if CONFIG_DUAL_FILTER
- int dir;
- for (dir = 0; dir < 4; ++dir)
- mbmi->interp_filter[dir] = frame_interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : frame_interp_filter;
-#else
- mbmi->interp_filter = frame_interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
- : frame_interp_filter;
-#endif // CONFIG_DUAL_FILTER
+ mbmi->interp_filters =
+ av1_broadcast_interp_filter(av1_unswitchable_filter(frame_interp_filter));
}
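The per-direction filter array is gone; InterpFilters is now a single packed word. A minimal sketch of the packing, assuming av1_make_interp_filters()/av1_broadcast_interp_filter() store the y filter in the low 16 bits and the x filter in the high 16 bits, as in av1/common/filter.h of this snapshot:

#include <stdint.h>
typedef uint32_t InterpFiltersSketch;
static InterpFiltersSketch broadcast_filter_sketch(uint16_t f) {
  return (uint32_t)f | ((uint32_t)f << 16); /* same filter, both directions */
}
/* set_default_interp_filters() first maps SWITCHABLE to EIGHTTAP_REGULAR
 * (av1_unswitchable_filter) and then broadcasts the result. */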
static INLINE int av1_is_interp_needed(const MACROBLOCKD *const xd) {
@@ -810,7 +573,6 @@ void av1_build_ncobmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
#endif
#endif // CONFIG_MOTION_VAR
-#if CONFIG_EXT_INTER
#define MASK_MASTER_SIZE ((MAX_WEDGE_SIZE) << 1)
#define MASK_MASTER_STRIDE (MASK_MASTER_SIZE)
@@ -836,26 +598,26 @@ const uint8_t *av1_get_compound_type_mask_inverse(
const uint8_t *av1_get_compound_type_mask(
const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type);
#if CONFIG_INTERINTRA
-void av1_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred,
- uint8_t *upred, uint8_t *vpred,
- int ystride, int ustride, int vstride,
- BUFFER_SET *ctx, BLOCK_SIZE bsize);
-void av1_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
- int ystride, BUFFER_SET *ctx,
+void av1_build_interintra_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ uint8_t *ypred, uint8_t *upred,
+ uint8_t *vpred, int ystride, int ustride,
+ int vstride, BUFFER_SET *ctx,
+ BLOCK_SIZE bsize);
+void av1_build_interintra_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ uint8_t *ypred, int ystride,
+ BUFFER_SET *ctx, BLOCK_SIZE bsize);
+void av1_build_interintra_predictors_sbc(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ uint8_t *upred, int ustride,
+ BUFFER_SET *ctx, int plane,
BLOCK_SIZE bsize);
-void av1_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
- int ustride, BUFFER_SET *ctx,
- int plane, BLOCK_SIZE bsize);
-void av1_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
- uint8_t *vpred, int ustride,
- int vstride, BUFFER_SET *ctx,
- BLOCK_SIZE bsize);
-
-void av1_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
- BLOCK_SIZE bsize, int plane,
- BUFFER_SET *ctx,
- uint8_t *intra_pred,
- int intra_stride);
+void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ uint8_t *upred, uint8_t *vpred,
+ int ustride, int vstride,
+ BUFFER_SET *ctx, BLOCK_SIZE bsize);
+
+void av1_build_intra_predictors_for_interintra(
+ const AV1_COMMON *cm, MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
+ BUFFER_SET *ctx, uint8_t *intra_pred, int intra_stride);
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
const uint8_t *inter_pred, int inter_stride,
const uint8_t *intra_pred, int intra_stride);
@@ -871,7 +633,45 @@ void av1_build_wedge_inter_predictor_from_buf(
#endif // CONFIG_SUPERTX
uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3],
int ext_dst_stride1[3]);
-#endif // CONFIG_EXT_INTER
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+#define ASSIGN_ALIGNED_PTRS(p, a, s) \
+ p[0] = a; \
+ p[1] = a + s; \
+ p[2] = a + 2 * s;
+
+#define ASSIGN_ALIGNED_PTRS_HBD(p, a, s, l) \
+ p[0] = CONVERT_TO_BYTEPTR(a); \
+ p[1] = CONVERT_TO_BYTEPTR(a + s * l); \
+ p[2] = CONVERT_TO_BYTEPTR(a + 2 * s * l);
+
+void alloc_ncobmc_pred_buffer(MACROBLOCKD *const xd);
+void free_ncobmc_pred_buffer(MACROBLOCKD *const xd);
+void set_sb_mi_boundaries(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
+ const int mi_row, const int mi_col);
+
+void reset_xd_boundary(MACROBLOCKD *xd, int mi_row, int bh, int mi_col, int bw,
+ int mi_rows, int mi_cols);
+
+void get_pred_from_intrpl_buf(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int plane);
+
+void build_ncobmc_intrpl_pred(const AV1_COMMON *const cm, MACROBLOCKD *xd,
+ int plane, int pxl_row, int pxl_col,
+ BLOCK_SIZE bsize, uint8_t *preds[][MAX_MB_PLANE],
+ int ps[MAX_MB_PLANE], // pred buffer strides
+ int mode);
+
+void av1_get_ext_blk_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
+ int mi_row, int mi_col,
+ uint8_t *dst_buf[][MAX_MB_PLANE],
+ int dst_stride[MAX_MB_PLANE]);
+
+void av1_get_ori_blk_pred(const AV1_COMMON *cm, MACROBLOCKD *xd, int bsize,
+ int mi_row, int mi_col,
+ uint8_t *dst_buf[MAX_MB_PLANE],
+ int dst_stride[MAX_MB_PLANE]);
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/common/reconintra.c b/third_party/aom/av1/common/reconintra.c
index 0fdb6183b..c6d57b742 100644
--- a/third_party/aom/av1/common/reconintra.c
+++ b/third_party/aom/av1/common/reconintra.c
@@ -44,6 +44,10 @@ enum {
#endif // CONFIG_INTRA_EDGE_UPSAMPLE
#endif // CONFIG_INTRA_EDGE
+#define INTRA_USES_EXT_TRANSFORMS 1
+#define INTRA_USES_RECT_TRANSFORMS \
+ (CONFIG_RECT_TX && (CONFIG_VAR_TX || CONFIG_EXT_TX))
+
static const uint8_t extend_modes[INTRA_MODES] = {
NEED_ABOVE | NEED_LEFT, // DC
NEED_ABOVE, // V
@@ -54,13 +58,11 @@ static const uint8_t extend_modes[INTRA_MODES] = {
NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D153
NEED_LEFT | NEED_BOTTOMLEFT, // D207
NEED_ABOVE | NEED_ABOVERIGHT, // D63
-#if CONFIG_ALT_INTRA
- NEED_LEFT | NEED_ABOVE, // SMOOTH
+ NEED_LEFT | NEED_ABOVE, // SMOOTH
#if CONFIG_SMOOTH_HV
NEED_LEFT | NEED_ABOVE, // SMOOTH_V
NEED_LEFT | NEED_ABOVE, // SMOOTH_H
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // TM
};
@@ -156,6 +158,13 @@ static const uint16_t orders_4x16[256] = {
};
#endif
+static const uint16_t orders_32x128[4] = {
+ 0, 1, 2, 3,
+};
+static const uint16_t orders_128x32[4] = {
+ 0, 1, 2, 3,
+};
+
#if CONFIG_CB4X4 || CONFIG_EXT_PARTITION
static const uint16_t orders_16x8[128] = {
0, 2, 8, 10, 32, 34, 40, 42, 1, 3, 9, 11, 33, 35, 41, 43,
@@ -387,8 +396,10 @@ static const uint16_t *const orders[BLOCK_SIZES_ALL] = {
orders_64x128, orders_128x64, orders_128x128,
// 4x16, 16x4, 8x32
orders_4x16, orders_16x4, orders_8x32,
- // 32x8
- orders_32x8
+ // 32x8, 16x64, 64x16
+ orders_32x8, orders_16x64, orders_64x16,
+ // 32x128, 128x32
+ orders_32x128, orders_128x32
};
/* clang-format on */
#else
@@ -417,13 +428,13 @@ static const uint16_t *const orders[BLOCK_SIZES_ALL] = {
orders_64x128, orders_128x64, orders_128x128,
// 4x16, 16x4, 8x32
orders_8x32, orders_32x8, orders_16x64,
- // 32x8
- orders_64x16
+ // 32x8, 16x64, 64x16
+ orders_64x16, orders_32x128, orders_128x32
};
/* clang-format on */
#endif // CONFIG_EXT_PARTITION
-#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
static const uint16_t orders_verta_64x64[4] = {
0, 2, 1, 2,
};
@@ -511,11 +522,11 @@ static const uint16_t *const orders_verta[BLOCK_SIZES] = {
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_EXT_PARTITION_TYPES
-static int has_top_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
- int top_available, int right_available,
-#if CONFIG_EXT_PARTITION_TYPES
+static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
+ int mi_col, int top_available, int right_available,
+#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
PARTITION_TYPE partition,
-#endif
+#endif // CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
TX_SIZE txsz, int row_off, int col_off, int ss_x) {
if (!top_available || !right_available) return 0;
@@ -551,8 +562,9 @@ static int has_top_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
const int bw_in_mi_log2 = mi_width_log2_lookup[bsize];
const int bh_in_mi_log2 = mi_height_log2_lookup[bsize];
- const int blk_row_in_sb = (mi_row & MAX_MIB_MASK) >> bh_in_mi_log2;
- const int blk_col_in_sb = (mi_col & MAX_MIB_MASK) >> bw_in_mi_log2;
+ const int sb_mi_size = mi_size_high[cm->sb_size];
+ const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
+ const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
// Top row of superblock: so top-right pixels are in the top and/or
// top-right superblocks, both of which are already available.
@@ -560,12 +572,12 @@ static int has_top_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
// Rightmost column of superblock (and not the top row): so top-right pixels
// fall in the right superblock, which is not available yet.
- if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= MAX_MIB_SIZE) return 0;
+ if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) return 0;
// General case (neither top row nor rightmost column): check if the
// top-right block is coded before the current block.
const uint16_t *const order =
-#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
(partition == PARTITION_VERT_A) ? orders_verta[bsize] :
#endif // CONFIG_EXT_PARTITION_TYPES
orders[bsize];
@@ -581,8 +593,8 @@ static int has_top_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
}
}
-static int has_bottom_left(BLOCK_SIZE bsize, int mi_row, int mi_col,
- int bottom_available, int left_available,
+static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
+ int mi_col, int bottom_available, int left_available,
TX_SIZE txsz, int row_off, int col_off, int ss_y) {
if (!bottom_available || !left_available) return 0;
@@ -604,8 +616,9 @@ static int has_bottom_left(BLOCK_SIZE bsize, int mi_row, int mi_col,
const int bw_in_mi_log2 = mi_width_log2_lookup[bsize];
const int bh_in_mi_log2 = mi_height_log2_lookup[bsize];
- const int blk_row_in_sb = (mi_row & MAX_MIB_MASK) >> bh_in_mi_log2;
- const int blk_col_in_sb = (mi_col & MAX_MIB_MASK) >> bw_in_mi_log2;
+ const int sb_mi_size = mi_size_high[cm->sb_size];
+ const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
+ const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
// Leftmost column of superblock: so bottom-left pixels may be in the left
// and/or bottom-left superblocks. But only the left superblock is
@@ -617,13 +630,13 @@ static int has_bottom_left(BLOCK_SIZE bsize, int mi_row, int mi_col,
ss_y;
const int row_off_in_sb = blk_start_row_off + row_off;
const int sb_height_unit =
- MAX_MIB_SIZE << (MI_SIZE_LOG2 - tx_size_wide_log2[0]) >> ss_y;
+ sb_mi_size << (MI_SIZE_LOG2 - tx_size_wide_log2[0]) >> ss_y;
return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
}
// Bottom row of superblock (and not the leftmost column): so bottom-left
// pixels fall in the bottom superblock, which is not available yet.
- if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= MAX_MIB_SIZE) return 0;
+ if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
// General case (neither leftmost column nor bottom row): check if the
// bottom-left block is coded before the current block.
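Deriving the mask from cm->sb_size instead of the compile-time MAX_MIB_MASK keeps these availability tests correct when the frame uses smaller superblocks than the build's maximum. A worked example, assuming 4x4 MI units and a CONFIG_EXT_PARTITION build (MAX_MIB_SIZE == 32):

/* Frame coded with 64x64 superblocks -> sb_mi_size == 16. For a 16x16
 * block (bh_in_mi_log2 == 2) at mi_row == 12:
 *   new: blk_row_in_sb = (12 & 15) >> 2 = 3; (3 + 1) << 2 = 16 >= 16,
 *        so this is the SB's bottom row and bottom-left is unavailable.
 *   old: (12 & 31) >> 2 = 3; 16 >= 32 is false, so bottom-left was
 *        wrongly reported available. */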
@@ -659,6 +672,17 @@ static void av1_init_intra_predictors_internal(void) {
assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
#endif // CONFIG_EXT_INTRA
+#if CONFIG_TX64X64
+#define INIT_RECTANGULAR(p, type) \
+ p[TX_4X8] = aom_##type##_predictor_4x8; \
+ p[TX_8X4] = aom_##type##_predictor_8x4; \
+ p[TX_8X16] = aom_##type##_predictor_8x16; \
+ p[TX_16X8] = aom_##type##_predictor_16x8; \
+ p[TX_16X32] = aom_##type##_predictor_16x32; \
+ p[TX_32X16] = aom_##type##_predictor_32x16; \
+ p[TX_32X64] = aom_##type##_predictor_32x64; \
+ p[TX_64X32] = aom_##type##_predictor_64x32;
+#else
#define INIT_RECTANGULAR(p, type) \
p[TX_4X8] = aom_##type##_predictor_4x8; \
p[TX_8X4] = aom_##type##_predictor_8x4; \
@@ -666,6 +690,7 @@ static void av1_init_intra_predictors_internal(void) {
p[TX_16X8] = aom_##type##_predictor_16x8; \
p[TX_16X32] = aom_##type##_predictor_16x32; \
p[TX_32X16] = aom_##type##_predictor_32x16;
+#endif // CONFIG_TX64X64
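For reference, the macro simply fans one predictor name out over the rectangular transform sizes; e.g. INIT_RECTANGULAR(pred[V_PRED], v) expands (with CONFIG_TX64X64) to:

/* pred[V_PRED][TX_4X8]   = aom_v_predictor_4x8;
 * pred[V_PRED][TX_8X4]   = aom_v_predictor_8x4;
 * ...
 * pred[V_PRED][TX_32X64] = aom_v_predictor_32x64;
 * pred[V_PRED][TX_64X32] = aom_v_predictor_64x32; */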
#if CONFIG_TX64X64
#define INIT_NO_4X4(p, type) \
@@ -702,16 +727,12 @@ static void av1_init_intra_predictors_internal(void) {
INIT_ALL_SIZES(pred[D135_PRED], d135);
INIT_ALL_SIZES(pred[D153_PRED], d153);
-#if CONFIG_ALT_INTRA
INIT_ALL_SIZES(pred[TM_PRED], paeth);
INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
#if CONFIG_SMOOTH_HV
INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
#endif // CONFIG_SMOOTH_HV
-#else
- INIT_ALL_SIZES(pred[TM_PRED], tm);
-#endif // CONFIG_ALT_INTRA
INIT_ALL_SIZES(dc_pred[0][0], dc_128);
INIT_ALL_SIZES(dc_pred[0][1], dc_top);
@@ -728,16 +749,12 @@ static void av1_init_intra_predictors_internal(void) {
INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135);
INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153);
-#if CONFIG_ALT_INTRA
INIT_ALL_SIZES(pred_high[TM_PRED], highbd_paeth);
INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
#if CONFIG_SMOOTH_HV
INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
#endif // CONFIG_SMOOTH_HV
-#else
- INIT_ALL_SIZES(pred_high[TM_PRED], highbd_tm);
-#endif // CONFIG_ALT_INTRA
INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
@@ -797,67 +814,6 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
assert(dy == 1);
assert(dx > 0);
-#if CONFIG_INTRA_INTERP
- if (filter_type != INTRA_FILTER_LINEAR) {
- const int pad_size = SUBPEL_TAPS >> 1;
- int len;
- DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][MAX_SB_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, src[MAX_SB_SIZE + SUBPEL_TAPS]);
- uint8_t flags[SUBPEL_SHIFTS];
-
- memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0]));
- memset(src, above[0], pad_size * sizeof(above[0]));
- memcpy(src + pad_size, above, (bw + bh) * sizeof(above[0]));
- memset(src + pad_size + bw + bh, above[bw + bh - 1],
- pad_size * sizeof(above[0]));
- flags[0] = 1;
- x = dx;
- for (r = 0; r < bh; ++r, dst += stride, x += dx) {
- base = x >> 8;
- shift = x & 0xFF;
- shift = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
- if (shift == SUBPEL_SHIFTS) {
- base += 1;
- shift = 0;
- }
- len = AOMMIN(bw, bw + bh - 1 - base);
- if (len <= 0) {
- int i;
- for (i = r; i < bh; ++i) {
- memset(dst, above[bw + bh - 1], bw * sizeof(dst[0]));
- dst += stride;
- }
- return;
- }
-
- if (len <= (bw >> 1) && !flags[shift]) {
- base = x >> 8;
- shift = x & 0xFF;
- for (c = 0; c < len; ++c) {
- val = intra_subpel_interp(base, shift, above, 0, bw + bh - 1,
- filter_type);
- dst[c] = clip_pixel(val);
- ++base;
- }
- } else {
- if (!flags[shift]) {
- const int16_t *filter = av1_intra_filter_kernels[filter_type][shift];
- aom_convolve8_horiz(src + pad_size, bw + bh, buf[shift], bw + bh,
- filter, 16, NULL, 16, bw + bh,
- bw + bh < 16 ? 2 : 1);
- flags[shift] = 1;
- }
- memcpy(dst, shift == 0 ? src + pad_size + base : &buf[shift][base],
- len * sizeof(dst[0]));
- }
-
- if (len < bw)
- memset(dst + len, above[bw + bh - 1], (bw - len) * sizeof(dst[0]));
- }
- return;
- }
-#endif // CONFIG_INTRA_INTERP
-
#if !CONFIG_INTRA_EDGE_UPSAMPLE
const int upsample_above = 0;
#endif // !CONFIG_INTRA_EDGE_UPSAMPLE
@@ -879,8 +835,13 @@ static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
for (c = 0; c < bw; ++c, base += base_inc) {
if (base < max_base_x) {
+#if CONFIG_INTRA_INTERP
+ val = intra_subpel_interp(base, shift, above, 0, bw + bh - 1,
+ filter_type);
+#else // CONFIG_INTRA_INTERP
val = above[base] * (256 - shift) + above[base + 1] * shift;
val = ROUND_POWER_OF_TWO(val, 8);
+#endif // CONFIG_INTRA_INTERP
dst[c] = clip_pixel(val);
} else {
dst[c] = above[max_base_x];
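The non-INTRA_INTERP path is plain two-tap linear interpolation with an 8-bit fraction. For example, with above[base] == 100, above[base + 1] == 120 and shift == 64 (a quarter-sample offset):

/* val = 100 * (256 - 64) + 120 * 64 = 19200 + 7680 = 26880
 * ROUND_POWER_OF_TWO(26880, 8) = (26880 + 128) >> 8 = 105 */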
@@ -960,77 +921,6 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
assert(dx == 1);
assert(dy > 0);
-#if CONFIG_INTRA_INTERP
- if (filter_type != INTRA_FILTER_LINEAR) {
- const int pad_size = SUBPEL_TAPS >> 1;
- int len, i;
- DECLARE_ALIGNED(16, uint8_t, buf[MAX_SB_SIZE][4 * SUBPEL_SHIFTS]);
- DECLARE_ALIGNED(16, uint8_t, src[(MAX_SB_SIZE + SUBPEL_TAPS) * 4]);
- uint8_t flags[SUBPEL_SHIFTS];
-
- memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0]));
- for (i = 0; i < pad_size; ++i) src[4 * i] = left[0];
- for (i = 0; i < bw + bh; ++i) src[4 * (i + pad_size)] = left[i];
- for (i = 0; i < pad_size; ++i)
- src[4 * (i + bw + bh + pad_size)] = left[bw + bh - 1];
- flags[0] = 1;
- y = dy;
- for (c = 0; c < bw; ++c, y += dy) {
- base = y >> 8;
- shift = y & 0xFF;
- shift = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
- if (shift == SUBPEL_SHIFTS) {
- base += 1;
- shift = 0;
- }
- len = AOMMIN(bh, bw + bh - 1 - base);
-
- if (len <= 0) {
- for (r = 0; r < bh; ++r) {
- dst[r * stride + c] = left[bw + bh - 1];
- }
- continue;
- }
-
- if (len <= (bh >> 1) && !flags[shift]) {
- base = y >> 8;
- shift = y & 0xFF;
- for (r = 0; r < len; ++r) {
- val = intra_subpel_interp(base, shift, left, 0, bw + bh - 1,
- filter_type);
- dst[r * stride + c] = clip_pixel(val);
- ++base;
- }
- } else {
- if (!flags[shift]) {
- const int16_t *filter = av1_intra_filter_kernels[filter_type][shift];
- aom_convolve8_vert(src + 4 * pad_size, 4, buf[0] + 4 * shift,
- 4 * SUBPEL_SHIFTS, NULL, 16, filter, 16,
- bw + bh < 16 ? 4 : 4, bw + bh);
- flags[shift] = 1;
- }
-
- if (shift == 0) {
- for (r = 0; r < len; ++r) {
- dst[r * stride + c] = left[r + base];
- }
- } else {
- for (r = 0; r < len; ++r) {
- dst[r * stride + c] = buf[r + base][4 * shift];
- }
- }
- }
-
- if (len < bh) {
- for (r = len; r < bh; ++r) {
- dst[r * stride + c] = left[bw + bh - 1];
- }
- }
- }
- return;
- }
-#endif // CONFIG_INTRA_INTERP
-
#if !CONFIG_INTRA_EDGE_UPSAMPLE
const int upsample_left = 0;
#endif // !CONFIG_INTRA_EDGE_UPSAMPLE
@@ -1044,8 +934,13 @@ static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
for (r = 0; r < bh; ++r, base += base_inc) {
if (base < max_base_y) {
+#if CONFIG_INTRA_INTERP
+ val =
+ intra_subpel_interp(base, shift, left, 0, bw + bh - 1, filter_type);
+#else // CONFIG_INTRA_INTERP
val = left[base] * (256 - shift) + left[base + 1] * shift;
val = ROUND_POWER_OF_TWO(val, 8);
+#endif // CONFIG_INTRA_INTERP
dst[r * stride + c] = clip_pixel(val);
} else {
for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
@@ -2324,7 +2219,7 @@ static int intra_edge_filter_strength(int bsz, int delta) {
return strength;
}
-static void filter_intra_edge(uint8_t *p, int sz, int strength) {
+void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
if (!strength) return;
const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
@@ -2348,7 +2243,7 @@ static void filter_intra_edge(uint8_t *p, int sz, int strength) {
}
#if CONFIG_HIGHBITDEPTH
-static void filter_intra_edge_high(uint16_t *p, int sz, int strength) {
+void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
if (!strength) return;
const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
@@ -2378,7 +2273,7 @@ static int use_intra_edge_upsample(int bsz, int delta) {
return (bsz == 4 && d > 0 && d < 56);
}
-static void upsample_intra_edge(uint8_t *p, int sz) {
+void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
// interpolate half-sample positions
assert(sz <= MAX_UPSAMPLE_SZ);
@@ -2402,7 +2297,7 @@ static void upsample_intra_edge(uint8_t *p, int sz) {
}
#if CONFIG_HIGHBITDEPTH
-static void upsample_intra_edge_high(uint16_t *p, int sz, int bd) {
+void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
// interpolate half-sample positions
assert(sz <= MAX_UPSAMPLE_SZ);
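A minimal sketch of what the renamed upsampler computes, assuming the (-1, 9, 9, -1)/16 half-sample kernel that av1_upsample_intra_edge_c applies in this snapshot (the real routine works in place; the _demo names here are hypothetical):

#include <stdint.h>
static uint8_t clip_pixel_demo(int v) {
  return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
}
/* in[] needs one sample of padding on the left and two on the right;
 * out[] receives 2 * sz samples. */
static void upsample_edge_demo(const uint8_t *in, uint8_t *out, int sz) {
  for (int i = 0; i < sz; ++i) {
    out[2 * i] = in[i]; /* integer positions pass through */
    const int s = -in[i - 1] + 9 * in[i] + 9 * in[i + 1] - in[i + 2];
    out[2 * i + 1] = clip_pixel_demo((s + 8) >> 4);
  }
}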
@@ -2438,17 +2333,15 @@ static void build_intra_predictors_high(
int i;
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- DECLARE_ALIGNED(16, uint16_t, left_data[MAX_TX_SIZE * 2 + 16]);
- DECLARE_ALIGNED(16, uint16_t, above_data[MAX_TX_SIZE * 2 + 16]);
+ DECLARE_ALIGNED(16, uint16_t, left_data[MAX_TX_SIZE * 2 + 32]);
+ DECLARE_ALIGNED(16, uint16_t, above_data[MAX_TX_SIZE * 2 + 32]);
uint16_t *const above_row = above_data + 16;
uint16_t *const left_col = left_data + 16;
const int txwpx = tx_size_wide[tx_size];
const int txhpx = tx_size_high[tx_size];
-#if !(CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX && \
- (CONFIG_VAR_TX || CONFIG_EXT_TX))
+#if !INTRA_USES_RECT_TRANSFORMS
assert(txwpx == txhpx);
-#endif // !(CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX &&
- // (CONFIG_VAR_TX || CONFIG_EXT_TX))
+#endif // !INTRA_USES_RECT_TRANSFORMS
int need_left = extend_modes[mode] & NEED_LEFT;
int need_above = extend_modes[mode] & NEED_ABOVE;
int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
@@ -2632,25 +2525,25 @@ static void build_intra_predictors_high(
if (need_above && n_top_px > 0) {
const int strength = intra_edge_filter_strength(txwpx, p_angle - 90);
const int n_px = n_top_px + ab_le + (need_right ? n_topright_px : 0);
- filter_intra_edge_high(above_row - ab_le, n_px, strength);
+ av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
}
if (need_left && n_left_px > 0) {
const int strength = intra_edge_filter_strength(txhpx, p_angle - 180);
const int n_px =
n_left_px + ab_le + (need_bottom ? n_bottomleft_px : 0);
- filter_intra_edge_high(left_col - ab_le, n_px, strength);
+ av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
}
}
#if CONFIG_INTRA_EDGE_UPSAMPLE
const int upsample_above = use_intra_edge_upsample(txwpx, p_angle - 90);
- if (upsample_above) {
+ if (need_above && upsample_above) {
const int n_px = txwpx + (need_right ? txhpx : 0);
- upsample_intra_edge_high(above_row, n_px, xd->bd);
+ av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
}
const int upsample_left = use_intra_edge_upsample(txhpx, p_angle - 180);
- if (upsample_left) {
+ if (need_left && upsample_left) {
const int n_px = txhpx + (need_bottom ? txwpx : 0);
- upsample_intra_edge_high(left_col, n_px, xd->bd);
+ av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
}
#endif // CONFIG_INTRA_EDGE_UPSAMPLE
#endif // CONFIG_INTRA_EDGE
@@ -2684,17 +2577,15 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
int plane) {
int i;
const uint8_t *above_ref = ref - ref_stride;
- DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 16]);
- DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 16]);
+ DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
+ DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
uint8_t *const above_row = above_data + 16;
uint8_t *const left_col = left_data + 16;
const int txwpx = tx_size_wide[tx_size];
const int txhpx = tx_size_high[tx_size];
-#if !(CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX && \
- (CONFIG_VAR_TX || CONFIG_EXT_TX))
+#if !INTRA_USES_RECT_TRANSFORMS
assert(txwpx == txhpx);
-#endif // !(CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX &&
- // (CONFIG_VAR_TX || CONFIG_EXT_TX))
+#endif // !INTRA_USES_RECT_TRANSFORMS
int need_left = extend_modes[mode] & NEED_LEFT;
int need_above = extend_modes[mode] & NEED_ABOVE;
int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
@@ -2876,25 +2767,25 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
if (need_above && n_top_px > 0) {
const int strength = intra_edge_filter_strength(txwpx, p_angle - 90);
const int n_px = n_top_px + ab_le + (need_right ? n_topright_px : 0);
- filter_intra_edge(above_row - ab_le, n_px, strength);
+ av1_filter_intra_edge(above_row - ab_le, n_px, strength);
}
if (need_left && n_left_px > 0) {
const int strength = intra_edge_filter_strength(txhpx, p_angle - 180);
const int n_px =
n_left_px + ab_le + (need_bottom ? n_bottomleft_px : 0);
- filter_intra_edge(left_col - ab_le, n_px, strength);
+ av1_filter_intra_edge(left_col - ab_le, n_px, strength);
}
}
#if CONFIG_INTRA_EDGE_UPSAMPLE
const int upsample_above = use_intra_edge_upsample(txwpx, p_angle - 90);
- if (upsample_above) {
+ if (need_above && upsample_above) {
const int n_px = txwpx + (need_right ? txhpx : 0);
- upsample_intra_edge(above_row, n_px);
+ av1_upsample_intra_edge(above_row, n_px);
}
const int upsample_left = use_intra_edge_upsample(txhpx, p_angle - 180);
- if (upsample_left) {
+ if (need_left && upsample_left) {
const int n_px = txhpx + (need_bottom ? txwpx : 0);
- upsample_intra_edge(left_col, n_px);
+ av1_upsample_intra_edge(left_col, n_px);
}
#endif // CONFIG_INTRA_EDGE_UPSAMPLE
#endif // CONFIG_INTRA_EDGE
@@ -2912,22 +2803,15 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
// predict
if (mode == DC_PRED) {
-#if CONFIG_CFL
- // CFL predict its own DC_PRED for Chromatic planes
- if (plane == AOM_PLANE_Y) {
-#endif
- dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
- left_col);
-#if CONFIG_CFL
- }
-#endif
-
+ dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
+ left_col);
} else {
pred[mode][tx_size](dst, dst_stride, above_row, left_col);
}
}
-static void predict_intra_block_helper(const MACROBLOCKD *xd, int wpx, int hpx,
+static void predict_intra_block_helper(const AV1_COMMON *cm,
+ const MACROBLOCKD *xd, int wpx, int hpx,
TX_SIZE tx_size, PREDICTION_MODE mode,
const uint8_t *ref, int ref_stride,
uint8_t *dst, int dst_stride,
@@ -2951,11 +2835,9 @@ static void predict_intra_block_helper(const MACROBLOCKD *xd, int wpx, int hpx,
const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
const int txwpx = tx_size_wide[tx_size];
const int txhpx = tx_size_high[tx_size];
-#if !(CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX && \
- (CONFIG_VAR_TX || CONFIG_EXT_TX))
+#if !INTRA_USES_RECT_TRANSFORMS
assert(txwpx == txhpx);
-#endif // !(CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX &&
- // (CONFIG_VAR_TX || CONFIG_EXT_TX))
+#endif // !INTRA_USES_RECT_TRANSFORMS
#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 && !CONFIG_CHROMA_SUB8X8
const int xr_chr_offset = (pd->subsampling_x && bsize < BLOCK_8X8) ? 2 : 0;
const int yd_chr_offset = (pd->subsampling_y && bsize < BLOCK_8X8) ? 2 : 0;
@@ -2976,7 +2858,7 @@ static void predict_intra_block_helper(const MACROBLOCKD *xd, int wpx, int hpx,
(MI_SIZE_LOG2 - tx_size_wide_log2[0])) <
xd->tile.mi_col_end;
const int bottom_available = (yd > 0);
-#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
const PARTITION_TYPE partition = xd->mi[0]->mbmi.partition;
#endif
@@ -2986,15 +2868,14 @@ static void predict_intra_block_helper(const MACROBLOCKD *xd, int wpx, int hpx,
#endif
const int have_top_right =
- has_top_right(bsize, mi_row, mi_col, have_top, right_available,
-#if CONFIG_EXT_PARTITION_TYPES
+ has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available,
+#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
partition,
-#endif
+#endif // CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
tx_size, row_off, col_off, pd->subsampling_x);
const int have_bottom_left =
- has_bottom_left(bsize, mi_row, mi_col, bottom_available, have_left,
+ has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left,
tx_size, row_off, col_off, pd->subsampling_y);
-#if CONFIG_PALETTE
if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
const int stride = wpx;
int r, c;
@@ -3023,7 +2904,6 @@ static void predict_intra_block_helper(const MACROBLOCKD *xd, int wpx, int hpx,
#endif // CONFIG_HIGHBITDEPTH
return;
}
-#endif // CONFIG_PALETTE
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -3043,8 +2923,9 @@ static void predict_intra_block_helper(const MACROBLOCKD *xd, int wpx, int hpx,
have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane);
}
-void av1_predict_intra_block_facade(MACROBLOCKD *xd, int plane, int block_idx,
- int blk_col, int blk_row, TX_SIZE tx_size) {
+void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ int plane, int block_idx, int blk_col,
+ int blk_row, TX_SIZE tx_size) {
const MODE_INFO *mi = xd->mi[0];
const MB_MODE_INFO *const mbmi = &mi->mbmi;
struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -3057,213 +2938,262 @@ void av1_predict_intra_block_facade(MACROBLOCKD *xd, int plane, int block_idx,
? get_y_mode(mi, block_raster_idx)
: get_uv_mode(mbmi->uv_mode);
#if CONFIG_CFL
- if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_DC_PRED) {
+ if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
if (plane == AOM_PLANE_U && blk_col == 0 && blk_row == 0) {
// Avoid computing the CfL parameters twice, if they have already been
// computed in cfl_rd_pick_alpha.
if (!xd->cfl->are_parameters_computed)
cfl_compute_parameters(xd, tx_size);
}
-
- cfl_predict_block(xd, dst, pd->dst.stride, blk_row, blk_col, tx_size,
- plane);
-
+ cfl_predict_block(xd, dst, dst_stride, blk_row, blk_col, tx_size, plane);
return;
}
#endif
- av1_predict_intra_block(xd, pd->width, pd->height, txsize_to_bsize[tx_size],
- mode, dst, dst_stride, dst, dst_stride, blk_col,
- blk_row, plane);
+ av1_predict_intra_block(cm, xd, pd->width, pd->height,
+ txsize_to_bsize[tx_size], mode, dst, dst_stride, dst,
+ dst_stride, blk_col, blk_row, plane);
}
-void av1_predict_intra_block(const MACROBLOCKD *xd, int wpx, int hpx,
- BLOCK_SIZE bsize, PREDICTION_MODE mode,
- const uint8_t *ref, int ref_stride, uint8_t *dst,
- int dst_stride, int col_off, int row_off,
- int plane) {
- const int block_width = block_size_wide[bsize];
- const int block_height = block_size_high[bsize];
-#if CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX && (CONFIG_VAR_TX || CONFIG_EXT_TX)
- const TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
- assert(tx_size < TX_SIZES_ALL);
-#else
- const TX_SIZE tx_size = max_txsize_lookup[bsize];
- assert(tx_size < TX_SIZES);
-#endif // CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX && (CONFIG_VAR_TX ||
- // CONFIG_EXT_TX)
+#if INTRA_USES_EXT_TRANSFORMS
+// Copy the given row of dst into the equivalent row of ref, saving
+// the overwritten data to tmp. Returns zero if no copy happened (so
+// no restore is needed)
+//
+// Note that ref_row and dst_row follow the usual hibd convention
+// where you convert to a uint16_t* with CONVERT_TO_SHORTPTR(). tmp
+// does not follow that convention: it's a genuine pointer which is
+// correctly aligned and sized for either 8 or 16 bit data.
+//
+// matching_strides is a boolean flag which should be nonzero if ref
+// and dst have the same stride.
+static int overwrite_ref_row(int matching_strides, int buf_flags,
+ int block_width, const uint8_t *dst_row,
+ uint8_t *ref_row, uint8_t *tmp_row) {
+ if (ref_row == dst_row && matching_strides) return 0;
+
+ int row_bytes = block_width;
- if (block_width == block_height) {
- predict_intra_block_helper(xd, wpx, hpx, tx_size, mode, ref, ref_stride,
- dst, dst_stride, col_off, row_off, plane);
- } else {
-#if (CONFIG_RECT_TX && (CONFIG_VAR_TX || CONFIG_EXT_TX)) || (CONFIG_EXT_INTER)
- assert((block_width == wpx && block_height == hpx) ||
- (block_width == (wpx >> 1) && block_height == hpx) ||
- (block_width == wpx && block_height == (hpx >> 1)));
#if CONFIG_HIGHBITDEPTH
- uint16_t tmp16[MAX_SB_SIZE];
-#endif // CONFIG_HIGHBITDEPTH
- uint8_t tmp[MAX_SB_SIZE];
-
- if (block_width < block_height) {
- assert(block_height == (block_width << 1));
- // Predict the top square sub-block.
- predict_intra_block_helper(xd, wpx, hpx, tx_size, mode, ref, ref_stride,
- dst, dst_stride, col_off, row_off, plane);
-#if CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX && (CONFIG_VAR_TX || CONFIG_EXT_TX)
- if (block_width == tx_size_wide[tx_size] &&
- block_height == tx_size_high[tx_size]) { // Most common case.
- return; // We are done.
- } else {
- // Can only happen for large rectangular block sizes as such large
- // transform sizes aren't available.
-#if CONFIG_EXT_PARTITION
- assert(bsize == BLOCK_32X64 || bsize == BLOCK_64X128);
-#else
- assert(bsize == BLOCK_32X64);
-#endif // CONFIG_EXT_PARTITION
-#if CONFIG_TX64X64
- assert(tx_size == TX_32X32 || tx_size == TX64X64);
+ if (buf_flags & YV12_FLAG_HIGHBITDEPTH) {
+ row_bytes *= 2;
+ ref_row = (uint8_t *)CONVERT_TO_SHORTPTR(ref_row);
+ dst_row = (const uint8_t *)CONVERT_TO_SHORTPTR(dst_row);
+ }
#else
- assert(tx_size == TX_32X32);
-#endif // CONFIG_TX64X64
- // In this case, we continue to the bottom square sub-block.
- }
-#endif // CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX && (CONFIG_VAR_TX ||
- // CONFIG_EXT_TX)
- {
- const int half_block_height = block_height >> 1;
- const int half_block_height_unit =
- half_block_height >> tx_size_wide_log2[0];
- // Cast away const to modify 'ref' temporarily; will be restored later.
- uint8_t *src_2 = (uint8_t *)ref + half_block_height * ref_stride;
- uint8_t *dst_2 = dst + half_block_height * dst_stride;
- const int row_off_2 = row_off + half_block_height_unit;
- // Save the last row of top square sub-block as 'above' row for bottom
- // square sub-block.
- if (src_2 != dst_2 || ref_stride != dst_stride) {
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
- uint16_t *dst_2_16 = CONVERT_TO_SHORTPTR(dst_2);
- memcpy(tmp16, src_2_16 - ref_stride,
- block_width * sizeof(*src_2_16));
- memcpy(src_2_16 - ref_stride, dst_2_16 - dst_stride,
- block_width * sizeof(*src_2_16));
- } else {
+ (void)buf_flags;
#endif // CONFIG_HIGHBITDEPTH
- memcpy(tmp, src_2 - ref_stride, block_width * sizeof(*src_2));
- memcpy(src_2 - ref_stride, dst_2 - dst_stride,
- block_width * sizeof(*src_2));
+
+ memcpy(tmp_row, ref_row, row_bytes);
+ memcpy(ref_row, dst_row, row_bytes);
+ return 1;
+}
+
+static void restore_ref_row(int buf_flags, int block_width,
+ const uint8_t *tmp_row, uint8_t *ref_row) {
+ int row_bytes = block_width;
#if CONFIG_HIGHBITDEPTH
- }
+ if (buf_flags & YV12_FLAG_HIGHBITDEPTH) {
+ row_bytes *= 2;
+ ref_row = (uint8_t *)CONVERT_TO_SHORTPTR(ref_row);
+ }
+#else
+ (void)buf_flags;
#endif // CONFIG_HIGHBITDEPTH
- }
- // Predict the bottom square sub-block.
- predict_intra_block_helper(xd, wpx, hpx, tx_size, mode, src_2,
- ref_stride, dst_2, dst_stride, col_off,
- row_off_2, plane);
- // Restore the last row of top square sub-block.
- if (src_2 != dst_2 || ref_stride != dst_stride) {
+
+ memcpy(ref_row, tmp_row, row_bytes);
+}
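For readers new to the convention referenced above: high-bitdepth buffers travel as uint8_t* handles and are converted at the point of use. Roughly, the aom_dsp macros encode the real uint16_t address by shifting, so element arithmetic on the handle stays consistent (a sketch, not the verbatim definitions):

/* uint16_t *real   = CONVERT_TO_SHORTPTR(handle);  handle == real >> 1
 * uint8_t  *handle = CONVERT_TO_BYTEPTR(real);
 * handle + i converts back to real + 2 * i, i.e. element i; 16-bit data
 * is even-aligned, so the shift loses nothing. */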
+
+// The column equivalent of overwrite_ref_row. ref_row and dst_row
+// point at the relevant column of the first row of the block.
+static int overwrite_ref_col(int buf_flags, int block_height,
+ const uint8_t *dst_row, int dst_stride,
+ uint8_t *ref_row, int ref_stride,
+ uint8_t *tmp_row) {
+ if (ref_row == dst_row && ref_stride == dst_stride) return 0;
+
#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
- memcpy(src_2_16 - ref_stride, tmp16,
- block_width * sizeof(*src_2_16));
- } else {
+ if (buf_flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *tmp_16 = (uint16_t *)tmp_row;
+ uint16_t *ref_16 = CONVERT_TO_SHORTPTR(ref_row);
+ const uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst_row);
+
+ for (int i = 0; i < block_height; ++i) {
+ tmp_16[i] = ref_16[i * ref_stride];
+ ref_16[i * ref_stride] = dst_16[i * dst_stride];
+ }
+ } else {
#endif // CONFIG_HIGHBITDEPTH
- memcpy(src_2 - ref_stride, tmp, block_width * sizeof(*src_2));
+ for (int i = 0; i < block_height; ++i) {
+ tmp_row[i] = ref_row[i * ref_stride];
+ ref_row[i * ref_stride] = dst_row[i * dst_stride];
+ }
#if CONFIG_HIGHBITDEPTH
- }
-#endif // CONFIG_HIGHBITDEPTH
- }
- }
- } else { // block_width > block_height
- assert(block_width == (block_height << 1));
- // Predict the left square sub-block
- predict_intra_block_helper(xd, wpx, hpx, tx_size, mode, ref, ref_stride,
- dst, dst_stride, col_off, row_off, plane);
-#if CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX && (CONFIG_VAR_TX || CONFIG_EXT_TX)
- if (block_width == tx_size_wide[tx_size] &&
- block_height == tx_size_high[tx_size]) { // Most common case.
- return; // We are done.
- } else {
- // Can only happen for large rectangular block sizes as such large
- // transform sizes aren't available.
-#if CONFIG_EXT_PARTITION
- assert(bsize == BLOCK_64X32 || bsize == BLOCK_128X64);
-#else
- assert(bsize == BLOCK_64X32);
-#endif // CONFIG_EXT_PARTITION
-#if CONFIG_TX64X64
- assert(tx_size == TX_32X32 || tx_size == TX64X64);
+ }
#else
- assert(tx_size == TX_32X32);
-#endif // CONFIG_TX64X64
- // In this case, we continue to the right square sub-block.
- }
-#endif // CONFIG_RECT_INTRA_PRED && CONFIG_RECT_TX && (CONFIG_VAR_TX ||
- // CONFIG_EXT_TX)
- {
- int i;
- const int half_block_width = block_width >> 1;
- const int half_block_width_unit =
- half_block_width >> tx_size_wide_log2[0];
- // Cast away const to modify 'ref' temporarily; will be restored later.
- uint8_t *src_2 = (uint8_t *)ref + half_block_width;
- uint8_t *dst_2 = dst + half_block_width;
- const int col_off_2 = col_off + half_block_width_unit;
- // Save the last column of left square sub-block as 'left' column for
- // right square sub-block.
- const int save_src = src_2 != dst_2 || ref_stride != dst_stride;
- if (save_src) {
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
- uint16_t *dst_2_16 = CONVERT_TO_SHORTPTR(dst_2);
- for (i = 0; i < block_height; ++i) {
- tmp16[i] = src_2_16[i * ref_stride - 1];
- src_2_16[i * ref_stride - 1] = dst_2_16[i * dst_stride - 1];
- }
- } else {
+ (void)buf_flags;
#endif // CONFIG_HIGHBITDEPTH
- for (i = 0; i < block_height; ++i) {
- tmp[i] = src_2[i * ref_stride - 1];
- src_2[i * ref_stride - 1] = dst_2[i * dst_stride - 1];
- }
+ return 1;
+}
+
+static void restore_ref_col(int buf_flags, int block_height,
+ const uint8_t *tmp_row, uint8_t *ref_row,
+ int ref_stride) {
#if CONFIG_HIGHBITDEPTH
- }
+ if (buf_flags & YV12_FLAG_HIGHBITDEPTH) {
+ const uint16_t *tmp_16 = (const uint16_t *)tmp_row;
+ uint16_t *ref_16 = CONVERT_TO_SHORTPTR(ref_row);
+
+ for (int i = 0; i < block_height; ++i) {
+ ref_16[i * ref_stride] = tmp_16[i];
+ }
+ } else {
#endif // CONFIG_HIGHBITDEPTH
- }
- // Predict the right square sub-block.
- predict_intra_block_helper(xd, wpx, hpx, tx_size, mode, src_2,
- ref_stride, dst_2, dst_stride, col_off_2,
- row_off, plane);
- // Restore the last column of left square sub-block.
- if (save_src) {
+ for (int i = 0; i < block_height; ++i) {
+ ref_row[i * ref_stride] = tmp_row[i];
+ }
#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *src_2_16 = CONVERT_TO_SHORTPTR(src_2);
- for (i = 0; i < block_height; ++i) {
- src_2_16[i * ref_stride - 1] = tmp16[i];
- }
- } else {
+ }
+#else
+ (void)buf_flags;
#endif // CONFIG_HIGHBITDEPTH
- for (i = 0; i < block_height; ++i) {
- src_2[i * ref_stride - 1] = tmp[i];
- }
+}
+#endif  // INTRA_USES_EXT_TRANSFORMS
+
+void av1_predict_intra_block(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+ int wpx, int hpx, BLOCK_SIZE bsize,
+ PREDICTION_MODE mode, const uint8_t *ref,
+ int ref_stride, uint8_t *dst, int dst_stride,
+ int col_off, int row_off, int plane) {
+ const int block_width = block_size_wide[bsize];
+ const int block_height = block_size_high[bsize];
+#if INTRA_USES_RECT_TRANSFORMS
+ const TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
+ assert(tx_size < TX_SIZES_ALL);
+#else
+ const TX_SIZE tx_size = max_txsize_lookup[bsize];
+ assert(tx_size < TX_SIZES);
+#endif // INTRA_USES_RECT_TRANSFORMS
+
+ // Start by running the helper to predict either the entire block
+ // (if the block is square or the same size as tx_size) or the top
+ // or left of the block if it's tall and thin or short and wide.
+ predict_intra_block_helper(cm, xd, wpx, hpx, tx_size, mode, ref, ref_stride,
+ dst, dst_stride, col_off, row_off, plane);
+
+// If we're not using extended transforms, this function should
+// always be called with a square block.
+#if !INTRA_USES_EXT_TRANSFORMS
+ assert(block_width == block_height);
+#endif // !INTRA_USES_EXT_TRANSFORMS
+
+ // If the block is square, we're done.
+ if (block_width == block_height) return;
+
+#if INTRA_USES_EXT_TRANSFORMS
+// If we're using rectangular transforms, we might be done even
+// though the block isn't square.
+#if INTRA_USES_RECT_TRANSFORMS
+ if (block_width == tx_size_wide[tx_size] &&
+ block_height == tx_size_high[tx_size])
+ return;
+
+ // A block should only fail to have a matching transform if it's
+ // large and rectangular (such large transform sizes aren't
+ // available).
+ assert(block_width >= 32 && block_height >= 32);
+#endif // INTRA_USES_RECT_TRANSFORMS
+
+ assert((block_width == wpx && block_height == hpx) ||
+ (block_width == (wpx >> 1) && block_height == hpx) ||
+ (block_width == wpx && block_height == (hpx >> 1)));
+
+// The tmp buffer needs to be big enough to hold MAX_SB_SIZE samples
+// from the image. If CONFIG_HIGHBITDEPTH is enabled, it also needs
+// to be big enough and correctly aligned to hold 16-bit entries.
#if CONFIG_HIGHBITDEPTH
- }
+ uint16_t tmp_buf[MAX_SB_SIZE];
+#else
+ uint8_t tmp_buf[MAX_SB_SIZE];
#endif // CONFIG_HIGHBITDEPTH
- }
- }
+ uint8_t *tmp = (uint8_t *)tmp_buf;
+
+ if (block_width < block_height) {
+ // The block is tall and thin. We've already done the top part,
+ // and need to repeat the prediction down the rest of the block.
+
+ const int tx_height = tx_size_high[tx_size];
+ const int tx_height_off = tx_height >> tx_size_wide_log2[0];
+ assert(tx_height_off << tx_size_wide_log2[0] == tx_height);
+
+ int next_row_off = row_off + tx_height_off;
+ int next_row_idx = tx_height;
+
+ while (next_row_idx < block_height) {
+ const int last_row_idx = next_row_idx - 1;
+
+ // Cast away the const to make a mutable pointer to the last
+ // row of ref. This will be snapshotted and restored later.
+ uint8_t *last_ref_row = (uint8_t *)ref + last_row_idx * ref_stride;
+ uint8_t *last_dst_row = dst + last_row_idx * dst_stride;
+
+ const int needs_restore =
+ overwrite_ref_row(ref_stride == dst_stride, xd->cur_buf->flags,
+ block_width, last_dst_row, last_ref_row, tmp);
+
+ const uint8_t *next_ref_row = ref + next_row_idx * ref_stride;
+ uint8_t *next_dst_row = dst + next_row_idx * dst_stride;
+
+ predict_intra_block_helper(cm, xd, wpx, hpx, tx_size, mode, next_ref_row,
+ ref_stride, next_dst_row, dst_stride, col_off,
+ next_row_off, plane);
+
+ if (needs_restore)
+ restore_ref_row(xd->cur_buf->flags, block_width, tmp, last_ref_row);
+
+ next_row_idx += tx_height;
+ next_row_off += tx_height_off;
+ }
+ } else {
+ // The block is short and wide. We've already done the left part,
+ // and need to repeat the prediction to the right.
+
+ const int tx_width = tx_size_wide[tx_size];
+ const int tx_width_off = tx_width >> tx_size_wide_log2[0];
+ assert(tx_width_off << tx_size_wide_log2[0] == tx_width);
+
+ int next_col_off = col_off + tx_width_off;
+ int next_col_idx = tx_width;
+
+ while (next_col_idx < block_width) {
+ const int last_col_idx = next_col_idx - 1;
+
+ // Cast away the const to make a mutable pointer to ref,
+ // starting at the last column written. This will be
+ // snapshotted and restored later.
+ uint8_t *last_ref_col = (uint8_t *)ref + last_col_idx;
+ uint8_t *last_dst_col = dst + last_col_idx;
+
+ const int needs_restore =
+ overwrite_ref_col(xd->cur_buf->flags, block_height, last_dst_col,
+ dst_stride, last_ref_col, ref_stride, tmp);
+
+ const uint8_t *next_ref_col = ref + next_col_idx;
+ uint8_t *next_dst_col = dst + next_col_idx;
+
+ predict_intra_block_helper(cm, xd, wpx, hpx, tx_size, mode, next_ref_col,
+ ref_stride, next_dst_col, dst_stride,
+ next_col_off, row_off, plane);
+
+ if (needs_restore)
+ restore_ref_col(xd->cur_buf->flags, block_height, tmp, last_ref_col,
+ ref_stride);
+
+ next_col_idx += tx_width;
+ next_col_off += tx_width_off;
}
-#else
- assert(0);
-#endif // (CONFIG_RECT_TX && (CONFIG_VAR_TX || CONFIG_EXT_TX)) ||
- // (CONFIG_EXT_INTER)
}
+#endif // INTRA_USES_EXT_TRANSFORMS
}
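A hypothetical trace of the tall-block path above, assuming CONFIG_TX64X64 is off so max_txsize_rect_lookup[BLOCK_32X64] is TX_32X32:

/* block: 32x64, tx_size: TX_32X32 -> tx_height = 32, so the while loop
 * runs once with next_row_idx = 32. overwrite_ref_row() copies row 31 of
 * dst over row 31 of ref so the bottom 32x32 prediction sees the freshly
 * predicted pixels as its "above" row; restore_ref_row() then puts the
 * original ref row back. */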
void av1_init_intra_predictors(void) {
diff --git a/third_party/aom/av1/common/reconintra.h b/third_party/aom/av1/common/reconintra.h
index 67e5706d6..42797e310 100644
--- a/third_party/aom/av1/common/reconintra.h
+++ b/third_party/aom/av1/common/reconintra.h
@@ -14,60 +14,34 @@
#include "aom/aom_integer.h"
#include "av1/common/blockd.h"
+#include "av1/common/onyxc_int.h"
#ifdef __cplusplus
extern "C" {
#endif
-#if CONFIG_DPCM_INTRA
-static INLINE int av1_use_dpcm_intra(int plane, PREDICTION_MODE mode,
- TX_TYPE tx_type,
- const MB_MODE_INFO *const mbmi) {
- (void)mbmi;
- (void)plane;
-#if CONFIG_EXT_INTRA
- if (mbmi->sb_type >= BLOCK_8X8 && mbmi->angle_delta[plane != 0]) return 0;
-#endif // CONFIG_EXT_INTRA
- return (mode == V_PRED && (tx_type == IDTX || tx_type == H_DCT)) ||
- (mode == H_PRED && (tx_type == IDTX || tx_type == V_DCT));
-}
-#endif // CONFIG_DPCM_INTRA
-
void av1_init_intra_predictors(void);
-void av1_predict_intra_block_facade(MACROBLOCKD *xd, int plane, int block_idx,
- int blk_col, int blk_row, TX_SIZE tx_size);
-void av1_predict_intra_block(const MACROBLOCKD *xd, int bw, int bh,
- BLOCK_SIZE bsize, PREDICTION_MODE mode,
- const uint8_t *ref, int ref_stride, uint8_t *dst,
- int dst_stride, int aoff, int loff, int plane);
+void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
+ int plane, int block_idx, int blk_col,
+ int blk_row, TX_SIZE tx_size);
+void av1_predict_intra_block(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+ int bw, int bh, BLOCK_SIZE bsize,
+ PREDICTION_MODE mode, const uint8_t *ref,
+ int ref_stride, uint8_t *dst, int dst_stride,
+ int aoff, int loff, int plane);
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_INTERINTRA
// Mapping of interintra to intra mode for use in the intra component
static const PREDICTION_MODE interintra_to_intra_mode[INTERINTRA_MODES] = {
- DC_PRED, V_PRED, H_PRED,
-#if CONFIG_ALT_INTRA
- SMOOTH_PRED
-#else
- TM_PRED
-#endif
+ DC_PRED, V_PRED, H_PRED, SMOOTH_PRED
};
// Mapping of intra mode to the interintra mode
static const INTERINTRA_MODE intra_to_interintra_mode[INTRA_MODES] = {
- II_DC_PRED, II_V_PRED, II_H_PRED, II_V_PRED,
-#if CONFIG_ALT_INTRA
- II_SMOOTH_PRED,
-#else
- II_TM_PRED,
-#endif
- II_V_PRED, II_H_PRED, II_H_PRED, II_V_PRED,
-#if CONFIG_ALT_INTRA
- II_SMOOTH_PRED, II_SMOOTH_PRED
-#else
- II_TM_PRED
-#endif
+ II_DC_PRED, II_V_PRED, II_H_PRED, II_V_PRED, II_SMOOTH_PRED, II_V_PRED,
+ II_H_PRED, II_H_PRED, II_V_PRED, II_SMOOTH_PRED, II_SMOOTH_PRED
};
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
#if CONFIG_FILTER_INTRA
#define FILTER_INTRA_PREC_BITS 10
@@ -97,6 +71,14 @@ static INLINE int av1_use_angle_delta(BLOCK_SIZE bsize) {
}
#endif // CONFIG_EXT_INTRA
+#if CONFIG_INTRABC
+static INLINE int av1_allow_intrabc(BLOCK_SIZE bsize,
+ const AV1_COMMON *const cm) {
+ return (bsize >= BLOCK_8X8 || bsize == BLOCK_4X4) &&
+ cm->allow_screen_content_tools;
+}
+#endif // CONFIG_INTRABC
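One non-obvious detail, worth a note: the explicit BLOCK_4X4 case appears to be needed because BLOCK_4X8 and BLOCK_8X4 sit between BLOCK_4X4 and BLOCK_8X8 in the BLOCK_SIZE enum, so a plain bsize >= BLOCK_8X8 could not readmit 4x4 while still excluding those sizes.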
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/resize.c b/third_party/aom/av1/common/resize.c
index 8ddca0acb..b0f303e35 100644
--- a/third_party/aom/av1/common/resize.c
+++ b/third_party/aom/av1/common/resize.c
@@ -32,7 +32,7 @@
#define INTERP_TAPS 8
#define SUBPEL_BITS_RS 6
#define SUBPEL_MASK_RS ((1 << SUBPEL_BITS_RS) - 1)
-#define INTERP_PRECISION_BITS 32
+#define INTERP_PRECISION_BITS 16
#define SUBPEL_INTERP_EXTRA_BITS (INTERP_PRECISION_BITS - SUBPEL_BITS_RS)
#define SUBPEL_INTERP_EXTRA_OFF (1 << (SUBPEL_INTERP_EXTRA_BITS - 1))
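Worked arithmetic for the new precision, with SUBPEL_BITS_RS == 6:

/* SUBPEL_INTERP_EXTRA_BITS = 16 - 6 = 10
 * SUBPEL_INTERP_EXTRA_OFF  = 1 << 9 = 512
 * At the old value of 32, the rounding offset was 1 << 25 and sub-pel
 * positions carried 32 fractional bits, leaving essentially no integer
 * headroom in 32-bit arithmetic; 16 bits is presumably meant to keep
 * those fixed-point values comfortably inside an int. */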
@@ -40,24 +40,6 @@ typedef int16_t interp_kernel[INTERP_TAPS];
// Filters for interpolation (0.5-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS_RS)] = {
-#if SUBPEL_BITS_RS == 5
- { -3, 0, 35, 64, 35, 0, -3, 0 }, { -3, -1, 34, 64, 36, 1, -3, 0 },
- { -3, -1, 32, 64, 38, 1, -3, 0 }, { -2, -2, 31, 63, 39, 2, -3, 0 },
- { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 28, 63, 42, 3, -4, 0 },
- { -2, -3, 27, 63, 43, 4, -4, 0 }, { -2, -3, 25, 62, 45, 5, -4, 0 },
- { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 23, 61, 47, 6, -4, 0 },
- { -2, -3, 21, 60, 49, 7, -4, 0 }, { -1, -4, 20, 60, 50, 8, -4, -1 },
- { -1, -4, 19, 59, 51, 9, -4, -1 }, { -1, -4, 17, 58, 52, 10, -4, 0 },
- { -1, -4, 16, 57, 53, 12, -4, -1 }, { -1, -4, 15, 56, 54, 13, -4, -1 },
- { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 13, 54, 56, 15, -4, -1 },
- { -1, -4, 12, 53, 57, 16, -4, -1 }, { 0, -4, 10, 52, 58, 17, -4, -1 },
- { -1, -4, 9, 51, 59, 19, -4, -1 }, { -1, -4, 8, 50, 60, 20, -4, -1 },
- { 0, -4, 7, 49, 60, 21, -3, -2 }, { 0, -4, 6, 47, 61, 23, -3, -2 },
- { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 5, 45, 62, 25, -3, -2 },
- { 0, -4, 4, 43, 63, 27, -3, -2 }, { 0, -4, 3, 42, 63, 28, -2, -2 },
- { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 2, 39, 63, 31, -2, -2 },
- { 0, -3, 1, 38, 64, 32, -1, -3 }, { 0, -3, 1, 36, 64, 34, -1, -3 },
-#elif SUBPEL_BITS_RS == 6
{ -3, 0, 35, 64, 35, 0, -3, 0 }, { -3, 0, 34, 64, 36, 0, -3, 0 },
{ -3, -1, 34, 64, 36, 1, -3, 0 }, { -3, -1, 33, 64, 37, 1, -3, 0 },
{ -3, -1, 32, 64, 38, 1, -3, 0 }, { -3, -1, 31, 64, 39, 1, -3, 0 },
@@ -90,29 +72,10 @@ static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS_RS)] = {
{ 0, -3, 2, 39, 63, 31, -1, -3 }, { 0, -3, 1, 39, 64, 31, -1, -3 },
{ 0, -3, 1, 38, 64, 32, -1, -3 }, { 0, -3, 1, 37, 64, 33, -1, -3 },
{ 0, -3, 1, 36, 64, 34, -1, -3 }, { 0, -3, 0, 36, 64, 34, 0, -3 },
-#endif // SUBPEL_BITS_RS == 5
};
// Filters for interpolation (0.625-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS_RS)] = {
-#if SUBPEL_BITS_RS == 5
- { -1, -8, 33, 80, 33, -8, -1, 0 }, { -1, -8, 30, 80, 35, -8, -1, 1 },
- { -1, -8, 28, 80, 37, -7, -2, 1 }, { 0, -8, 26, 79, 39, -7, -2, 1 },
- { 0, -8, 24, 79, 41, -7, -2, 1 }, { 0, -8, 22, 78, 43, -6, -2, 1 },
- { 0, -8, 20, 78, 45, -5, -3, 1 }, { 0, -8, 18, 77, 48, -5, -3, 1 },
- { 0, -8, 16, 76, 50, -4, -3, 1 }, { 0, -8, 15, 75, 52, -3, -4, 1 },
- { 0, -7, 13, 74, 54, -3, -4, 1 }, { 0, -7, 11, 73, 56, -2, -4, 1 },
- { 0, -7, 10, 71, 58, -1, -4, 1 }, { 1, -7, 8, 70, 60, 0, -5, 1 },
- { 1, -6, 6, 68, 62, 1, -5, 1 }, { 1, -6, 5, 67, 63, 2, -5, 1 },
- { 1, -6, 4, 65, 65, 4, -6, 1 }, { 1, -5, 2, 63, 67, 5, -6, 1 },
- { 1, -5, 1, 62, 68, 6, -6, 1 }, { 1, -5, 0, 60, 70, 8, -7, 1 },
- { 1, -4, -1, 58, 71, 10, -7, 0 }, { 1, -4, -2, 56, 73, 11, -7, 0 },
- { 1, -4, -3, 54, 74, 13, -7, 0 }, { 1, -4, -3, 52, 75, 15, -8, 0 },
- { 1, -3, -4, 50, 76, 16, -8, 0 }, { 1, -3, -5, 48, 77, 18, -8, 0 },
- { 1, -3, -5, 45, 78, 20, -8, 0 }, { 1, -2, -6, 43, 78, 22, -8, 0 },
- { 1, -2, -7, 41, 79, 24, -8, 0 }, { 1, -2, -7, 39, 79, 26, -8, 0 },
- { 1, -2, -7, 37, 80, 28, -8, -1 }, { 1, -1, -8, 35, 80, 30, -8, -1 },
-#elif SUBPEL_BITS_RS == 6
{ -1, -8, 33, 80, 33, -8, -1, 0 }, { -1, -8, 31, 80, 34, -8, -1, 1 },
{ -1, -8, 30, 80, 35, -8, -1, 1 }, { -1, -8, 29, 80, 36, -7, -2, 1 },
{ -1, -8, 28, 80, 37, -7, -2, 1 }, { -1, -8, 27, 80, 38, -7, -2, 1 },
@@ -145,29 +108,10 @@ static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS_RS)] = {
{ 1, -2, -7, 39, 79, 26, -8, 0 }, { 1, -2, -7, 38, 80, 27, -8, -1 },
{ 1, -2, -7, 37, 80, 28, -8, -1 }, { 1, -2, -7, 36, 80, 29, -8, -1 },
{ 1, -1, -8, 35, 80, 30, -8, -1 }, { 1, -1, -8, 34, 80, 31, -8, -1 },
-#endif // SUBPEL_BITS_RS == 5
};
// Filters for interpolation (0.75-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS_RS)] = {
-#if SUBPEL_BITS_RS == 5
- { 2, -11, 25, 96, 25, -11, 2, 0 }, { 2, -11, 22, 96, 28, -11, 2, 0 },
- { 2, -10, 19, 95, 31, -11, 2, 0 }, { 2, -10, 17, 95, 34, -12, 2, 0 },
- { 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -8, 12, 93, 40, -12, 1, 0 },
- { 2, -8, 9, 92, 43, -12, 1, 1 }, { 2, -7, 7, 91, 46, -12, 1, 0 },
- { 2, -7, 5, 90, 49, -12, 1, 0 }, { 2, -6, 3, 88, 52, -12, 0, 1 },
- { 2, -5, 1, 86, 55, -12, 0, 1 }, { 2, -5, -1, 84, 58, -11, 0, 1 },
- { 2, -4, -2, 82, 61, -11, -1, 1 }, { 2, -4, -4, 80, 64, -10, -1, 1 },
- { 1, -3, -5, 77, 67, -9, -1, 1 }, { 1, -3, -6, 75, 70, -8, -2, 1 },
- { 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -2, -8, 70, 75, -6, -3, 1 },
- { 1, -1, -9, 67, 77, -5, -3, 1 }, { 1, -1, -10, 64, 80, -4, -4, 2 },
- { 1, -1, -11, 61, 82, -2, -4, 2 }, { 1, 0, -11, 58, 84, -1, -5, 2 },
- { 1, 0, -12, 55, 86, 1, -5, 2 }, { 1, 0, -12, 52, 88, 3, -6, 2 },
- { 0, 1, -12, 49, 90, 5, -7, 2 }, { 0, 1, -12, 46, 91, 7, -7, 2 },
- { 1, 1, -12, 43, 92, 9, -8, 2 }, { 0, 1, -12, 40, 93, 12, -8, 2 },
- { 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -12, 34, 95, 17, -10, 2 },
- { 0, 2, -11, 31, 95, 19, -10, 2 }, { 0, 2, -11, 28, 96, 22, -11, 2 },
-#elif SUBPEL_BITS_RS == 6
{ 2, -11, 25, 96, 25, -11, 2, 0 }, { 2, -11, 24, 96, 26, -11, 2, 0 },
{ 2, -11, 22, 96, 28, -11, 2, 0 }, { 2, -10, 21, 96, 29, -12, 2, 0 },
{ 2, -10, 19, 96, 31, -12, 2, 0 }, { 2, -10, 18, 95, 32, -11, 2, 0 },
@@ -200,29 +144,10 @@ static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS_RS)] = {
{ 0, 2, -12, 34, 95, 17, -10, 2 }, { 0, 2, -11, 32, 95, 18, -10, 2 },
{ 0, 2, -12, 31, 96, 19, -10, 2 }, { 0, 2, -12, 29, 96, 21, -10, 2 },
{ 0, 2, -11, 28, 96, 22, -11, 2 }, { 0, 2, -11, 26, 96, 24, -11, 2 },
-#endif // SUBPEL_BITS_RS == 5
};
// Filters for interpolation (0.875-band) - note this also filters integer pels.
static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS_RS)] = {
-#if SUBPEL_BITS_RS == 5
- { 3, -8, 13, 112, 13, -8, 3, 0 }, { 3, -7, 10, 112, 17, -9, 3, -1 },
- { 2, -6, 7, 111, 21, -9, 3, -1 }, { 2, -5, 4, 111, 24, -10, 3, -1 },
- { 2, -4, 1, 110, 28, -11, 3, -1 }, { 1, -3, -1, 108, 32, -12, 4, -1 },
- { 1, -2, -3, 106, 36, -13, 4, -1 }, { 1, -1, -6, 105, 40, -14, 4, -1 },
- { 1, -1, -7, 102, 44, -14, 4, -1 }, { 1, 0, -9, 100, 48, -15, 4, -1 },
- { 1, 1, -11, 97, 53, -16, 4, -1 }, { 0, 1, -12, 95, 57, -16, 4, -1 },
- { 0, 2, -13, 91, 61, -16, 4, -1 }, { 0, 2, -14, 88, 65, -16, 4, -1 },
- { 0, 3, -15, 84, 69, -17, 4, 0 }, { 0, 3, -16, 81, 73, -16, 3, 0 },
- { 0, 3, -16, 77, 77, -16, 3, 0 }, { 0, 3, -16, 73, 81, -16, 3, 0 },
- { 0, 4, -17, 69, 84, -15, 3, 0 }, { -1, 4, -16, 65, 88, -14, 2, 0 },
- { -1, 4, -16, 61, 91, -13, 2, 0 }, { -1, 4, -16, 57, 95, -12, 1, 0 },
- { -1, 4, -16, 53, 97, -11, 1, 1 }, { -1, 4, -15, 48, 100, -9, 0, 1 },
- { -1, 4, -14, 44, 102, -7, -1, 1 }, { -1, 4, -14, 40, 105, -6, -1, 1 },
- { -1, 4, -13, 36, 106, -3, -2, 1 }, { -1, 4, -12, 32, 108, -1, -3, 1 },
- { -1, 3, -11, 28, 110, 1, -4, 2 }, { -1, 3, -10, 24, 111, 4, -5, 2 },
- { -1, 3, -9, 21, 111, 7, -6, 2 }, { -1, 3, -9, 17, 112, 10, -7, 3 },
-#elif SUBPEL_BITS_RS == 6
{ 3, -8, 13, 112, 13, -8, 3, 0 }, { 2, -7, 12, 112, 15, -8, 3, -1 },
{ 3, -7, 10, 112, 17, -9, 3, -1 }, { 2, -6, 8, 112, 19, -9, 3, -1 },
{ 2, -6, 7, 112, 21, -10, 3, -1 }, { 2, -5, 6, 111, 22, -10, 3, -1 },
@@ -255,29 +180,10 @@ static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS_RS)] = {
{ -1, 3, -10, 24, 111, 4, -5, 2 }, { -1, 3, -10, 22, 111, 6, -5, 2 },
{ -1, 3, -10, 21, 112, 7, -6, 2 }, { -1, 3, -9, 19, 112, 8, -6, 2 },
{ -1, 3, -9, 17, 112, 10, -7, 3 }, { -1, 3, -8, 15, 112, 12, -7, 2 },
-#endif // SUBPEL_BITS_RS == 5
};
// Filters for interpolation (full-band) - no filtering for integer pixels
static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS_RS)] = {
-#if SUBPEL_BITS_RS == 5
- { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -3, 128, 3, -1, 0, 0 },
- { -1, 2, -6, 127, 7, -2, 1, 0 }, { -1, 3, -9, 126, 12, -4, 1, 0 },
- { -1, 4, -12, 125, 16, -5, 1, 0 }, { -1, 4, -14, 123, 20, -6, 2, 0 },
- { -1, 5, -15, 120, 25, -8, 2, 0 }, { -1, 5, -17, 118, 30, -9, 3, -1 },
- { -1, 6, -18, 114, 35, -10, 3, -1 }, { -1, 6, -19, 111, 41, -12, 3, -1 },
- { -1, 6, -20, 107, 46, -13, 4, -1 }, { -1, 6, -21, 103, 52, -14, 4, -1 },
- { -1, 6, -21, 99, 57, -16, 5, -1 }, { -1, 6, -21, 94, 63, -17, 5, -1 },
- { -1, 6, -20, 89, 68, -18, 5, -1 }, { -1, 6, -20, 84, 73, -19, 6, -1 },
- { -1, 6, -20, 79, 79, -20, 6, -1 }, { -1, 6, -19, 73, 84, -20, 6, -1 },
- { -1, 5, -18, 68, 89, -20, 6, -1 }, { -1, 5, -17, 63, 94, -21, 6, -1 },
- { -1, 5, -16, 57, 99, -21, 6, -1 }, { -1, 4, -14, 52, 103, -21, 6, -1 },
- { -1, 4, -13, 46, 107, -20, 6, -1 }, { -1, 3, -12, 41, 111, -19, 6, -1 },
- { -1, 3, -10, 35, 114, -18, 6, -1 }, { -1, 3, -9, 30, 118, -17, 5, -1 },
- { 0, 2, -8, 25, 120, -15, 5, -1 }, { 0, 2, -6, 20, 123, -14, 4, -1 },
- { 0, 1, -5, 16, 125, -12, 4, -1 }, { 0, 1, -4, 12, 126, -9, 3, -1 },
- { 0, 1, -2, 7, 127, -6, 2, -1 }, { 0, 0, -1, 3, 128, -3, 1, 0 },
-#elif SUBPEL_BITS_RS == 6
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -1, 128, 2, -1, 0, 0 },
{ 0, 1, -3, 127, 4, -2, 1, 0 }, { 0, 1, -4, 127, 6, -3, 1, 0 },
{ 0, 2, -6, 126, 8, -3, 1, 0 }, { 0, 2, -7, 125, 11, -4, 1, 0 },
@@ -310,9 +216,86 @@ static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS_RS)] = {
{ 0, 2, -5, 13, 125, -8, 2, -1 }, { 0, 1, -4, 11, 125, -7, 2, 0 },
{ 0, 1, -3, 8, 126, -6, 2, 0 }, { 0, 1, -3, 6, 127, -4, 1, 0 },
{ 0, 1, -2, 4, 127, -3, 1, 0 }, { 0, 0, -1, 2, 128, -1, 0, 0 },
-#endif // SUBPEL_BITS_RS == 5
};
+#if CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
+#define INTERP_SIMPLE_TAPS 4
+static const int16_t filter_simple[(1
+ << SUBPEL_BITS_RS)][INTERP_SIMPLE_TAPS] = {
+#if INTERP_SIMPLE_TAPS == 2
+ { 128, 0 }, { 126, 2 }, { 124, 4 }, { 122, 6 }, { 120, 8 }, { 118, 10 },
+ { 116, 12 }, { 114, 14 }, { 112, 16 }, { 110, 18 }, { 108, 20 }, { 106, 22 },
+ { 104, 24 }, { 102, 26 }, { 100, 28 }, { 98, 30 }, { 96, 32 }, { 94, 34 },
+ { 92, 36 }, { 90, 38 }, { 88, 40 }, { 86, 42 }, { 84, 44 }, { 82, 46 },
+ { 80, 48 }, { 78, 50 }, { 76, 52 }, { 74, 54 }, { 72, 56 }, { 70, 58 },
+ { 68, 60 }, { 66, 62 }, { 64, 64 }, { 62, 66 }, { 60, 68 }, { 58, 70 },
+ { 56, 72 }, { 54, 74 }, { 52, 76 }, { 50, 78 }, { 48, 80 }, { 46, 82 },
+ { 44, 84 }, { 42, 86 }, { 40, 88 }, { 38, 90 }, { 36, 92 }, { 34, 94 },
+ { 32, 96 }, { 30, 98 }, { 28, 100 }, { 26, 102 }, { 24, 104 }, { 22, 106 },
+ { 20, 108 }, { 18, 110 }, { 16, 112 }, { 14, 114 }, { 12, 116 }, { 10, 118 },
+ { 8, 120 }, { 6, 122 }, { 4, 124 }, { 2, 126 },
+#elif INTERP_SIMPLE_TAPS == 4
+ { 0, 128, 0, 0 }, { -1, 128, 2, -1 }, { -2, 127, 4, -1 },
+ { -3, 126, 7, -2 }, { -4, 125, 9, -2 }, { -5, 125, 11, -3 },
+ { -6, 124, 13, -3 }, { -7, 123, 16, -4 }, { -7, 122, 18, -5 },
+ { -8, 121, 20, -5 }, { -9, 120, 23, -6 }, { -9, 118, 25, -6 },
+ { -10, 117, 28, -7 }, { -11, 116, 30, -7 }, { -11, 114, 33, -8 },
+ { -12, 113, 35, -8 }, { -12, 111, 38, -9 }, { -13, 109, 41, -9 },
+ { -13, 108, 43, -10 }, { -13, 106, 45, -10 }, { -13, 104, 48, -11 },
+ { -14, 102, 51, -11 }, { -14, 100, 53, -11 }, { -14, 98, 56, -12 },
+ { -14, 96, 58, -12 }, { -14, 94, 61, -13 }, { -15, 92, 64, -13 },
+ { -15, 90, 66, -13 }, { -15, 87, 69, -13 }, { -14, 85, 71, -14 },
+ { -14, 83, 73, -14 }, { -14, 80, 76, -14 }, { -14, 78, 78, -14 },
+ { -14, 76, 80, -14 }, { -14, 73, 83, -14 }, { -14, 71, 85, -14 },
+ { -13, 69, 87, -15 }, { -13, 66, 90, -15 }, { -13, 64, 92, -15 },
+ { -13, 61, 94, -14 }, { -12, 58, 96, -14 }, { -12, 56, 98, -14 },
+ { -11, 53, 100, -14 }, { -11, 51, 102, -14 }, { -11, 48, 104, -13 },
+ { -10, 45, 106, -13 }, { -10, 43, 108, -13 }, { -9, 41, 109, -13 },
+ { -9, 38, 111, -12 }, { -8, 35, 113, -12 }, { -8, 33, 114, -11 },
+ { -7, 30, 116, -11 }, { -7, 28, 117, -10 }, { -6, 25, 118, -9 },
+ { -6, 23, 120, -9 }, { -5, 20, 121, -8 }, { -5, 18, 122, -7 },
+ { -4, 16, 123, -7 }, { -3, 13, 124, -6 }, { -3, 11, 125, -5 },
+ { -2, 9, 125, -4 }, { -2, 7, 126, -3 }, { -1, 4, 127, -2 },
+ { -1, 2, 128, -1 },
+#elif INTERP_SIMPLE_TAPS == 6
+ { 0, 0, 128, 0, 0, 0 }, { 0, -1, 128, 2, -1, 0 },
+ { 1, -3, 127, 4, -2, 1 }, { 1, -4, 127, 6, -3, 1 },
+ { 2, -6, 126, 8, -3, 1 }, { 2, -7, 125, 11, -4, 1 },
+ { 2, -9, 125, 13, -5, 2 }, { 3, -10, 124, 15, -6, 2 },
+ { 3, -11, 123, 18, -7, 2 }, { 3, -12, 122, 20, -8, 3 },
+ { 4, -13, 121, 22, -9, 3 }, { 4, -14, 119, 25, -9, 3 },
+ { 4, -15, 118, 27, -10, 4 }, { 4, -16, 117, 30, -11, 4 },
+ { 5, -17, 116, 32, -12, 4 }, { 5, -17, 114, 35, -13, 4 },
+ { 5, -18, 112, 37, -13, 5 }, { 5, -19, 111, 40, -14, 5 },
+ { 6, -19, 109, 42, -15, 5 }, { 6, -20, 107, 45, -15, 5 },
+ { 6, -20, 105, 48, -16, 5 }, { 6, -21, 103, 51, -17, 6 },
+ { 6, -21, 101, 53, -17, 6 }, { 6, -21, 99, 56, -18, 6 },
+ { 7, -22, 97, 58, -18, 6 }, { 7, -22, 95, 61, -19, 6 },
+ { 7, -22, 93, 63, -19, 6 }, { 7, -22, 91, 66, -20, 6 },
+ { 7, -22, 88, 69, -20, 6 }, { 7, -22, 86, 71, -21, 7 },
+ { 7, -22, 83, 74, -21, 7 }, { 7, -22, 81, 76, -21, 7 },
+ { 7, -22, 79, 79, -22, 7 }, { 7, -21, 76, 81, -22, 7 },
+ { 7, -21, 74, 83, -22, 7 }, { 7, -21, 71, 86, -22, 7 },
+ { 6, -20, 69, 88, -22, 7 }, { 6, -20, 66, 91, -22, 7 },
+ { 6, -19, 63, 93, -22, 7 }, { 6, -19, 61, 95, -22, 7 },
+ { 6, -18, 58, 97, -22, 7 }, { 6, -18, 56, 99, -21, 6 },
+ { 6, -17, 53, 101, -21, 6 }, { 6, -17, 51, 103, -21, 6 },
+ { 5, -16, 48, 105, -20, 6 }, { 5, -15, 45, 107, -20, 6 },
+ { 5, -15, 42, 109, -19, 6 }, { 5, -14, 40, 111, -19, 5 },
+ { 5, -13, 37, 112, -18, 5 }, { 4, -13, 35, 114, -17, 5 },
+ { 4, -12, 32, 116, -17, 5 }, { 4, -11, 30, 117, -16, 4 },
+ { 4, -10, 27, 118, -15, 4 }, { 3, -9, 25, 119, -14, 4 },
+ { 3, -9, 22, 121, -13, 4 }, { 3, -8, 20, 122, -12, 3 },
+ { 2, -7, 18, 123, -11, 3 }, { 2, -6, 15, 124, -10, 3 },
+ { 2, -5, 13, 125, -9, 2 }, { 1, -4, 11, 125, -7, 2 },
+ { 1, -3, 8, 126, -6, 2 }, { 1, -3, 6, 127, -4, 1 },
+ { 1, -2, 4, 127, -3, 1 }, { 0, -1, 2, 128, -1, 0 },
+#else
+#error "Invalid value of INTERP_SIMPLE_TAPS"
+#endif // INTERP_SIMPLE_TAPS == 2
+};
+#endif // CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
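
Each row of this bank, like the 8-tap banks above, sums to 128 — unity gain at FILTER_BITS == 7 — so flat regions pass through unchanged. A small checker sketch; the bank pointer and phase count are assumed parameters:

#include <assert.h>
#include <stdint.h>

// Verify that every phase of a kernel bank has unity DC gain (sums to 128).
static void check_unity_gain(const int16_t *bank, int phases, int taps) {
  for (int p = 0; p < phases; ++p) {
    int sum = 0;
    for (int k = 0; k < taps; ++k) sum += bank[p * taps + k];
    assert(sum == 128);  // 1 << FILTER_BITS
  }
}
// e.g. check_unity_gain(&filter_simple[0][0], 1 << SUBPEL_BITS_RS,
//                       INTERP_SIMPLE_TAPS);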
+
// Filters for factor of 2 downsampling.
static const int16_t av1_down2_symeven_half_filter[] = { 56, 12, -3, -1 };
static const int16_t av1_down2_symodd_half_filter[] = { 64, 35, 0, -3 };
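
These store only half of a symmetric kernel (8-tap even-symmetric and 7-tap odd-symmetric, respectively); note 2 * (56 + 12 - 3 - 1) = 128 and 64 + 2 * (35 + 0 - 3) = 128, so gain is again unity. A hedged sketch of how the even-symmetric half filter is applied at one interior output position (helper names are illustrative):

#include <stdint.h>

#define FILTER_BITS 7

static uint8_t clip_pixel_u8(int v) { return v < 0 ? 0 : (v > 255 ? 255 : v); }

// One output pixel of a 2x downsample at even position i (bounds assumed OK).
static uint8_t down2_symeven_one(const uint8_t *input, int i) {
  static const int16_t half[] = { 56, 12, -3, -1 };
  int sum = 1 << (FILTER_BITS - 1);  // rounding offset
  for (int j = 0; j < 4; ++j)
    sum += (input[i - j] + input[i + 1 + j]) * half[j];
  return clip_pixel_u8(sum >> FILTER_BITS);
}
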
@@ -331,33 +314,34 @@ static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
return filteredinterp_filters500;
}
-static void interpolate(const uint8_t *const input, int inlength,
- uint8_t *output, int outlength) {
- const int64_t delta =
- (((uint64_t)inlength << 32) + outlength / 2) / outlength;
- const int64_t offset =
+static void interpolate_core(const uint8_t *const input, int inlength,
+ uint8_t *output, int outlength,
+ const int16_t *interp_filters, int interp_taps) {
+ const int32_t delta =
+ (((uint32_t)inlength << INTERP_PRECISION_BITS) + outlength / 2) /
+ outlength;
+ const int32_t offset =
inlength > outlength
- ? (((int64_t)(inlength - outlength) << 31) + outlength / 2) /
+ ? (((int32_t)(inlength - outlength) << (INTERP_PRECISION_BITS - 1)) +
+ outlength / 2) /
outlength
- : -(((int64_t)(outlength - inlength) << 31) + outlength / 2) /
+ : -(((int32_t)(outlength - inlength) << (INTERP_PRECISION_BITS - 1)) +
+ outlength / 2) /
outlength;
uint8_t *optr = output;
int x, x1, x2, sum, k, int_pel, sub_pel;
- int64_t y;
-
- const interp_kernel *interp_filters =
- choose_interp_filter(inlength, outlength);
+ int32_t y;
x = 0;
y = offset + SUBPEL_INTERP_EXTRA_OFF;
- while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) {
+ while ((y >> INTERP_PRECISION_BITS) < (interp_taps / 2 - 1)) {
x++;
y += delta;
}
x1 = x;
x = outlength - 1;
y = delta * x + offset + SUBPEL_INTERP_EXTRA_OFF;
- while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >=
+ while ((y >> INTERP_PRECISION_BITS) + (int32_t)(interp_taps / 2) >=
inlength) {
x--;
y -= delta;
@@ -366,13 +350,12 @@ static void interpolate(const uint8_t *const input, int inlength,
if (x1 > x2) {
for (x = 0, y = offset + SUBPEL_INTERP_EXTRA_OFF; x < outlength;
++x, y += delta) {
- const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
- filter = interp_filters[sub_pel];
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
- for (k = 0; k < INTERP_TAPS; ++k) {
- const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
+ for (k = 0; k < interp_taps; ++k) {
+ const int pk = int_pel - interp_taps / 2 + 1 + k;
sum += filter[k] * input[AOMMAX(AOMMIN(pk, inlength - 1), 0)];
}
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
@@ -380,41 +363,55 @@ static void interpolate(const uint8_t *const input, int inlength,
} else {
// Initial part.
for (x = 0, y = offset + SUBPEL_INTERP_EXTRA_OFF; x < x1; ++x, y += delta) {
- const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
- filter = interp_filters[sub_pel];
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
- for (k = 0; k < INTERP_TAPS; ++k)
- sum += filter[k] * input[AOMMAX(int_pel - INTERP_TAPS / 2 + 1 + k, 0)];
+ for (k = 0; k < interp_taps; ++k)
+ sum += filter[k] * input[AOMMAX(int_pel - interp_taps / 2 + 1 + k, 0)];
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
// Middle part.
for (; x <= x2; ++x, y += delta) {
- const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
- filter = interp_filters[sub_pel];
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
- for (k = 0; k < INTERP_TAPS; ++k)
- sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
+ for (k = 0; k < interp_taps; ++k)
+ sum += filter[k] * input[int_pel - interp_taps / 2 + 1 + k];
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
// End part.
for (; x < outlength; ++x, y += delta) {
- const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
- filter = interp_filters[sub_pel];
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
- for (k = 0; k < INTERP_TAPS; ++k)
+ for (k = 0; k < interp_taps; ++k)
sum += filter[k] *
- input[AOMMIN(int_pel - INTERP_TAPS / 2 + 1 + k, inlength - 1)];
+ input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, inlength - 1)];
*optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
}
}
}
+static void interpolate(const uint8_t *const input, int inlength,
+ uint8_t *output, int outlength) {
+ const interp_kernel *interp_filters =
+ choose_interp_filter(inlength, outlength);
+
+ interpolate_core(input, inlength, output, outlength, &interp_filters[0][0],
+ INTERP_TAPS);
+}
+
+#if CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
+static void interpolate_simple(const uint8_t *const input, int inlength,
+ uint8_t *output, int outlength) {
+ interpolate_core(input, inlength, output, outlength, &filter_simple[0][0],
+ INTERP_SIMPLE_TAPS);
+}
+#endif // CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
+
#ifndef __clang_analyzer__
static void down2_symeven(const uint8_t *const input, int length,
uint8_t *output) {
@@ -596,14 +593,15 @@ static void fill_arr_to_col(uint8_t *img, int stride, int len, uint8_t *arr) {
}
}
-void av1_resize_plane(const uint8_t *const input, int height, int width,
- int in_stride, uint8_t *output, int height2, int width2,
- int out_stride) {
+static void resize_plane(const uint8_t *const input, int height, int width,
+ int in_stride, uint8_t *output, int height2,
+ int width2, int out_stride) {
int i;
- uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height);
- uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * AOMMAX(width, height));
- uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * height);
- uint8_t *arrbuf2 = (uint8_t *)malloc(sizeof(uint8_t) * height2);
+ uint8_t *intbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * width2 * height);
+ uint8_t *tmpbuf =
+ (uint8_t *)aom_malloc(sizeof(uint8_t) * AOMMAX(width, height));
+ uint8_t *arrbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * height);
+ uint8_t *arrbuf2 = (uint8_t *)aom_malloc(sizeof(uint8_t) * height2);
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
goto Error;
assert(width > 0);
@@ -620,40 +618,80 @@ void av1_resize_plane(const uint8_t *const input, int height, int width,
}
Error:
- free(intbuf);
- free(tmpbuf);
- free(arrbuf);
- free(arrbuf2);
+ aom_free(intbuf);
+ aom_free(tmpbuf);
+ aom_free(arrbuf);
+ aom_free(arrbuf2);
+}
+
+#if CONFIG_FRAME_SUPERRES
+static void upscale_normative(const uint8_t *const input, int length,
+ uint8_t *output, int olength) {
+#if CONFIG_LOOP_RESTORATION
+ interpolate_simple(input, length, output, olength);
+#else
+ interpolate(input, length, output, olength);
+#endif // CONFIG_LOOP_RESTORATION
}
+static void upscale_normative_plane(const uint8_t *const input, int height,
+ int width, int in_stride, uint8_t *output,
+ int height2, int width2, int out_stride) {
+ int i;
+ uint8_t *intbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * width2 * height);
+ uint8_t *arrbuf = (uint8_t *)aom_malloc(sizeof(uint8_t) * height);
+ uint8_t *arrbuf2 = (uint8_t *)aom_malloc(sizeof(uint8_t) * height2);
+ if (intbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error;
+ assert(width > 0);
+ assert(height > 0);
+ assert(width2 > 0);
+ assert(height2 > 0);
+ for (i = 0; i < height; ++i)
+ upscale_normative(input + in_stride * i, width, intbuf + width2 * i,
+ width2);
+ for (i = 0; i < width2; ++i) {
+ fill_col_to_arr(intbuf + i, width2, height, arrbuf);
+ upscale_normative(arrbuf, height, arrbuf2, height2);
+ fill_arr_to_col(output + i, out_stride, height2, arrbuf2);
+ }
+
+Error:
+ aom_free(intbuf);
+ aom_free(arrbuf);
+ aom_free(arrbuf2);
+}
+#endif // CONFIG_FRAME_SUPERRES
+
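Like resize_plane() above, the upscale works separably: all rows are resampled into an intermediate buffer, then each column is gathered into a contiguous array, resampled, and scattered back (mirroring fill_col_to_arr()/fill_arr_to_col()). A self-contained sketch of that pattern with an abstract resampler callback:

#include <stdint.h>

typedef void (*resample_fn)(const uint8_t *in, int inlen, uint8_t *out,
                            int outlen);

// Separable plane rescale: horizontal pass into `mid`, then vertical pass
// column by column. Caller provides scratch: mid (w2*h), col (h), col2 (h2).
static void separable_resize(const uint8_t *in, int w, int h, int in_stride,
                             uint8_t *out, int w2, int h2, int out_stride,
                             uint8_t *mid, uint8_t *col, uint8_t *col2,
                             resample_fn f) {
  for (int i = 0; i < h; ++i)                  // rows
    f(in + i * in_stride, w, mid + i * w2, w2);
  for (int j = 0; j < w2; ++j) {               // columns
    for (int i = 0; i < h; ++i) col[i] = mid[i * w2 + j];
    f(col, h, col2, h2);
    for (int i = 0; i < h2; ++i) out[i * out_stride + j] = col2[i];
  }
}
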
#if CONFIG_HIGHBITDEPTH
-static void highbd_interpolate(const uint16_t *const input, int inlength,
- uint16_t *output, int outlength, int bd) {
- const int64_t delta =
- (((uint64_t)inlength << 32) + outlength / 2) / outlength;
- const int64_t offset =
+static void highbd_interpolate_core(const uint16_t *const input, int inlength,
+ uint16_t *output, int outlength, int bd,
+ const int16_t *interp_filters,
+ int interp_taps) {
+ const int32_t delta =
+ (((uint32_t)inlength << INTERP_PRECISION_BITS) + outlength / 2) /
+ outlength;
+ const int32_t offset =
inlength > outlength
- ? (((int64_t)(inlength - outlength) << 31) + outlength / 2) /
+ ? (((int32_t)(inlength - outlength) << (INTERP_PRECISION_BITS - 1)) +
+ outlength / 2) /
outlength
- : -(((int64_t)(outlength - inlength) << 31) + outlength / 2) /
+ : -(((int32_t)(outlength - inlength) << (INTERP_PRECISION_BITS - 1)) +
+ outlength / 2) /
outlength;
uint16_t *optr = output;
int x, x1, x2, sum, k, int_pel, sub_pel;
- int64_t y;
-
- const interp_kernel *interp_filters =
- choose_interp_filter(inlength, outlength);
+ int32_t y;
x = 0;
y = offset + SUBPEL_INTERP_EXTRA_OFF;
- while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) {
+ while ((y >> INTERP_PRECISION_BITS) < (interp_taps / 2 - 1)) {
x++;
y += delta;
}
x1 = x;
x = outlength - 1;
y = delta * x + offset + SUBPEL_INTERP_EXTRA_OFF;
- while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >=
+ while ((y >> INTERP_PRECISION_BITS) + (int32_t)(interp_taps / 2) >=
inlength) {
x--;
y -= delta;
@@ -662,13 +700,12 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
if (x1 > x2) {
for (x = 0, y = offset + SUBPEL_INTERP_EXTRA_OFF; x < outlength;
++x, y += delta) {
- const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
- filter = interp_filters[sub_pel];
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
- for (k = 0; k < INTERP_TAPS; ++k) {
- const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
+ for (k = 0; k < interp_taps; ++k) {
+ const int pk = int_pel - interp_taps / 2 + 1 + k;
sum += filter[k] * input[AOMMAX(AOMMIN(pk, inlength - 1), 0)];
}
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
@@ -676,41 +713,55 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
} else {
// Initial part.
for (x = 0, y = offset + SUBPEL_INTERP_EXTRA_OFF; x < x1; ++x, y += delta) {
- const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
- filter = interp_filters[sub_pel];
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
- for (k = 0; k < INTERP_TAPS; ++k)
- sum += filter[k] * input[AOMMAX(int_pel - INTERP_TAPS / 2 + 1 + k, 0)];
+ for (k = 0; k < interp_taps; ++k)
+ sum += filter[k] * input[AOMMAX(int_pel - interp_taps / 2 + 1 + k, 0)];
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
// Middle part.
for (; x <= x2; ++x, y += delta) {
- const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
- filter = interp_filters[sub_pel];
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
- for (k = 0; k < INTERP_TAPS; ++k)
- sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
+ for (k = 0; k < interp_taps; ++k)
+ sum += filter[k] * input[int_pel - interp_taps / 2 + 1 + k];
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
// End part.
for (; x < outlength; ++x, y += delta) {
- const int16_t *filter;
int_pel = y >> INTERP_PRECISION_BITS;
sub_pel = (y >> SUBPEL_INTERP_EXTRA_BITS) & SUBPEL_MASK_RS;
- filter = interp_filters[sub_pel];
+ const int16_t *filter = &interp_filters[sub_pel * interp_taps];
sum = 0;
- for (k = 0; k < INTERP_TAPS; ++k)
+ for (k = 0; k < interp_taps; ++k)
sum += filter[k] *
- input[AOMMIN(int_pel - INTERP_TAPS / 2 + 1 + k, inlength - 1)];
+ input[AOMMIN(int_pel - interp_taps / 2 + 1 + k, inlength - 1)];
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
}
}
+static void highbd_interpolate(const uint16_t *const input, int inlength,
+ uint16_t *output, int outlength, int bd) {
+ const interp_kernel *interp_filters =
+ choose_interp_filter(inlength, outlength);
+
+ highbd_interpolate_core(input, inlength, output, outlength, bd,
+ &interp_filters[0][0], INTERP_TAPS);
+}
+
+#if CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
+static void highbd_interpolate_simple(const uint16_t *const input, int inlength,
+ uint16_t *output, int outlength, int bd) {
+ highbd_interpolate_core(input, inlength, output, outlength, bd,
+ &filter_simple[0][0], INTERP_SIMPLE_TAPS);
+}
+#endif // CONFIG_FRAME_SUPERRES && CONFIG_LOOP_RESTORATION
+
#ifndef __clang_analyzer__
static void highbd_down2_symeven(const uint16_t *const input, int length,
uint16_t *output, int bd) {
@@ -877,15 +928,16 @@ static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len,
}
}
-void av1_highbd_resize_plane(const uint8_t *const input, int height, int width,
- int in_stride, uint8_t *output, int height2,
- int width2, int out_stride, int bd) {
+static void highbd_resize_plane(const uint8_t *const input, int height,
+ int width, int in_stride, uint8_t *output,
+ int height2, int width2, int out_stride,
+ int bd) {
int i;
- uint16_t *intbuf = (uint16_t *)malloc(sizeof(uint16_t) * width2 * height);
+ uint16_t *intbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * width2 * height);
uint16_t *tmpbuf =
- (uint16_t *)malloc(sizeof(uint16_t) * AOMMAX(width, height));
- uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * height);
- uint16_t *arrbuf2 = (uint16_t *)malloc(sizeof(uint16_t) * height2);
+ (uint16_t *)aom_malloc(sizeof(uint16_t) * AOMMAX(width, height));
+ uint16_t *arrbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * height);
+ uint16_t *arrbuf2 = (uint16_t *)aom_malloc(sizeof(uint16_t) * height2);
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
goto Error;
for (i = 0; i < height; ++i) {
@@ -900,11 +952,49 @@ void av1_highbd_resize_plane(const uint8_t *const input, int height, int width,
}
Error:
- free(intbuf);
- free(tmpbuf);
- free(arrbuf);
- free(arrbuf2);
+ aom_free(intbuf);
+ aom_free(tmpbuf);
+ aom_free(arrbuf);
+ aom_free(arrbuf2);
}
+
+#if CONFIG_FRAME_SUPERRES
+static void highbd_upscale_normative(const uint16_t *const input, int length,
+ uint16_t *output, int olength, int bd) {
+#if CONFIG_LOOP_RESTORATION
+ highbd_interpolate_simple(input, length, output, olength, bd);
+#else
+ highbd_interpolate(input, length, output, olength, bd);
+#endif // CONFIG_LOOP_RESTORATION
+}
+
+static void highbd_upscale_normative_plane(const uint8_t *const input,
+ int height, int width, int in_stride,
+ uint8_t *output, int height2,
+ int width2, int out_stride, int bd) {
+ int i;
+ uint16_t *intbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * width2 * height);
+ uint16_t *arrbuf = (uint16_t *)aom_malloc(sizeof(uint16_t) * height);
+ uint16_t *arrbuf2 = (uint16_t *)aom_malloc(sizeof(uint16_t) * height2);
+ if (intbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error;
+ for (i = 0; i < height; ++i) {
+ highbd_upscale_normative(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
+ intbuf + width2 * i, width2, bd);
+ }
+ for (i = 0; i < width2; ++i) {
+ highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
+ highbd_upscale_normative(arrbuf, height, arrbuf2, height2, bd);
+ highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
+ arrbuf2);
+ }
+
+Error:
+ aom_free(intbuf);
+ aom_free(arrbuf);
+ aom_free(arrbuf2);
+}
+#endif // CONFIG_FRAME_SUPERRES
+
#endif // CONFIG_HIGHBITDEPTH
void av1_resize_frame420(const uint8_t *const y, int y_stride,
@@ -912,11 +1002,11 @@ void av1_resize_frame420(const uint8_t *const y, int y_stride,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth) {
- av1_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
- av1_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2,
- owidth / 2, ouv_stride);
- av1_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2,
- owidth / 2, ouv_stride);
+ resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
+ resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2, owidth / 2,
+ ouv_stride);
+ resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2, owidth / 2,
+ ouv_stride);
}
void av1_resize_frame422(const uint8_t *const y, int y_stride,
@@ -924,11 +1014,11 @@ void av1_resize_frame422(const uint8_t *const y, int y_stride,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth) {
- av1_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
- av1_resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2,
- ouv_stride);
- av1_resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2,
- ouv_stride);
+ resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
+ resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2,
+ ouv_stride);
+ resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2,
+ ouv_stride);
}
void av1_resize_frame444(const uint8_t *const y, int y_stride,
@@ -936,11 +1026,9 @@ void av1_resize_frame444(const uint8_t *const y, int y_stride,
int uv_stride, int height, int width, uint8_t *oy,
int oy_stride, uint8_t *ou, uint8_t *ov,
int ouv_stride, int oheight, int owidth) {
- av1_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
- av1_resize_plane(u, height, width, uv_stride, ou, oheight, owidth,
- ouv_stride);
- av1_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
- ouv_stride);
+ resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
+ resize_plane(u, height, width, uv_stride, ou, oheight, owidth, ouv_stride);
+ resize_plane(v, height, width, uv_stride, ov, oheight, owidth, ouv_stride);
}
#if CONFIG_HIGHBITDEPTH
@@ -950,12 +1038,12 @@ void av1_highbd_resize_frame420(const uint8_t *const y, int y_stride,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd) {
- av1_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
- oy_stride, bd);
- av1_highbd_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2,
- owidth / 2, ouv_stride, bd);
- av1_highbd_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2,
- owidth / 2, ouv_stride, bd);
+ highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
+ oy_stride, bd);
+ highbd_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2,
+ owidth / 2, ouv_stride, bd);
+ highbd_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2,
+ owidth / 2, ouv_stride, bd);
}
void av1_highbd_resize_frame422(const uint8_t *const y, int y_stride,
@@ -964,12 +1052,12 @@ void av1_highbd_resize_frame422(const uint8_t *const y, int y_stride,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd) {
- av1_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
- oy_stride, bd);
- av1_highbd_resize_plane(u, height, width / 2, uv_stride, ou, oheight,
- owidth / 2, ouv_stride, bd);
- av1_highbd_resize_plane(v, height, width / 2, uv_stride, ov, oheight,
- owidth / 2, ouv_stride, bd);
+ highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
+ oy_stride, bd);
+ highbd_resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2,
+ ouv_stride, bd);
+ highbd_resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2,
+ ouv_stride, bd);
}
void av1_highbd_resize_frame444(const uint8_t *const y, int y_stride,
@@ -978,12 +1066,12 @@ void av1_highbd_resize_frame444(const uint8_t *const y, int y_stride,
uint8_t *oy, int oy_stride, uint8_t *ou,
uint8_t *ov, int ouv_stride, int oheight,
int owidth, int bd) {
- av1_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
- oy_stride, bd);
- av1_highbd_resize_plane(u, height, width, uv_stride, ou, oheight, owidth,
- ouv_stride, bd);
- av1_highbd_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
- ouv_stride, bd);
+ highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
+ oy_stride, bd);
+ highbd_resize_plane(u, height, width, uv_stride, ou, oheight, owidth,
+ ouv_stride, bd);
+ highbd_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
+ ouv_stride, bd);
}
#endif // CONFIG_HIGHBITDEPTH
@@ -1013,30 +1101,56 @@ void av1_resize_and_extend_frame(const YV12_BUFFER_CONFIG *src,
for (i = 0; i < MAX_MB_PLANE; ++i) {
#if CONFIG_HIGHBITDEPTH
if (src->flags & YV12_FLAG_HIGHBITDEPTH)
- av1_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
- src_strides[i], dsts[i], dst_heights[i],
- dst_widths[i], dst_strides[i], bd);
+ highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
+ src_strides[i], dsts[i], dst_heights[i],
+ dst_widths[i], dst_strides[i], bd);
else
#endif // CONFIG_HIGHBITDEPTH
- av1_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
- dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
+ resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
+ dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
}
aom_extend_frame_borders(dst);
}
-YV12_BUFFER_CONFIG *av1_scale_if_required_fast(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled) {
- if (cm->width != unscaled->y_crop_width ||
- cm->height != unscaled->y_crop_height) {
- // For 2x2 scaling down.
- aom_scale_frame(unscaled, scaled, unscaled->y_buffer, 9, 2, 1, 2, 1, 0);
- aom_extend_frame_borders(scaled);
- return scaled;
- } else {
- return unscaled;
+#if CONFIG_FRAME_SUPERRES
+#if CONFIG_HIGHBITDEPTH
+void av1_upscale_normative_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int bd) {
+#else
+void av1_upscale_normative_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+#endif // CONFIG_HIGHBITDEPTH
+ // TODO(dkovalev): replace YV12_BUFFER_CONFIG with aom_image_t
+ int i;
+ const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
+ src->v_buffer };
+ const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
+ const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
+ src->uv_crop_width };
+ const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
+ src->uv_crop_height };
+ uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
+ const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
+ const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
+ dst->uv_crop_width };
+ const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
+ dst->uv_crop_height };
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_HIGHBITDEPTH
+ if (src->flags & YV12_FLAG_HIGHBITDEPTH)
+ highbd_upscale_normative_plane(srcs[i], src_heights[i], src_widths[i],
+ src_strides[i], dsts[i], dst_heights[i],
+ dst_widths[i], dst_strides[i], bd);
+ else
+#endif // CONFIG_HIGHBITDEPTH
+ upscale_normative_plane(srcs[i], src_heights[i], src_widths[i],
+ src_strides[i], dsts[i], dst_heights[i],
+ dst_widths[i], dst_strides[i]);
}
+ aom_extend_frame_borders(dst);
}
+#endif // CONFIG_FRAME_SUPERRES
YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
@@ -1054,17 +1168,45 @@ YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
}
}
-void av1_calculate_scaled_size(int *width, int *height, int num) {
- if (num != SCALE_DENOMINATOR) {
- *width = *width * num / SCALE_DENOMINATOR;
- *height = *height * num / SCALE_DENOMINATOR;
- // Make width and height even
- *width += *width & 1;
- *height += *height & 1;
+// Calculates scaled dimensions given original dimensions and the scale
+// denominator. If 'scale_height' is 1, both width and height are scaled;
+// otherwise, only the width is scaled.
+static void calculate_scaled_size_helper(int *width, int *height, int denom,
+ int scale_height) {
+ if (denom != SCALE_NUMERATOR) {
+ *width = *width * SCALE_NUMERATOR / denom;
+ *width += *width & 1; // Make it even.
+ if (scale_height) {
+ *height = *height * SCALE_NUMERATOR / denom;
+ *height += *height & 1; // Make it even.
+ }
}
}
+void av1_calculate_scaled_size(int *width, int *height, int resize_denom) {
+ calculate_scaled_size_helper(width, height, resize_denom, 1);
+}
+
#if CONFIG_FRAME_SUPERRES
+void av1_calculate_scaled_superres_size(int *width, int *height,
+ int superres_denom) {
+ calculate_scaled_size_helper(width, height, superres_denom,
+ !CONFIG_HORZONLY_FRAME_SUPERRES);
+}
+
+void av1_calculate_unscaled_superres_size(int *width, int *height, int denom) {
+ if (denom != SCALE_NUMERATOR) {
+ // Note: av1_calculate_scaled_superres_size() rounds *up* after division
+ // when the resulting dimensions are odd. So here, we round *down*.
+ *width = *width * denom / SCALE_NUMERATOR;
+#if CONFIG_HORZONLY_FRAME_SUPERRES
+ (void)height;
+#else
+ *height = *height * denom / SCALE_NUMERATOR;
+#endif // CONFIG_HORZONLY_FRAME_SUPERRES
+ }
+}
+
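A worked sketch of the rounding relationship between the two directions, assuming SCALE_NUMERATOR is 8 as elsewhere in this tree: scaling rounds an odd result up to the next even value, while the inverse rounds down, so a round trip can land back on the original size.

#include <stdio.h>

#define SCALE_NUMERATOR 8

int main(void) {
  int w = 1920, denom = 13;                         // illustrative values
  int scaled = w * SCALE_NUMERATOR / denom;         // 15360 / 13 = 1181
  scaled += scaled & 1;                             // -> 1182 (made even)
  int unscaled = scaled * denom / SCALE_NUMERATOR;  // 15366 / 8 -> 1920
  printf("%d -> %d -> %d\n", w, scaled, unscaled);
  return 0;
}
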
// TODO(afergs): Look for in-place upscaling
// TODO(afergs): aom_ vs av1_ functions? Which can I use?
// Upscale decoded image.
@@ -1138,11 +1280,13 @@ void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
// Scale up and back into frame_to_show.
assert(frame_to_show->y_crop_width != cm->width);
- assert(frame_to_show->y_crop_height != cm->height);
+ assert(IMPLIES(!CONFIG_HORZONLY_FRAME_SUPERRES,
+ frame_to_show->y_crop_height != cm->height));
#if CONFIG_HIGHBITDEPTH
- av1_resize_and_extend_frame(&copy_buffer, frame_to_show, (int)cm->bit_depth);
+ av1_upscale_normative_and_extend_frame(&copy_buffer, frame_to_show,
+ (int)cm->bit_depth);
#else
- av1_resize_and_extend_frame(&copy_buffer, frame_to_show);
+ av1_upscale_normative_and_extend_frame(&copy_buffer, frame_to_show);
#endif // CONFIG_HIGHBITDEPTH
// Free the copy buffer
diff --git a/third_party/aom/av1/common/resize.h b/third_party/aom/av1/common/resize.h
index ca2c04686..66b32c72d 100644
--- a/third_party/aom/av1/common/resize.h
+++ b/third_party/aom/av1/common/resize.h
@@ -71,22 +71,40 @@ void av1_resize_and_extend_frame(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
#endif // CONFIG_HIGHBITDEPTH
-YV12_BUFFER_CONFIG *av1_scale_if_required_fast(AV1_COMMON *cm,
- YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled);
+#if CONFIG_FRAME_SUPERRES
+#if CONFIG_HIGHBITDEPTH
+void av1_upscale_normative_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int bd);
+#else
+void av1_upscale_normative_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst);
+#endif // CONFIG_HIGHBITDEPTH
+#endif // CONFIG_FRAME_SUPERRES
YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled);
-void av1_calculate_scaled_size(int *width, int *height, int num);
+// Calculates the scaled dimensions from the given original dimensions and the
+// resize scale denominator.
+void av1_calculate_scaled_size(int *width, int *height, int resize_denom);
#if CONFIG_FRAME_SUPERRES
+// Similar to above, but calculates scaled dimensions after superres from the
+// given original dimensions and superres scale denominator.
+void av1_calculate_scaled_superres_size(int *width, int *height,
+ int superres_denom);
+
+// Inverse of av1_calculate_scaled_superres_size() above: calculates the
+// original dimensions from the given scaled dimensions and the scale
+// denominator.
+void av1_calculate_unscaled_superres_size(int *width, int *height, int denom);
+
void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool);
// Returns 1 if the frame needs no superres upscaling (i.e. the superres
// scale denominator equals the numerator) and 0 otherwise.
static INLINE int av1_superres_unscaled(const AV1_COMMON *cm) {
- return (cm->superres_scale_numerator == SCALE_DENOMINATOR);
+ return (cm->superres_scale_denominator == SCALE_NUMERATOR);
}
#endif // CONFIG_FRAME_SUPERRES
diff --git a/third_party/aom/av1/common/restoration.c b/third_party/aom/av1/common/restoration.c
index 8293af154..00441f072 100644
--- a/third_party/aom/av1/common/restoration.c
+++ b/third_party/aom/av1/common/restoration.c
@@ -19,6 +19,7 @@
#include "av1/common/restoration.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
+
#include "aom_ports/mem.h"
const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
@@ -29,11 +30,18 @@ const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
{ -2, 3, 1, 5 }, { -2, 3, 1, 6 }, { -3, 4, 1, 3 }, { -3, 4, 1, 4 },
{ -3, 4, 1, 5 }, { -3, 4, 1, 6 }, { -3, 4, 1, 7 }, { -3, 4, 1, 8 }
#else
- // r1, eps1, r2, eps2
+// r1, eps1, r2, eps2
+#if MAX_RADIUS == 2
+ { 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 },
+ { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
+ { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 2, 30, 1, 2 },
+ { 2, 50, 1, 12 }, { 2, 60, 1, 13 }, { 2, 70, 1, 14 }, { 2, 80, 1, 15 },
+#else
{ 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 },
{ 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
{ 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 3, 30, 1, 10 },
{ 3, 50, 1, 12 }, { 3, 50, 2, 25 }, { 3, 60, 2, 35 }, { 3, 70, 2, 45 },
+#endif // MAX_RADIUS == 2
#endif
};
@@ -76,12 +84,6 @@ void av1_free_restoration_struct(RestorationInfo *rst_info) {
rst_info->sgrproj_info = NULL;
}
-#define MAX_RADIUS 3 // Only 1, 2, 3 allowed
-#define MAX_EPS 80 // Max value of eps
-#define MAX_NELEM ((2 * MAX_RADIUS + 1) * (2 * MAX_RADIUS + 1))
-#define SGRPROJ_MTABLE_BITS 20
-#define SGRPROJ_RECIP_BITS 12
-
// TODO(debargha): This table can be substantially reduced since only a few
// values are actually used.
int sgrproj_mtable[MAX_EPS][MAX_NELEM];
@@ -102,82 +104,277 @@ static void loop_restoration_init(RestorationInternal *rst, int kf) {
rst->keyframe = kf;
}
-void extend_frame(uint8_t *data, int width, int height, int stride) {
+void extend_frame(uint8_t *data, int width, int height, int stride,
+ int border_horz, int border_vert) {
uint8_t *data_p;
int i;
for (i = 0; i < height; ++i) {
data_p = data + i * stride;
- memset(data_p - WIENER_HALFWIN, data_p[0], WIENER_HALFWIN);
- memset(data_p + width, data_p[width - 1], WIENER_HALFWIN);
+ memset(data_p - border_horz, data_p[0], border_horz);
+ memset(data_p + width, data_p[width - 1], border_horz);
}
- data_p = data - WIENER_HALFWIN;
- for (i = -WIENER_HALFWIN; i < 0; ++i) {
- memcpy(data_p + i * stride, data_p, width + 2 * WIENER_HALFWIN);
+ data_p = data - border_horz;
+ for (i = -border_vert; i < 0; ++i) {
+ memcpy(data_p + i * stride, data_p, width + 2 * border_horz);
}
- for (i = height; i < height + WIENER_HALFWIN; ++i) {
+ for (i = height; i < height + border_vert; ++i) {
memcpy(data_p + i * stride, data_p + (height - 1) * stride,
- width + 2 * WIENER_HALFWIN);
+ width + 2 * border_horz);
}
}
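
A usage sketch for the generalized signature: the plane must live inside an allocation that already reserves border_horz/border_vert pixels on each side, since the function writes outside [0, width) x [0, height). Buffer geometry below is illustrative.

#include <stdint.h>
#include <string.h>

void extend_frame(uint8_t *data, int width, int height, int stride,
                  int border_horz, int border_vert);  // as defined above

int main(void) {
  enum { W = 4, H = 3, B = 2, STRIDE = W + 2 * B };
  uint8_t buf[(H + 2 * B) * STRIDE] = { 0 };
  uint8_t *plane = buf + B * STRIDE + B;  // frame origin inside the borders
  for (int i = 0; i < H; ++i)
    for (int j = 0; j < W; ++j) plane[i * STRIDE + j] = (uint8_t)(16 * (i + j));
  extend_frame(plane, W, H, STRIDE, B, B);  // replicate edges into borders
  return 0;
}
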
-static void loop_copy_tile(uint8_t *data, int tile_idx, int subtile_idx,
- int subtile_bits, int width, int height, int stride,
- RestorationInternal *rst, uint8_t *dst,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+
+// This function sets up a processing stripe by replacing the vertical
+// stripe boundary (2 lines above and 2 lines below) with data coming
+// from the above/below buffers. Before doing so, the original
+// frame data is saved into a temporary buffer, so that it
+// can be restored by the restore_processing_stripe_boundary()
+// function after the processing stripe has been filtered.
+// Returns the height of the processing stripe.
+static int setup_processing_stripe_boundary(int y0, int v_end, int h_start,
+ int h_end, uint8_t *data,
+ int stride,
+ RestorationInternal *rst,
+ int use_highbd) {
+ int y, y_stripe_topmost, stripe_index, i;
+ int tile_offset = RESTORATION_TILE_OFFSET >> rst->subsampling_y;
+ int stripe_height = rst->rsi->procunit_height;
+ int comp = rst->component;
+ uint8_t *boundary_above_buf = rst->stripe_boundary_above[comp];
+ uint8_t *boundary_below_buf = rst->stripe_boundary_below[comp];
+ int boundary_stride = rst->stripe_boundary_stride[comp];
+ int x0 = h_start - RESTORATION_EXTRA_HORZ;
+ int x1 = h_end + RESTORATION_EXTRA_HORZ;
+
+ stripe_index = (y0 + tile_offset) / stripe_height;
+ y_stripe_topmost = stripe_index * stripe_height - tile_offset;
+ boundary_above_buf +=
+ ((stripe_index - 1) * 2 * boundary_stride + RESTORATION_EXTRA_HORZ)
+ << use_highbd;
+ boundary_below_buf +=
+ (stripe_index * 2 * boundary_stride + RESTORATION_EXTRA_HORZ)
+ << use_highbd;
+
+ // Set up the 2 lines above the stripe
+ for (i = 0; i < 2; i++) {
+ y = y_stripe_topmost - 2 + i;
+ if (y >= 0 && y < y0 && y >= y0 - 2) {
+ uint8_t *p = data + ((y * stride + x0) << use_highbd);
+ uint8_t *new_data =
+ boundary_above_buf + ((i * boundary_stride + x0) << use_highbd);
+ // Save old pixels
+ memcpy(rst->tmp_save_above[i], p, (x1 - x0) << use_highbd);
+ // Replace width pixels from boundary_above_buf
+ memcpy(p, new_data, (x1 - x0) << use_highbd);
+ }
+ }
+ // setup the 2 lines below the stripe
+ for (i = 0; i < 2; i++) {
+ y = y_stripe_topmost + stripe_height + i;
+ if (y < v_end + 2) {
+ uint8_t *p = data + ((y * stride + x0) << use_highbd);
+ uint8_t *new_data =
+ boundary_below_buf + ((i * boundary_stride + x0) << use_highbd);
+ // Save old pixels
+ memcpy(rst->tmp_save_below[i], p, (x1 - x0) << use_highbd);
+ // Replace width pixels from boundary_below_buf
+ memcpy(p, new_data, (x1 - x0) << use_highbd);
+ }
+ }
+ // Return actual stripe height
+ return AOMMIN(v_end, y_stripe_topmost + stripe_height) - y0;
+}
+
+// This function restores the boundary lines modified by
+// setup_processing_stripe_boundary.
+static void restore_processing_stripe_boundary(int y0, int v_end, int h_start,
+ int h_end, uint8_t *data,
+ int stride,
+ RestorationInternal *rst,
+ int use_highbd) {
+ int y, y_stripe_topmost, i, stripe_index;
+ int tile_offset = RESTORATION_TILE_OFFSET >> rst->subsampling_y;
+ int stripe_height = rst->rsi->procunit_height;
+ int x0 = h_start - RESTORATION_EXTRA_HORZ;
+ int x1 = h_end + RESTORATION_EXTRA_HORZ;
+
+ stripe_index = (y0 + tile_offset) / stripe_height;
+ y_stripe_topmost = stripe_index * stripe_height - tile_offset;
+
+ // restore the 2 lines above the stripe
+ for (i = 0; i < 2; i++) {
+ y = y_stripe_topmost - 2 + i;
+ if (y >= 0 && y < y0 && y >= y0 - 2) {
+ uint8_t *p = data + ((y * stride + x0) << use_highbd);
+ memcpy(p, rst->tmp_save_above[i], (x1 - x0) << use_highbd);
+ }
+ }
+ // restore the 2 lines below the stripe
+ for (i = 0; i < 2; i++) {
+ y = y_stripe_topmost + stripe_height + i;
+ if (y < v_end + 2) {
+ uint8_t *p = data + ((y * stride + x0) << use_highbd);
+ memcpy(p, rst->tmp_save_below[i], (x1 - x0) << use_highbd);
+ }
+ }
+}
+
+#endif
+
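The two functions above implement a simple swap discipline around each stripe: save the two frame rows into rst->tmp_save_above/below, overwrite them with deblocked boundary data, filter, then put the originals back. A minimal standalone sketch of that discipline (row width and names are illustrative):

#include <stdint.h>
#include <string.h>

enum { ROW_BYTES = 64 };
static uint8_t saved[2][ROW_BYTES];

// Before filtering: save two frame rows, then overwrite with boundary rows.
static void swap_in(uint8_t *rows[2], const uint8_t *boundary[2]) {
  for (int i = 0; i < 2; ++i) {
    memcpy(saved[i], rows[i], ROW_BYTES);
    memcpy(rows[i], boundary[i], ROW_BYTES);
  }
}

// After filtering: restore the original frame rows.
static void swap_back(uint8_t *rows[2]) {
  for (int i = 0; i < 2; ++i) memcpy(rows[i], saved[i], ROW_BYTES);
}
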
+static void loop_copy_tile(uint8_t *data, int tile_idx, int width, int height,
+ int stride, RestorationInternal *rst, uint8_t *dst,
int dst_stride) {
const int tile_width = rst->tile_width;
const int tile_height = rst->tile_height;
- int i;
- int h_start, h_end, v_start, v_end;
- av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, rst->nhtiles,
- rst->nvtiles, tile_width, tile_height, width, height,
- 0, 0, &h_start, &h_end, &v_start, &v_end);
- for (i = v_start; i < v_end; ++i)
- memcpy(dst + i * dst_stride + h_start, data + i * stride + h_start,
- h_end - h_start);
+ RestorationTileLimits limits =
+ av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
+ tile_height, width, height);
+#endif
+ for (int i = limits.v_start; i < limits.v_end; ++i)
+ memcpy(dst + i * dst_stride + limits.h_start,
+ data + i * stride + limits.h_start, limits.h_end - limits.h_start);
+}
+
+static void stepdown_wiener_kernel(const InterpKernel orig, InterpKernel vert,
+ int boundary_dist, int istop) {
+ memcpy(vert, orig, sizeof(InterpKernel));
+ switch (boundary_dist) {
+ case 0:
+ vert[WIENER_HALFWIN] += vert[2] + vert[1] + vert[0];
+ vert[2] = vert[1] = vert[0] = 0;
+ break;
+ case 1:
+ vert[2] += vert[1] + vert[0];
+ vert[1] = vert[0] = 0;
+ break;
+ case 2:
+ vert[1] += vert[0];
+ vert[0] = 0;
+ break;
+ default: break;
+ }
+ if (!istop) {
+ int tmp;
+ tmp = vert[0];
+ vert[0] = vert[WIENER_WIN - 1];
+ vert[WIENER_WIN - 1] = tmp;
+ tmp = vert[1];
+ vert[1] = vert[WIENER_WIN - 2];
+ vert[WIENER_WIN - 2] = tmp;
+ tmp = vert[2];
+ vert[2] = vert[WIENER_WIN - 3];
+ vert[WIENER_WIN - 3] = tmp;
+ }
}
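
The folding above preserves the kernel's DC gain: taps that would read past the frame edge are added into the nearest in-bounds tap, so the seven taps still sum to 1 << FILTER_BITS, and istop == 0 simply mirrors the result for the bottom edge. A worked sketch with an illustrative (not codec-derived) kernel:

#include <stdio.h>

int main(void) {
  int vert[7] = { 3, -7, 15, 106, 15, -7, 3 };  // sums to 128
  // boundary_dist == 1: the two taps above the edge fold into vert[2]
  vert[2] += vert[1] + vert[0];  // 15 + (-7) + 3 = 11
  vert[1] = vert[0] = 0;         // kernel is now {0, 0, 11, 106, 15, -7, 3}
  int sum = 0;
  for (int k = 0; k < 7; ++k) sum += vert[k];
  printf("sum = %d\n", sum);     // still 128
  return 0;
}
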
static void loop_wiener_filter_tile(uint8_t *data, int tile_idx, int width,
int height, int stride,
RestorationInternal *rst, uint8_t *dst,
int dst_stride) {
+ const int procunit_width = rst->rsi->procunit_width;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int procunit_height;
+#else
+ const int procunit_height = rst->rsi->procunit_height;
+#endif
const int tile_width = rst->tile_width;
const int tile_height = rst->tile_height;
- int i, j;
- int h_start, h_end, v_start, v_end;
if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
- dst_stride);
+ loop_copy_tile(data, tile_idx, width, height, stride, rst, dst, dst_stride);
return;
}
- av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
- tile_width, tile_height, width, height, 0, 0,
- &h_start, &h_end, &v_start, &v_end);
+ InterpKernel vertical_topbot;
+ RestorationTileLimits limits =
+ av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
+ tile_height, width, height);
+#endif
+
// Convolve the whole tile (done in blocks here to match the requirements
// of the vectorized convolve functions, but the result is equivalent)
- for (i = v_start; i < v_end; i += MAX_SB_SIZE)
- for (j = h_start; j < h_end; j += MAX_SB_SIZE) {
- int w = AOMMIN(MAX_SB_SIZE, (h_end - j + 15) & ~15);
- int h = AOMMIN(MAX_SB_SIZE, (v_end - i + 15) & ~15);
+ for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int h = setup_processing_stripe_boundary(
+ i, limits.v_end, limits.h_start, limits.h_end, data, stride, rst, 0);
+ h = ALIGN_POWER_OF_TWO(h, 1);
+ procunit_height = h;
+#else
+ int h = AOMMIN(procunit_height, (limits.v_end - i + 15) & ~15);
+#endif
+ for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
+ int w = AOMMIN(procunit_width, (limits.h_end - j + 15) & ~15);
const uint8_t *data_p = data + i * stride + j;
uint8_t *dst_p = dst + i * dst_stride + j;
+ // Note h is at least 16
+ for (int b = 0; b < WIENER_HALFWIN - WIENER_BORDER_VERT; ++b) {
+ stepdown_wiener_kernel(rst->rsi->wiener_info[tile_idx].vfilter,
+ vertical_topbot, WIENER_BORDER_VERT + b, 1);
+#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
+ aom_convolve8_add_src_hip(data_p, stride, dst_p, dst_stride,
+ rst->rsi->wiener_info[tile_idx].hfilter, 16,
+ vertical_topbot, 16, w, 1);
+#else
+ aom_convolve8_add_src(data_p, stride, dst_p, dst_stride,
+ rst->rsi->wiener_info[tile_idx].hfilter, 16,
+ vertical_topbot, 16, w, 1);
+#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
+ data_p += stride;
+ dst_p += dst_stride;
+ }
#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
aom_convolve8_add_src_hip(data_p, stride, dst_p, dst_stride,
rst->rsi->wiener_info[tile_idx].hfilter, 16,
rst->rsi->wiener_info[tile_idx].vfilter, 16, w,
- h);
+ h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
#else
aom_convolve8_add_src(data_p, stride, dst_p, dst_stride,
rst->rsi->wiener_info[tile_idx].hfilter, 16,
- rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h);
+ rst->rsi->wiener_info[tile_idx].vfilter, 16, w,
+ h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
+#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
+ data_p += stride * (h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
+ dst_p += dst_stride * (h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
+ for (int b = WIENER_HALFWIN - WIENER_BORDER_VERT - 1; b >= 0; --b) {
+ stepdown_wiener_kernel(rst->rsi->wiener_info[tile_idx].vfilter,
+ vertical_topbot, WIENER_BORDER_VERT + b, 0);
+#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
+ aom_convolve8_add_src_hip(data_p, stride, dst_p, dst_stride,
+ rst->rsi->wiener_info[tile_idx].hfilter, 16,
+ vertical_topbot, 16, w, 1);
+#else
+ aom_convolve8_add_src(data_p, stride, dst_p, dst_stride,
+ rst->rsi->wiener_info[tile_idx].hfilter, 16,
+ vertical_topbot, 16, w, 1);
#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
+ data_p += stride;
+ dst_p += dst_stride;
+ }
}
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, data, stride, rst, 0);
+#endif
+ }
}
static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
RestorationInternal *rst, uint8_t *dst,
int dst_stride) {
int tile_idx;
- extend_frame(data, width, height, stride);
+ extend_frame(data, width, height, stride, WIENER_BORDER_HORZ,
+ WIENER_BORDER_VERT);
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst,
dst_stride);
@@ -560,36 +757,48 @@ const int32_t x_by_xplus1[256] = {
const int32_t one_by_x[MAX_NELEM] = {
4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315,
- 293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164, 158,
- 152, 146, 141, 137, 132, 128, 124, 120, 117, 114, 111, 108, 105,
- 102, 100, 98, 95, 93, 91, 89, 87, 85, 84
+ 293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164,
+#if MAX_RADIUS > 2
+ 158, 152, 146, 141, 137, 132, 128, 124, 120, 117, 114, 111, 108,
+ 105, 102, 100, 98, 95, 93, 91, 89, 87, 85, 84
+#endif // MAX_RADIUS > 2
};
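
Entry n-1 of this table holds round(2^SGRPROJ_RECIP_BITS / n), letting the filter replace a division by the box pixel count with a multiply and shift. A sketch that regenerates the first entries, taking SGRPROJ_RECIP_BITS == 12 from the defines this hunk removes from here:

#include <stdio.h>

#define SGRPROJ_RECIP_BITS 12

int main(void) {
  for (int n = 1; n <= 13; ++n)  // first 13 entries of one_by_x
    printf("%d ", ((1 << SGRPROJ_RECIP_BITS) + n / 2) / n);
  printf("\n");  // 4096 2048 1365 1024 819 683 585 512 455 410 372 341 315
  return 0;
}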
static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
- int height, int stride,
- int bit_depth, int r, int eps,
- int32_t *tmpbuf) {
- int32_t *A = tmpbuf;
- int32_t *B = A + SGRPROJ_OUTBUF_SIZE;
- int8_t num[RESTORATION_TILEPELS_MAX];
- int i, j;
+ int height, int dgd_stride,
+ int32_t *dst, int dst_stride,
+ int bit_depth, int r, int eps) {
+ const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
+ const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
+ const int num_stride = width_ext;
// Adjusting the stride of A and B here appears to avoid bad cache effects,
// leading to a significant speed improvement.
// We also align the stride to a multiple of 16 bytes, for consistency
// with the SIMD version of this function.
- int buf_stride = ((width + 3) & ~3) + 16;
+ int buf_stride = ((width_ext + 3) & ~3) + 16;
+ int32_t A_[RESTORATION_PROC_UNIT_PELS];
+ int32_t B_[RESTORATION_PROC_UNIT_PELS];
+ int32_t *A = A_;
+ int32_t *B = B_;
+ int8_t num_[RESTORATION_PROC_UNIT_PELS];
+ int8_t *num = num_ + SGRPROJ_BORDER_VERT * num_stride + SGRPROJ_BORDER_HORZ;
+ int i, j;
// Don't filter tiles with dimensions < 5 on any axis
if ((width < 5) || (height < 5)) return;
- boxsum(dgd, width, height, stride, r, 0, B, buf_stride);
- boxsum(dgd, width, height, stride, r, 1, A, buf_stride);
- boxnum(width, height, r, num, width);
+ boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
+ width_ext, height_ext, dgd_stride, r, 0, B, buf_stride);
+ boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
+ width_ext, height_ext, dgd_stride, r, 1, A, buf_stride);
+ boxnum(width_ext, height_ext, r, num_, num_stride);
assert(r <= 3);
+ A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
+ B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
const int k = i * buf_stride + j;
- const int n = num[i * width + j];
+ const int n = num[i * num_stride + j];
// a < 2^16 * n < 2^22 regardless of bit depth
uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8));
@@ -625,106 +834,115 @@ static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
j = 0;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a =
3 * A[k] + 2 * A[k + 1] + 2 * A[k + buf_stride] + A[k + buf_stride + 1];
const int32_t b =
3 * B[k] + 2 * B[k + 1] + 2 * B[k + buf_stride] + B[k + buf_stride + 1];
const int32_t v = a * dgd[l] + b;
- dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
i = 0;
j = width - 1;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a =
3 * A[k] + 2 * A[k - 1] + 2 * A[k + buf_stride] + A[k + buf_stride - 1];
const int32_t b =
3 * B[k] + 2 * B[k - 1] + 2 * B[k + buf_stride] + B[k + buf_stride - 1];
const int32_t v = a * dgd[l] + b;
- dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
i = height - 1;
j = 0;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a =
3 * A[k] + 2 * A[k + 1] + 2 * A[k - buf_stride] + A[k - buf_stride + 1];
const int32_t b =
3 * B[k] + 2 * B[k + 1] + 2 * B[k - buf_stride] + B[k - buf_stride + 1];
const int32_t v = a * dgd[l] + b;
- dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
i = height - 1;
j = width - 1;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a =
3 * A[k] + 2 * A[k - 1] + 2 * A[k - buf_stride] + A[k - buf_stride - 1];
const int32_t b =
3 * B[k] + 2 * B[k - 1] + 2 * B[k - buf_stride] + B[k - buf_stride - 1];
const int32_t v = a * dgd[l] + b;
- dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
i = 0;
for (j = 1; j < width - 1; ++j) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + buf_stride] +
A[k + buf_stride - 1] + A[k + buf_stride + 1];
const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + buf_stride] +
B[k + buf_stride - 1] + B[k + buf_stride + 1];
const int32_t v = a * dgd[l] + b;
- dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
i = height - 1;
for (j = 1; j < width - 1; ++j) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - buf_stride] +
A[k - buf_stride - 1] + A[k - buf_stride + 1];
const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - buf_stride] +
B[k - buf_stride - 1] + B[k - buf_stride + 1];
const int32_t v = a * dgd[l] + b;
- dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
j = 0;
for (i = 1; i < height - 1; ++i) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
A[k + 1] + A[k - buf_stride + 1] + A[k + buf_stride + 1];
const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
B[k + 1] + B[k - buf_stride + 1] + B[k + buf_stride + 1];
const int32_t v = a * dgd[l] + b;
- dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
j = width - 1;
for (i = 1; i < height - 1; ++i) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
A[k - 1] + A[k - buf_stride - 1] + A[k + buf_stride - 1];
const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) +
B[k - 1] + B[k - buf_stride - 1] + B[k + buf_stride - 1];
const int32_t v = a * dgd[l] + b;
- dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
for (i = 1; i < height - 1; ++i) {
for (j = 1; j < width - 1; ++j) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
+ const int m = i * dst_stride + j;
const int nb = 5;
const int32_t a =
(A[k] + A[k - 1] + A[k + 1] + A[k - buf_stride] + A[k + buf_stride]) *
@@ -739,22 +957,26 @@ static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
3;
const int32_t v = a * dgd[l] + b;
- dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
+ dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
}
}
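
The shift amounts follow from the stencil weights: in each branch the weights applied to A and B sum to a power of two, and nb names that power.

  interior: 4*(1 centre + 4 cross) + 3*(4 diagonals) = 32 = 2^5  ->  nb = 5
  corner:   3 + 2 + 2 + 1                            =  8 = 2^3  ->  nb = 3
  edge:     1 + 2*2 + 1 + 1 + 1                      =  8 = 2^3  ->  nb = 3

So ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS) divides out both the weight sum and the SGRPROJ_SGR_BITS scale of A/B, leaving SGRPROJ_RST_BITS of extra precision in dst.
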
void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height,
int stride, int32_t *dst, int dst_stride,
- int r, int eps, int32_t *tmpbuf) {
+ int r, int eps) {
+ int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
+ const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ;
+ int32_t *dgd32 =
+ dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ;
int i, j;
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- dst[i * dst_stride + j] = dgd[i * stride + j];
+ for (i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
+ for (j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
+ dgd32[i * dgd32_stride + j] = dgd[i * stride + j];
}
}
- av1_selfguided_restoration_internal(dst, width, height, dst_stride, 8, r, eps,
- tmpbuf);
+ av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, dst,
+ dst_stride, 8, r, eps);
}
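
The wrapper now stages the (border-extended) 8-bit source into a 32-bit scratch buffer and points dgd32 at its interior, so negative row/column offsets address the SGRPROJ border exactly as the internal function expects. A self-contained sketch of the idiom (hypothetical names; border sizes chosen to match SGRPROJ_BORDER_HORZ/VERT under striped LR):

#include <stdint.h>
enum { BORDER_H = 2, BORDER_V = 2 };
/* buf must hold (w + 2*BORDER_H) * (h + 2*BORDER_V) entries; src must already
 * be border-extended (extend_frame has run), since it is read with negative
 * offsets. */
static void stage_with_border(const uint8_t *src, int src_stride, int w, int h,
                              int32_t *buf) {
  const int stride = w + 2 * BORDER_H;
  int32_t *interior = buf + BORDER_V * stride + BORDER_H;
  for (int i = -BORDER_V; i < h + BORDER_V; ++i)
    for (int j = -BORDER_H; j < w + BORDER_H; ++j)
      interior[i * stride + j] = src[i * src_stride + j];
}
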
void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride,
@@ -853,7 +1075,6 @@ void apply_selfguided_restoration_c(uint8_t *dat, int width, int height,
int xq[2];
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
- int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if USE_HIGHPASS_IN_SGRPROJ
@@ -861,10 +1082,10 @@ void apply_selfguided_restoration_c(uint8_t *dat, int width, int height,
sgr_params[eps].corner, sgr_params[eps].edge);
#else
av1_selfguided_restoration_c(dat, width, height, stride, flt1, width,
- sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2);
+ sgr_params[eps].r1, sgr_params[eps].e1);
#endif // USE_HIGHPASS_IN_SGRPROJ
av1_selfguided_restoration_c(dat, width, height, stride, flt2, width,
- sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2);
+ sgr_params[eps].r2, sgr_params[eps].e2);
decode_xq(xqd, xq);
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
@@ -886,31 +1107,54 @@ static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width,
int height, int stride,
RestorationInternal *rst, uint8_t *dst,
int dst_stride) {
+ const int procunit_width = rst->rsi->procunit_width;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int procunit_height;
+#else
+ const int procunit_height = rst->rsi->procunit_height;
+#endif
const int tile_width = rst->tile_width;
const int tile_height = rst->tile_height;
- int h_start, h_end, v_start, v_end;
- uint8_t *data_p, *dst_p;
-
if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
- dst_stride);
+ loop_copy_tile(data, tile_idx, width, height, stride, rst, dst, dst_stride);
return;
}
- av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
- tile_width, tile_height, width, height, 0, 0,
- &h_start, &h_end, &v_start, &v_end);
- data_p = data + h_start + v_start * stride;
- dst_p = dst + h_start + v_start * dst_stride;
- apply_selfguided_restoration(data_p, h_end - h_start, v_end - v_start, stride,
- rst->rsi->sgrproj_info[tile_idx].ep,
- rst->rsi->sgrproj_info[tile_idx].xqd, dst_p,
- dst_stride, rst->tmpbuf);
+ RestorationTileLimits limits =
+ av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
+ tile_height, width, height);
+#endif
+ for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int h = setup_processing_stripe_boundary(
+ i, limits.v_end, limits.h_start, limits.h_end, data, stride, rst, 0);
+ procunit_height = h;
+#else
+ int h = AOMMIN(procunit_height, limits.v_end - i);
+#endif
+ for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
+ int w = AOMMIN(procunit_width, limits.h_end - j);
+ uint8_t *data_p = data + i * stride + j;
+ uint8_t *dst_p = dst + i * dst_stride + j;
+ apply_selfguided_restoration(
+ data_p, w, h, stride, rst->rsi->sgrproj_info[tile_idx].ep,
+ rst->rsi->sgrproj_info[tile_idx].xqd, dst_p, dst_stride, rst->tmpbuf);
+ }
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, data, stride, rst, 0);
+#endif
+ }
}
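
Stripped of the striped-LR plumbing, the rewritten tile loop is the standard fixed-size processing-unit pattern; a sketch, with process() standing in for the apply_selfguided_restoration call:

for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
  const int h = AOMMIN(procunit_height, limits.v_end - i); /* clamp last row */
  for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
    const int w = AOMMIN(procunit_width, limits.h_end - j); /* clamp last col */
    process(data + i * stride + j, w, h, stride, dst + i * dst_stride + j);
  }
}
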
static void loop_sgrproj_filter(uint8_t *data, int width, int height,
int stride, RestorationInternal *rst,
uint8_t *dst, int dst_stride) {
int tile_idx;
+ extend_frame(data, width, height, stride, SGRPROJ_BORDER_HORZ,
+ SGRPROJ_BORDER_VERT);
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst,
dst_stride);
@@ -921,10 +1165,11 @@ static void loop_switchable_filter(uint8_t *data, int width, int height,
int stride, RestorationInternal *rst,
uint8_t *dst, int dst_stride) {
int tile_idx;
- extend_frame(data, width, height, stride);
+ extend_frame(data, width, height, stride, RESTORATION_BORDER_HORZ,
+ RESTORATION_BORDER_VERT);
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
+ loop_copy_tile(data, tile_idx, width, height, stride, rst, dst,
dst_stride);
} else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst,
@@ -937,40 +1182,43 @@ static void loop_switchable_filter(uint8_t *data, int width, int height,
}
#if CONFIG_HIGHBITDEPTH
-void extend_frame_highbd(uint16_t *data, int width, int height, int stride) {
+void extend_frame_highbd(uint16_t *data, int width, int height, int stride,
+ int border_horz, int border_vert) {
uint16_t *data_p;
int i, j;
for (i = 0; i < height; ++i) {
data_p = data + i * stride;
- for (j = -WIENER_HALFWIN; j < 0; ++j) data_p[j] = data_p[0];
- for (j = width; j < width + WIENER_HALFWIN; ++j)
- data_p[j] = data_p[width - 1];
+ for (j = -border_horz; j < 0; ++j) data_p[j] = data_p[0];
+ for (j = width; j < width + border_horz; ++j) data_p[j] = data_p[width - 1];
}
- data_p = data - WIENER_HALFWIN;
- for (i = -WIENER_HALFWIN; i < 0; ++i) {
+ data_p = data - border_horz;
+ for (i = -border_vert; i < 0; ++i) {
memcpy(data_p + i * stride, data_p,
- (width + 2 * WIENER_HALFWIN) * sizeof(uint16_t));
+ (width + 2 * border_horz) * sizeof(uint16_t));
}
- for (i = height; i < height + WIENER_HALFWIN; ++i) {
+ for (i = height; i < height + border_vert; ++i) {
memcpy(data_p + i * stride, data_p + (height - 1) * stride,
- (width + 2 * WIENER_HALFWIN) * sizeof(uint16_t));
+ (width + 2 * border_horz) * sizeof(uint16_t));
}
}
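
Both extend_frame variants replicate the outermost pixels: each row is padded left and right with its first and last values, then whole extended rows are copied upwards and downwards. For a single row with border_horz = 2:

/* data_p points at the first real pixel of the row: a b c d */
for (j = -2; j < 0; ++j) data_p[j] = data_p[0]; /* a a | a b c d       */
for (j = 4; j < 6; ++j) data_p[j] = data_p[3];  /* a a | a b c d | d d */
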
-static void loop_copy_tile_highbd(uint16_t *data, int tile_idx, int subtile_idx,
- int subtile_bits, int width, int height,
- int stride, RestorationInternal *rst,
- uint16_t *dst, int dst_stride) {
+static void loop_copy_tile_highbd(uint16_t *data, int tile_idx, int width,
+ int height, int stride,
+ RestorationInternal *rst, uint16_t *dst,
+ int dst_stride) {
const int tile_width = rst->tile_width;
const int tile_height = rst->tile_height;
- int i;
- int h_start, h_end, v_start, v_end;
- av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, rst->nhtiles,
- rst->nvtiles, tile_width, tile_height, width, height,
- 0, 0, &h_start, &h_end, &v_start, &v_end);
- for (i = v_start; i < v_end; ++i)
- memcpy(dst + i * dst_stride + h_start, data + i * stride + h_start,
- (h_end - h_start) * sizeof(*dst));
+ RestorationTileLimits limits =
+ av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
+ tile_height, width, height);
+#endif
+ for (int i = limits.v_start; i < limits.v_end; ++i)
+ memcpy(dst + i * dst_stride + limits.h_start,
+ data + i * stride + limits.h_start,
+ (limits.h_end - limits.h_start) * sizeof(*dst));
}
static void loop_wiener_filter_tile_highbd(uint16_t *data, int tile_idx,
@@ -978,39 +1226,102 @@ static void loop_wiener_filter_tile_highbd(uint16_t *data, int tile_idx,
RestorationInternal *rst,
int bit_depth, uint16_t *dst,
int dst_stride) {
+ const int procunit_width = rst->rsi->procunit_width;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int procunit_height;
+#else
+ const int procunit_height = rst->rsi->procunit_height;
+#endif
const int tile_width = rst->tile_width;
const int tile_height = rst->tile_height;
- int h_start, h_end, v_start, v_end;
- int i, j;
if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
+ loop_copy_tile_highbd(data, tile_idx, width, height, stride, rst, dst,
dst_stride);
return;
}
- av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
- tile_width, tile_height, width, height, 0, 0,
- &h_start, &h_end, &v_start, &v_end);
+ RestorationTileLimits limits =
+ av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
+ tile_height, width, height);
+#endif
+ InterpKernel vertical_topbot;
+
// Convolve the whole tile (done in blocks here to match the requirements
// of the vectorized convolve functions, but the result is equivalent)
- for (i = v_start; i < v_end; i += MAX_SB_SIZE)
- for (j = h_start; j < h_end; j += MAX_SB_SIZE) {
- int w = AOMMIN(MAX_SB_SIZE, (h_end - j + 15) & ~15);
- int h = AOMMIN(MAX_SB_SIZE, (v_end - i + 15) & ~15);
+ for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int h = setup_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, (uint8_t *)data,
+ stride, rst, 1);
+ h = ALIGN_POWER_OF_TWO(h, 1);
+ procunit_height = h;
+#else
+ int h = AOMMIN(procunit_height, (limits.v_end - i + 15) & ~15);
+#endif
+ for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
+ int w = AOMMIN(procunit_width, (limits.h_end - j + 15) & ~15);
const uint16_t *data_p = data + i * stride + j;
uint16_t *dst_p = dst + i * dst_stride + j;
+ // Note h is at least 16
+ for (int b = 0; b < WIENER_HALFWIN - WIENER_BORDER_VERT; ++b) {
+ stepdown_wiener_kernel(rst->rsi->wiener_info[tile_idx].vfilter,
+ vertical_topbot, WIENER_BORDER_VERT + b, 1);
+#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
+ aom_highbd_convolve8_add_src_hip(
+ CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
+ dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
+ vertical_topbot, 16, w, 1, bit_depth);
+#else
+ aom_highbd_convolve8_add_src(CONVERT_TO_BYTEPTR(data_p), stride,
+ CONVERT_TO_BYTEPTR(dst_p), dst_stride,
+ rst->rsi->wiener_info[tile_idx].hfilter,
+ 16, vertical_topbot, 16, w, 1, bit_depth);
+#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
+ data_p += stride;
+ dst_p += dst_stride;
+ }
#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
aom_highbd_convolve8_add_src_hip(
CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
- rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h, bit_depth);
+ rst->rsi->wiener_info[tile_idx].vfilter, 16, w,
+ h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2, bit_depth);
#else
aom_highbd_convolve8_add_src(
CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
- rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h, bit_depth);
+ rst->rsi->wiener_info[tile_idx].vfilter, 16, w,
+ h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2, bit_depth);
+#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
+ data_p += stride * (h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
+ dst_p += dst_stride * (h - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
+ for (int b = WIENER_HALFWIN - WIENER_BORDER_VERT - 1; b >= 0; --b) {
+ stepdown_wiener_kernel(rst->rsi->wiener_info[tile_idx].vfilter,
+ vertical_topbot, WIENER_BORDER_VERT + b, 0);
+#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
+ aom_highbd_convolve8_add_src_hip(
+ CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p),
+ dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16,
+ vertical_topbot, 16, w, 1, bit_depth);
+#else
+ aom_highbd_convolve8_add_src(CONVERT_TO_BYTEPTR(data_p), stride,
+ CONVERT_TO_BYTEPTR(dst_p), dst_stride,
+ rst->rsi->wiener_info[tile_idx].hfilter,
+ 16, vertical_topbot, 16, w, 1, bit_depth);
#endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION
+ data_p += stride;
+ dst_p += dst_stride;
+ }
}
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, (uint8_t *)data, stride,
+ rst, 1);
+#endif
+ }
}
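
A consistency check on the row accounting above: the three convolve phases cover each processing unit exactly once, with (WIENER_HALFWIN - WIENER_BORDER_VERT) stepped-down single-row passes at the top, one full-kernel pass over the middle, and the mirrored single-row passes at the bottom:

  (HALFWIN - BORDER_VERT) + (h - 2*(HALFWIN - BORDER_VERT)) + (HALFWIN - BORDER_VERT) = h
  with WIENER_HALFWIN = 3, WIENER_BORDER_VERT = 2:  1 + (h - 2) + 1 = h
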
static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
@@ -1020,7 +1331,8 @@ static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
uint16_t *data = CONVERT_TO_SHORTPTR(data8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
int tile_idx;
- extend_frame_highbd(data, width, height, stride);
+ extend_frame_highbd(data, width, height, stride, WIENER_BORDER_HORZ,
+ WIENER_BORDER_VERT);
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
bit_depth, dst, dst_stride);
@@ -1030,15 +1342,19 @@ static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height,
int stride, int32_t *dst,
int dst_stride, int bit_depth, int r,
- int eps, int32_t *tmpbuf) {
+ int eps) {
+ int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
+ const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ;
+ int32_t *dgd32 =
+ dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ;
int i, j;
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- dst[i * dst_stride + j] = dgd[i * stride + j];
+ for (i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
+ for (j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
+ dgd32[i * dgd32_stride + j] = dgd[i * stride + j];
}
}
- av1_selfguided_restoration_internal(dst, width, height, dst_stride, bit_depth,
- r, eps, tmpbuf);
+ av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, dst,
+ dst_stride, bit_depth, r, eps);
}
void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height,
@@ -1139,7 +1455,6 @@ void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height,
int xq[2];
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
- int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if USE_HIGHPASS_IN_SGRPROJ
@@ -1148,11 +1463,11 @@ void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height,
#else
av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt1, width,
bit_depth, sgr_params[eps].r1,
- sgr_params[eps].e1, tmpbuf2);
+ sgr_params[eps].e1);
#endif // USE_HIGHPASS_IN_SGRPROJ
av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt2, width,
bit_depth, sgr_params[eps].r2,
- sgr_params[eps].e2, tmpbuf2);
+ sgr_params[eps].e2);
decode_xq(xqd, xq);
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
@@ -1175,25 +1490,50 @@ static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
RestorationInternal *rst,
int bit_depth, uint16_t *dst,
int dst_stride) {
+ const int procunit_width = rst->rsi->procunit_width;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int procunit_height;
+#else
+ const int procunit_height = rst->rsi->procunit_height;
+#endif
const int tile_width = rst->tile_width;
const int tile_height = rst->tile_height;
- int h_start, h_end, v_start, v_end;
- uint16_t *data_p, *dst_p;
if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
+ loop_copy_tile_highbd(data, tile_idx, width, height, stride, rst, dst,
dst_stride);
return;
}
- av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles,
- tile_width, tile_height, width, height, 0, 0,
- &h_start, &h_end, &v_start, &v_end);
- data_p = data + h_start + v_start * stride;
- dst_p = dst + h_start + v_start * dst_stride;
- apply_selfguided_restoration_highbd(
- data_p, h_end - h_start, v_end - v_start, stride, bit_depth,
- rst->rsi->sgrproj_info[tile_idx].ep, rst->rsi->sgrproj_info[tile_idx].xqd,
- dst_p, dst_stride, rst->tmpbuf);
+ RestorationTileLimits limits =
+ av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
+ tile_height, width, height);
+#endif
+ for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int h = setup_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, (uint8_t *)data,
+ stride, rst, 1);
+ procunit_height = h;
+#else
+ int h = AOMMIN(procunit_height, limits.v_end - i);
+#endif
+ for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
+ int w = AOMMIN(procunit_width, limits.h_end - j);
+ uint16_t *data_p = data + i * stride + j;
+ uint16_t *dst_p = dst + i * dst_stride + j;
+ apply_selfguided_restoration_highbd(
+ data_p, w, h, stride, bit_depth, rst->rsi->sgrproj_info[tile_idx].ep,
+ rst->rsi->sgrproj_info[tile_idx].xqd, dst_p, dst_stride, rst->tmpbuf);
+ }
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, (uint8_t *)data, stride,
+ rst, 1);
+#endif
+ }
}
static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
@@ -1203,6 +1543,8 @@ static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
int tile_idx;
uint16_t *data = CONVERT_TO_SHORTPTR(data8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ extend_frame_highbd(data, width, height, stride, SGRPROJ_BORDER_HORZ,
+ SGRPROJ_BORDER_VERT);
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
bit_depth, dst, dst_stride);
@@ -1216,11 +1558,12 @@ static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
uint16_t *data = CONVERT_TO_SHORTPTR(data8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
int tile_idx;
- extend_frame_highbd(data, width, height, stride);
+ extend_frame_highbd(data, width, height, stride, RESTORATION_BORDER_HORZ,
+ RESTORATION_BORDER_VERT);
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
- loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst,
- dst, dst_stride);
+ loop_copy_tile_highbd(data, tile_idx, width, height, stride, rst, dst,
+ dst_stride);
} else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) {
loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst,
bit_depth, dst, dst_stride);
@@ -1263,7 +1606,6 @@ static void loop_restoration_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
yend = AOMMIN(yend, yheight);
uvend = AOMMIN(uvend, uvheight);
-
if (components_pattern == (1 << AOM_PLANE_Y)) {
// Only y
if (rsi[0].frame_restoration_type == RESTORE_NONE) {
@@ -1313,6 +1655,10 @@ static void loop_restoration_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
&cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
&cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
cm->rst_internal.rsi = &rsi[0];
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ cm->rst_internal.component = AOM_PLANE_Y;
+ cm->rst_internal.subsampling_y = 0;
+#endif
restore_func =
restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
#if CONFIG_HIGHBITDEPTH
@@ -1340,6 +1686,10 @@ static void loop_restoration_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
&cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
&cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
cm->rst_internal.rsi = &rsi[AOM_PLANE_U];
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ cm->rst_internal.component = AOM_PLANE_U;
+ cm->rst_internal.subsampling_y = cm->subsampling_y;
+#endif
restore_func =
restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
#if CONFIG_HIGHBITDEPTH
@@ -1367,6 +1717,10 @@ static void loop_restoration_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
&cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
&cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
cm->rst_internal.rsi = &rsi[AOM_PLANE_V];
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ cm->rst_internal.component = AOM_PLANE_V;
+ cm->rst_internal.subsampling_y = cm->subsampling_y;
+#endif
restore_func =
restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
#if CONFIG_HIGHBITDEPTH
@@ -1416,3 +1770,160 @@ void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, components_pattern,
rsi, dst);
}
+
+int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ int *rcol0, int *rcol1, int *rrow0,
+ int *rrow1, int *nhtiles) {
+ assert(rcol0 && rcol1 && rrow0 && rrow1 && nhtiles);
+
+ if (bsize != cm->sb_size) return 0;
+
+#if CONFIG_FRAME_SUPERRES
+ const int frame_w = cm->superres_upscaled_width;
+ const int frame_h = cm->superres_upscaled_height;
+ const int mi_to_px = MI_SIZE * SCALE_NUMERATOR;
+ const int denom = cm->superres_scale_denominator;
+#else
+ const int frame_w = cm->width;
+ const int frame_h = cm->height;
+ const int mi_to_px = MI_SIZE;
+ const int denom = 1;
+#endif // CONFIG_FRAME_SUPERRES
+
+ const int ss_x = plane > 0 && cm->subsampling_x != 0;
+ const int ss_y = plane > 0 && cm->subsampling_y != 0;
+
+ const int ss_frame_w = (frame_w + ss_x) >> ss_x;
+ const int ss_frame_h = (frame_h + ss_y) >> ss_y;
+
+ int rtile_w, rtile_h, nvtiles;
+ av1_get_rest_ntiles(ss_frame_w, ss_frame_h,
+ cm->rst_info[plane].restoration_tilesize, &rtile_w,
+ &rtile_h, nhtiles, &nvtiles);
+
+ const int rnd_w = rtile_w * denom - 1;
+ const int rnd_h = rtile_h * denom - 1;
+
+ // rcol0/rrow0 should be the first column/row of rtiles that doesn't start
+ // left of/above mi_col/mi_row. For this calculation, we need to round up
+ // the division (if the sb starts at rtile column 10.1, the first matching
+ // rtile has column index 11)
+ *rcol0 = (mi_col * mi_to_px + rnd_w) / (rtile_w * denom);
+ *rrow0 = (mi_row * mi_to_px + rnd_h) / (rtile_h * denom);
+
+ // rcol1/rrow1 is the equivalent calculation, but for the superblock
+ // below-right. There are some slightly strange boundary effects. First, we
+ // need to clamp to nhtiles/nvtiles for the case where it appears there are,
+ // say, 2.4 restoration tiles horizontally. There we need a maximum rcol1
+ // of 2 because tile 1 gets extended.
+ //
+ // Second, if mi_col1 >= cm->mi_cols then we must manually set *rcol1 to
+ // nhtiles. This is needed whenever the frame's width rounded up to the next
+ // top-level superblock is smaller than nhtiles * rtile_w. The same logic is
+ // needed for rows.
+ const int mi_row1 = mi_row + mi_size_high[bsize];
+ const int mi_col1 = mi_col + mi_size_wide[bsize];
+
+ if (mi_col1 >= cm->mi_cols)
+ *rcol1 = *nhtiles;
+ else
+ *rcol1 = AOMMIN(*nhtiles, (mi_col1 * mi_to_px + rnd_w) / (rtile_w * denom));
+
+ if (mi_row1 >= cm->mi_rows)
+ *rrow1 = nvtiles;
+ else
+ *rrow1 = AOMMIN(nvtiles, (mi_row1 * mi_to_px + rnd_h) / (rtile_h * denom));
+
+ return *rcol0 < *rcol1 && *rrow0 < *rrow1;
+}
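+
The rnd_w/rnd_h terms turn the integer division into a ceiling, which is exactly the rounding the comment asks for:

  rcol0 = floor((mi_col*mi_to_px + rtile_w*denom - 1) / (rtile_w*denom))
        = ceil(mi_col*mi_to_px / (rtile_w*denom))

For example (illustrative numbers), with rtile_w * denom = 256 and mi_col * mi_to_px = 2586 (rtile column 10.1): (2586 + 255) / 256 = 11 = ceil(10.1).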
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+
+// Extend to left and right
+static void extend_line(uint8_t *buf, int width, int extend,
+ int use_highbitdepth) {
+ int i;
+ if (use_highbitdepth) {
+ uint16_t val, *buf16 = (uint16_t *)buf;
+ val = buf16[0];
+ for (i = 0; i < extend; i++) buf16[-1 - i] = val;
+ val = buf16[width - 1];
+ for (i = 0; i < extend; i++) buf16[width + i] = val;
+ } else {
+ uint8_t val;
+ val = buf[0];
+ for (i = 0; i < extend; i++) buf[-1 - i] = val;
+ val = buf[width - 1];
+ for (i = 0; i < extend; i++) buf[width + i] = val;
+ }
+}
+
+// For each 64-pixel-high stripe, save 4 scan lines to be used as boundaries
+// in the loop restoration process. The lines are saved in
+// rst_internal.stripe_boundary_above and rst_internal.stripe_boundary_below
+void av1_loop_restoration_save_boundary_lines(YV12_BUFFER_CONFIG *frame,
+ AV1_COMMON *cm) {
+ int p, boundary_stride;
+ int src_width, src_height, src_stride, stripe_height, stripe_offset, stripe_y,
+ yy;
+ uint8_t *src_buf, *boundary_below_buf, *boundary_above_buf;
+ int use_highbitdepth = 0;
+ for (p = 0; p < MAX_MB_PLANE; ++p) {
+ if (p == 0) {
+ src_buf = frame->y_buffer;
+ src_width = frame->y_crop_width;
+ src_height = frame->y_crop_height;
+ src_stride = frame->y_stride;
+ stripe_height = 64;
+ stripe_offset = 56 - 2; // offset of first line to copy
+ } else {
+ src_buf = p == 1 ? frame->u_buffer : frame->v_buffer;
+ src_width = frame->uv_crop_width;
+ src_height = frame->uv_crop_height;
+ src_stride = frame->uv_stride;
+ stripe_height = 64 >> cm->subsampling_y;
+ stripe_offset = (56 >> cm->subsampling_y) - 2;
+ }
+ boundary_above_buf = cm->rst_internal.stripe_boundary_above[p];
+ boundary_below_buf = cm->rst_internal.stripe_boundary_below[p];
+ boundary_stride = cm->rst_internal.stripe_boundary_stride[p];
+#if CONFIG_HIGHBITDEPTH
+ use_highbitdepth = cm->use_highbitdepth;
+ if (use_highbitdepth) {
+ src_buf = (uint8_t *)CONVERT_TO_SHORTPTR(src_buf);
+ }
+#endif
+ src_buf += (stripe_offset * src_stride) << use_highbitdepth;
+ boundary_above_buf += RESTORATION_EXTRA_HORZ << use_highbitdepth;
+ boundary_below_buf += RESTORATION_EXTRA_HORZ << use_highbitdepth;
+ // Loop over stripes
+ for (stripe_y = stripe_offset; stripe_y < src_height;
+ stripe_y += stripe_height) {
+ // Save the 2 lines above the LR stripe boundary (rows stripe_y, stripe_y + 1)
+ for (yy = 0; yy < 2; yy++) {
+ if (stripe_y + yy < src_height) {
+ memcpy(boundary_above_buf, src_buf, src_width << use_highbitdepth);
+ extend_line(boundary_above_buf, src_width, RESTORATION_EXTRA_HORZ,
+ use_highbitdepth);
+ src_buf += src_stride << use_highbitdepth;
+ boundary_above_buf += boundary_stride << use_highbitdepth;
+ }
+ }
+ // Save the 2 lines below the LR stripe boundary (rows stripe_y + 2, stripe_y + 3)
+ for (yy = 2; yy < 4; yy++) {
+ if (stripe_y + yy < src_height) {
+ memcpy(boundary_below_buf, src_buf, src_width << use_highbitdepth);
+ extend_line(boundary_below_buf, src_width, RESTORATION_EXTRA_HORZ,
+ use_highbitdepth);
+ src_buf += src_stride << use_highbitdepth;
+ boundary_below_buf += boundary_stride << use_highbitdepth;
+ }
+ }
+ // jump to next stripe
+ src_buf += ((stripe_height - 4) * src_stride) << use_highbitdepth;
+ }
+ }
+}
+
+#endif // CONFIG_STRIPED_LOOP_RESTORATION
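
Putting numbers on the luma case (stripe_height = 64, stripe_offset = 56 - 2 = 54): the first pass saves rows 54-55 into stripe_boundary_above and rows 56-57 into stripe_boundary_below, then skips stripe_height - 4 = 60 rows, so the next group starts at row 118 = 54 + 64:

/* Rows saved per stripe (sketch of the loop above, luma plane): */
for (int stripe_y = 54; stripe_y < src_height; stripe_y += 64) {
  /* stripe_y,     stripe_y + 1  ->  stripe_boundary_above */
  /* stripe_y + 2, stripe_y + 3  ->  stripe_boundary_below */
}
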
diff --git a/third_party/aom/av1/common/restoration.h b/third_party/aom/av1/common/restoration.h
index 477f20a42..23a53879e 100644
--- a/third_party/aom/av1/common/restoration.h
+++ b/third_party/aom/av1/common/restoration.h
@@ -24,18 +24,77 @@ extern "C" {
#define CLIP(x, lo, hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))
#define RINT(x) ((x) < 0 ? (int)((x)-0.5) : (int)((x) + 0.5))
+#define RESTORATION_PROC_UNIT_SIZE 64
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+// Filter tile grid offset upwards compared to the superblock grid
+#define RESTORATION_TILE_OFFSET 8
+#endif
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+#define SGRPROJ_BORDER_VERT 2 // Vertical border used for Sgr
+#else
+#define SGRPROJ_BORDER_VERT 1 // Vertical border used for Sgr
+#endif
+#define SGRPROJ_BORDER_HORZ 2 // Horizontal border used for Sgr
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+#define WIENER_BORDER_VERT 2 // Vertical border used for Wiener
+#else
+#define WIENER_BORDER_VERT 1 // Vertical border used for Wiener
+#endif
+#define WIENER_HALFWIN 3
+#define WIENER_BORDER_HORZ (WIENER_HALFWIN) // Horizontal border for Wiener
+
+// RESTORATION_BORDER_VERT determines the line buffer requirement for LR.
+// It should be set to the max of SGRPROJ_BORDER_VERT and WIENER_BORDER_VERT.
+// Note that the line buffer needed is twice the value of this macro.
+#if SGRPROJ_BORDER_VERT >= WIENER_BORDER_VERT
+#define RESTORATION_BORDER_VERT (SGRPROJ_BORDER_VERT)
+#else
+#define RESTORATION_BORDER_VERT (WIENER_BORDER_VERT)
+#endif // SGRPROJ_BORDER_VERT >= WIENER_BORDER_VERT
+
+#if SGRPROJ_BORDER_HORZ >= WIENER_BORDER_HORZ
+#define RESTORATION_BORDER_HORZ (SGRPROJ_BORDER_HORZ)
+#else
+#define RESTORATION_BORDER_HORZ (WIENER_BORDER_HORZ)
+#endif // SGRPROJ_BORDER_HORZ >= WIENER_BORDER_HORZ
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+// Additional pixels to the left and right in above/below buffers
+// It is RESTORATION_BORDER_HORZ rounded up to get nicer buffer alignment
+#define RESTORATION_EXTRA_HORZ 4
+#endif
+
+// Pad up to 20 more (much less may be needed)
+#define RESTORATION_PADDING 20
+#define RESTORATION_PROC_UNIT_PELS \
+ ((RESTORATION_PROC_UNIT_SIZE + RESTORATION_BORDER_HORZ * 2 + \
+ RESTORATION_PADDING) * \
+ (RESTORATION_PROC_UNIT_SIZE + RESTORATION_BORDER_VERT * 2 + \
+ RESTORATION_PADDING))
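+
Plugging in the values defined above (PROC_UNIT_SIZE = 64, RESTORATION_BORDER_HORZ = 3 from Wiener, RESTORATION_BORDER_VERT = 2 with striped LR, PADDING = 20):

  RESTORATION_PROC_UNIT_PELS = (64 + 6 + 20) * (64 + 4 + 20) = 90 * 88 = 7920

so each int32_t scratch array sized by it (A_, B_, dgd32_ in restoration.c) is about 31 KB.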
+
#define RESTORATION_TILESIZE_MAX 256
+#if CONFIG_STRIPED_LOOP_RESTORATION
+#define RESTORATION_TILEPELS_HORZ_MAX \
+ (RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16)
+#define RESTORATION_TILEPELS_VERT_MAX \
+ ((RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_VERT + \
+ RESTORATION_TILE_OFFSET))
#define RESTORATION_TILEPELS_MAX \
- (RESTORATION_TILESIZE_MAX * RESTORATION_TILESIZE_MAX * 9 / 4)
-
-// 4 32-bit buffers needed for the filter:
-// 2 for the restored versions of the frame and
-// 2 for each restoration operation
-#define SGRPROJ_OUTBUF_SIZE \
- ((RESTORATION_TILESIZE_MAX * 3 / 2) * (RESTORATION_TILESIZE_MAX * 3 / 2 + 16))
-#define SGRPROJ_TMPBUF_SIZE \
- (RESTORATION_TILEPELS_MAX * 2 * sizeof(int32_t) + \
- SGRPROJ_OUTBUF_SIZE * 2 * sizeof(int32_t))
+ (RESTORATION_TILEPELS_HORZ_MAX * RESTORATION_TILEPELS_VERT_MAX)
+#else
+#define RESTORATION_TILEPELS_MAX \
+ ((RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16) * \
+ (RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_VERT))
+#endif
+
+// Two 32-bit buffers needed for the restored versions from two filters
+// TODO(debargha, rupert): Refactor to not need the large tilesize to be stored
+// on the decoder side.
+#define SGRPROJ_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * 2 * sizeof(int32_t))
+
#define SGRPROJ_EXTBUF_SIZE (0)
#define SGRPROJ_PARAMS_BITS 4
#define SGRPROJ_PARAMS (1 << SGRPROJ_PARAMS_BITS)
@@ -65,19 +124,22 @@ extern "C" {
#define SGRPROJ_BITS (SGRPROJ_PRJ_BITS * 2 + SGRPROJ_PARAMS_BITS)
-#define MAX_RADIUS 3 // Only 1, 2, 3 allowed
+#define MAX_RADIUS 2 // Only 1, 2, 3 allowed
#define MAX_EPS 80 // Max value of eps
#define MAX_NELEM ((2 * MAX_RADIUS + 1) * (2 * MAX_RADIUS + 1))
#define SGRPROJ_MTABLE_BITS 20
#define SGRPROJ_RECIP_BITS 12
-#define WIENER_HALFWIN 3
#define WIENER_HALFWIN1 (WIENER_HALFWIN + 1)
#define WIENER_WIN (2 * WIENER_HALFWIN + 1)
#define WIENER_WIN2 ((WIENER_WIN) * (WIENER_WIN))
#define WIENER_TMPBUF_SIZE (0)
#define WIENER_EXTBUF_SIZE (0)
+// If WIENER_WIN_CHROMA == WIENER_WIN - 2, that implies 5x5 filters are used
+// for chroma. To use 7x7 for chroma, set WIENER_WIN_CHROMA to WIENER_WIN.
+#define WIENER_WIN_CHROMA (WIENER_WIN - 2)
+
#define WIENER_FILT_PREC_BITS 7
#define WIENER_FILT_STEP (1 << WIENER_FILT_PREC_BITS)
@@ -131,10 +193,6 @@ extern "C" {
#if WIENER_FILT_PREC_BITS != 7
#error "Wiener filter currently only works if WIENER_FILT_PREC_BITS == 7"
#endif
-typedef struct {
- DECLARE_ALIGNED(16, InterpKernel, vfilter);
- DECLARE_ALIGNED(16, InterpKernel, hfilter);
-} WienerInfo;
typedef struct {
#if USE_HIGHPASS_IN_SGRPROJ
@@ -149,12 +207,8 @@ typedef struct {
} sgr_params_type;
typedef struct {
- int ep;
- int xqd[2];
-} SgrprojInfo;
-
-typedef struct {
int restoration_tilesize;
+ int procunit_width, procunit_height;
RestorationType frame_restoration_type;
RestorationType *restoration_type;
// Wiener filter
@@ -170,6 +224,20 @@ typedef struct {
int tile_width, tile_height;
int nhtiles, nvtiles;
int32_t *tmpbuf;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int component;
+ int subsampling_y;
+ uint8_t *stripe_boundary_above[MAX_MB_PLANE];
+ uint8_t *stripe_boundary_below[MAX_MB_PLANE];
+ int stripe_boundary_stride[MAX_MB_PLANE];
+ // Temporary buffers to save/restore 2 lines above/below the restoration
+ // stripe
+ // Allow for filter margin to left and right
+ uint16_t
+ tmp_save_above[2][RESTORATION_TILESIZE_MAX + 2 * RESTORATION_EXTRA_HORZ];
+ uint16_t
+ tmp_save_below[2][RESTORATION_TILESIZE_MAX + 2 * RESTORATION_EXTRA_HORZ];
+#endif
} RestorationInternal;
static INLINE void set_default_sgrproj(SgrprojInfo *sgrproj_info) {
@@ -196,6 +264,8 @@ static INLINE int av1_get_rest_ntiles(int width, int height, int tilesize,
int tile_width_, tile_height_;
tile_width_ = (tilesize < 0) ? width : AOMMIN(tilesize, width);
tile_height_ = (tilesize < 0) ? height : AOMMIN(tilesize, height);
+ assert(tile_width_ > 0 && tile_height_ > 0);
+
nhtiles_ = (width + (tile_width_ >> 1)) / tile_width_;
nvtiles_ = (height + (tile_height_ >> 1)) / tile_height_;
if (tile_width) *tile_width = tile_width_;
@@ -205,37 +275,33 @@ static INLINE int av1_get_rest_ntiles(int width, int height, int tilesize,
return (nhtiles_ * nvtiles_);
}
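
The "+ (tile_width_ >> 1)" makes the tile count a nearest-integer round of width / tile_width, so the last tile always ends up between half and one-and-a-half tiles wide. For example, with tilesize = 256:

  width = 1000:  nhtiles_ = (1000 + 128) / 256 = 4, last tile spans columns 768..999 (232 px)
  width =  800:  nhtiles_ = (800 + 128) / 256 = 3, last tile spans columns 512..799 (288 px)
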
-static INLINE void av1_get_rest_tile_limits(
- int tile_idx, int subtile_idx, int subtile_bits, int nhtiles, int nvtiles,
- int tile_width, int tile_height, int im_width, int im_height, int clamp_h,
- int clamp_v, int *h_start, int *h_end, int *v_start, int *v_end) {
+typedef struct { int h_start, h_end, v_start, v_end; } RestorationTileLimits;
+
+static INLINE RestorationTileLimits
+av1_get_rest_tile_limits(int tile_idx, int nhtiles, int nvtiles, int tile_width,
+ int tile_height, int im_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int im_height, int subsampling_y) {
+#else
+ int im_height) {
+#endif
const int htile_idx = tile_idx % nhtiles;
const int vtile_idx = tile_idx / nhtiles;
- *h_start = htile_idx * tile_width;
- *v_start = vtile_idx * tile_height;
- *h_end = (htile_idx < nhtiles - 1) ? *h_start + tile_width : im_width;
- *v_end = (vtile_idx < nvtiles - 1) ? *v_start + tile_height : im_height;
- if (subtile_bits) {
- const int num_subtiles_1d = (1 << subtile_bits);
- const int subtile_width = (*h_end - *h_start) >> subtile_bits;
- const int subtile_height = (*v_end - *v_start) >> subtile_bits;
- const int subtile_idx_h = subtile_idx & (num_subtiles_1d - 1);
- const int subtile_idx_v = subtile_idx >> subtile_bits;
- *h_start += subtile_idx_h * subtile_width;
- *v_start += subtile_idx_v * subtile_height;
- *h_end = subtile_idx_h == num_subtiles_1d - 1 ? *h_end
- : *h_start + subtile_width;
- *v_end = subtile_idx_v == num_subtiles_1d - 1 ? *v_end
- : *v_start + subtile_height;
- }
- if (clamp_h) {
- *h_start = AOMMAX(*h_start, clamp_h);
- *h_end = AOMMIN(*h_end, im_width - clamp_h);
- }
- if (clamp_v) {
- *v_start = AOMMAX(*v_start, clamp_v);
- *v_end = AOMMIN(*v_end, im_height - clamp_v);
- }
+ RestorationTileLimits limits;
+ limits.h_start = htile_idx * tile_width;
+ limits.v_start = vtile_idx * tile_height;
+ limits.h_end =
+ (htile_idx < nhtiles - 1) ? limits.h_start + tile_width : im_width;
+ limits.v_end =
+ (vtile_idx < nvtiles - 1) ? limits.v_start + tile_height : im_height;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ // Offset the tile upwards to align with the restoration processing stripe
+ limits.v_start -= RESTORATION_TILE_OFFSET >> subsampling_y;
+ if (limits.v_start < 0) limits.v_start = 0;
+ if (limits.v_end < im_height)
+ limits.v_end -= RESTORATION_TILE_OFFSET >> subsampling_y;
+#endif
+ return limits;
}
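
With CONFIG_STRIPED_LOOP_RESTORATION this shift lines interior tile boundaries up with the stripes saved by av1_loop_restoration_save_boundary_lines. For luma (subsampling_y = 0, RESTORATION_TILE_OFFSET = 8, tile_height = 64):

  tile row 0:  v_start = max(0, 0 - 8) = 0,   v_end = 64 - 8 = 56
  tile row 1:  v_start = 64 - 8 = 56,         v_end = 128 - 8 = 120
  last row:    v_end stays at im_height (no shift at the frame bottom)

so interior boundaries fall on rows 56, 120, ..., matching the saved lines at 54-57, 118-121.
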
extern const sgr_params_type sgr_params[SGRPROJ_PARAMS];
@@ -248,15 +314,34 @@ int av1_alloc_restoration_struct(struct AV1Common *cm,
int height);
void av1_free_restoration_struct(RestorationInfo *rst_info);
-void extend_frame(uint8_t *data, int width, int height, int stride);
+void extend_frame(uint8_t *data, int width, int height, int stride,
+ int border_horz, int border_vert);
#if CONFIG_HIGHBITDEPTH
-void extend_frame_highbd(uint16_t *data, int width, int height, int stride);
+void extend_frame_highbd(uint16_t *data, int width, int height, int stride,
+ int border_horz, int border_vert);
#endif // CONFIG_HIGHBITDEPTH
void decode_xq(int *xqd, int *xq);
void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
RestorationInfo *rsi, int components_pattern,
int partial_frame, YV12_BUFFER_CONFIG *dst);
void av1_loop_restoration_precal();
+
+// Return 1 iff the block at mi_row, mi_col with size bsize is a
+// top-level superblock containing the top-left corner of at least one
+// loop restoration tile.
+//
+// If the block is a top-level superblock, the function writes to
+// *rcol0, *rcol1, *rrow0, *rrow1. The rectangle of indices given by
+// [*rcol0, *rcol1) x [*rrow0, *rrow1) will point at the set of rtiles
+// whose top left corners lie in the superblock. Note that the set is
+// only nonempty if *rcol0 < *rcol1 and *rrow0 < *rrow1.
+int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ int *rcol0, int *rcol1, int *rrow0,
+ int *rrow1, int *nhtiles);
+
+void av1_loop_restoration_save_boundary_lines(YV12_BUFFER_CONFIG *frame,
+ struct AV1Common *cm);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/scale.h b/third_party/aom/av1/common/scale.h
index 3aa61eb34..900e6bf47 100644
--- a/third_party/aom/av1/common/scale.h
+++ b/third_party/aom/av1/common/scale.h
@@ -19,7 +19,7 @@
extern "C" {
#endif
-#define SCALE_DENOMINATOR 16
+#define SCALE_NUMERATOR 8
#define REF_SCALE_SHIFT 14
#define REF_NO_SCALE (1 << REF_SCALE_SHIFT)
diff --git a/third_party/aom/av1/common/scan.c b/third_party/aom/av1/common/scan.c
index 7bdeb6239..3c8f3d7ac 100644
--- a/third_party/aom/av1/common/scan.c
+++ b/third_party/aom/av1/common/scan.c
@@ -1171,6 +1171,328 @@ DECLARE_ALIGNED(16, static const int16_t, qtr_scan_32x32[1024]) = {
};
#if CONFIG_TX64X64
+DECLARE_ALIGNED(16, static const int16_t, default_scan_32x64[2048]) = {
+ 0, 1, 32, 2, 33, 64, 3, 34, 65, 96, 4, 35, 66,
+ 97, 128, 5, 36, 67, 98, 129, 160, 6, 37, 68, 99, 130,
+ 161, 192, 7, 38, 69, 100, 131, 162, 193, 224, 8, 39, 70,
+ 101, 132, 163, 194, 225, 256, 9, 40, 71, 102, 133, 164, 195,
+ 226, 257, 288, 10, 41, 72, 103, 134, 165, 196, 227, 258, 289,
+ 320, 11, 42, 73, 104, 135, 166, 197, 228, 259, 290, 321, 352,
+ 12, 43, 74, 105, 136, 167, 198, 229, 260, 291, 322, 353, 384,
+ 13, 44, 75, 106, 137, 168, 199, 230, 261, 292, 323, 354, 385,
+ 416, 14, 45, 76, 107, 138, 169, 200, 231, 262, 293, 324, 355,
+ 386, 417, 448, 15, 46, 77, 108, 139, 170, 201, 232, 263, 294,
+ 325, 356, 387, 418, 449, 480, 16, 47, 78, 109, 140, 171, 202,
+ 233, 264, 295, 326, 357, 388, 419, 450, 481, 512, 17, 48, 79,
+ 110, 141, 172, 203, 234, 265, 296, 327, 358, 389, 420, 451, 482,
+ 513, 544, 18, 49, 80, 111, 142, 173, 204, 235, 266, 297, 328,
+ 359, 390, 421, 452, 483, 514, 545, 576, 19, 50, 81, 112, 143,
+ 174, 205, 236, 267, 298, 329, 360, 391, 422, 453, 484, 515, 546,
+ 577, 608, 20, 51, 82, 113, 144, 175, 206, 237, 268, 299, 330,
+ 361, 392, 423, 454, 485, 516, 547, 578, 609, 640, 21, 52, 83,
+ 114, 145, 176, 207, 238, 269, 300, 331, 362, 393, 424, 455, 486,
+ 517, 548, 579, 610, 641, 672, 22, 53, 84, 115, 146, 177, 208,
+ 239, 270, 301, 332, 363, 394, 425, 456, 487, 518, 549, 580, 611,
+ 642, 673, 704, 23, 54, 85, 116, 147, 178, 209, 240, 271, 302,
+ 333, 364, 395, 426, 457, 488, 519, 550, 581, 612, 643, 674, 705,
+ 736, 24, 55, 86, 117, 148, 179, 210, 241, 272, 303, 334, 365,
+ 396, 427, 458, 489, 520, 551, 582, 613, 644, 675, 706, 737, 768,
+ 25, 56, 87, 118, 149, 180, 211, 242, 273, 304, 335, 366, 397,
+ 428, 459, 490, 521, 552, 583, 614, 645, 676, 707, 738, 769, 800,
+ 26, 57, 88, 119, 150, 181, 212, 243, 274, 305, 336, 367, 398,
+ 429, 460, 491, 522, 553, 584, 615, 646, 677, 708, 739, 770, 801,
+ 832, 27, 58, 89, 120, 151, 182, 213, 244, 275, 306, 337, 368,
+ 399, 430, 461, 492, 523, 554, 585, 616, 647, 678, 709, 740, 771,
+ 802, 833, 864, 28, 59, 90, 121, 152, 183, 214, 245, 276, 307,
+ 338, 369, 400, 431, 462, 493, 524, 555, 586, 617, 648, 679, 710,
+ 741, 772, 803, 834, 865, 896, 29, 60, 91, 122, 153, 184, 215,
+ 246, 277, 308, 339, 370, 401, 432, 463, 494, 525, 556, 587, 618,
+ 649, 680, 711, 742, 773, 804, 835, 866, 897, 928, 30, 61, 92,
+ 123, 154, 185, 216, 247, 278, 309, 340, 371, 402, 433, 464, 495,
+ 526, 557, 588, 619, 650, 681, 712, 743, 774, 805, 836, 867, 898,
+ 929, 960, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341,
+ 372, 403, 434, 465, 496, 527, 558, 589, 620, 651, 682, 713, 744,
+ 775, 806, 837, 868, 899, 930, 961, 992, 63, 94, 125, 156, 187,
+ 218, 249, 280, 311, 342, 373, 404, 435, 466, 497, 528, 559, 590,
+ 621, 652, 683, 714, 745, 776, 807, 838, 869, 900, 931, 962, 993,
+ 1024, 95, 126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436,
+ 467, 498, 529, 560, 591, 622, 653, 684, 715, 746, 777, 808, 839,
+ 870, 901, 932, 963, 994, 1025, 1056, 127, 158, 189, 220, 251, 282,
+ 313, 344, 375, 406, 437, 468, 499, 530, 561, 592, 623, 654, 685,
+ 716, 747, 778, 809, 840, 871, 902, 933, 964, 995, 1026, 1057, 1088,
+ 159, 190, 221, 252, 283, 314, 345, 376, 407, 438, 469, 500, 531,
+ 562, 593, 624, 655, 686, 717, 748, 779, 810, 841, 872, 903, 934,
+ 965, 996, 1027, 1058, 1089, 1120, 191, 222, 253, 284, 315, 346, 377,
+ 408, 439, 470, 501, 532, 563, 594, 625, 656, 687, 718, 749, 780,
+ 811, 842, 873, 904, 935, 966, 997, 1028, 1059, 1090, 1121, 1152, 223,
+ 254, 285, 316, 347, 378, 409, 440, 471, 502, 533, 564, 595, 626,
+ 657, 688, 719, 750, 781, 812, 843, 874, 905, 936, 967, 998, 1029,
+ 1060, 1091, 1122, 1153, 1184, 255, 286, 317, 348, 379, 410, 441, 472,
+ 503, 534, 565, 596, 627, 658, 689, 720, 751, 782, 813, 844, 875,
+ 906, 937, 968, 999, 1030, 1061, 1092, 1123, 1154, 1185, 1216, 287, 318,
+ 349, 380, 411, 442, 473, 504, 535, 566, 597, 628, 659, 690, 721,
+ 752, 783, 814, 845, 876, 907, 938, 969, 1000, 1031, 1062, 1093, 1124,
+ 1155, 1186, 1217, 1248, 319, 350, 381, 412, 443, 474, 505, 536, 567,
+ 598, 629, 660, 691, 722, 753, 784, 815, 846, 877, 908, 939, 970,
+ 1001, 1032, 1063, 1094, 1125, 1156, 1187, 1218, 1249, 1280, 351, 382, 413,
+ 444, 475, 506, 537, 568, 599, 630, 661, 692, 723, 754, 785, 816,
+ 847, 878, 909, 940, 971, 1002, 1033, 1064, 1095, 1126, 1157, 1188, 1219,
+ 1250, 1281, 1312, 383, 414, 445, 476, 507, 538, 569, 600, 631, 662,
+ 693, 724, 755, 786, 817, 848, 879, 910, 941, 972, 1003, 1034, 1065,
+ 1096, 1127, 1158, 1189, 1220, 1251, 1282, 1313, 1344, 415, 446, 477, 508,
+ 539, 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880, 911,
+ 942, 973, 1004, 1035, 1066, 1097, 1128, 1159, 1190, 1221, 1252, 1283, 1314,
+ 1345, 1376, 447, 478, 509, 540, 571, 602, 633, 664, 695, 726, 757,
+ 788, 819, 850, 881, 912, 943, 974, 1005, 1036, 1067, 1098, 1129, 1160,
+ 1191, 1222, 1253, 1284, 1315, 1346, 1377, 1408, 479, 510, 541, 572, 603,
+ 634, 665, 696, 727, 758, 789, 820, 851, 882, 913, 944, 975, 1006,
+ 1037, 1068, 1099, 1130, 1161, 1192, 1223, 1254, 1285, 1316, 1347, 1378, 1409,
+ 1440, 511, 542, 573, 604, 635, 666, 697, 728, 759, 790, 821, 852,
+ 883, 914, 945, 976, 1007, 1038, 1069, 1100, 1131, 1162, 1193, 1224, 1255,
+ 1286, 1317, 1348, 1379, 1410, 1441, 1472, 543, 574, 605, 636, 667, 698,
+ 729, 760, 791, 822, 853, 884, 915, 946, 977, 1008, 1039, 1070, 1101,
+ 1132, 1163, 1194, 1225, 1256, 1287, 1318, 1349, 1380, 1411, 1442, 1473, 1504,
+ 575, 606, 637, 668, 699, 730, 761, 792, 823, 854, 885, 916, 947,
+ 978, 1009, 1040, 1071, 1102, 1133, 1164, 1195, 1226, 1257, 1288, 1319, 1350,
+ 1381, 1412, 1443, 1474, 1505, 1536, 607, 638, 669, 700, 731, 762, 793,
+ 824, 855, 886, 917, 948, 979, 1010, 1041, 1072, 1103, 1134, 1165, 1196,
+ 1227, 1258, 1289, 1320, 1351, 1382, 1413, 1444, 1475, 1506, 1537, 1568, 639,
+ 670, 701, 732, 763, 794, 825, 856, 887, 918, 949, 980, 1011, 1042,
+ 1073, 1104, 1135, 1166, 1197, 1228, 1259, 1290, 1321, 1352, 1383, 1414, 1445,
+ 1476, 1507, 1538, 1569, 1600, 671, 702, 733, 764, 795, 826, 857, 888,
+ 919, 950, 981, 1012, 1043, 1074, 1105, 1136, 1167, 1198, 1229, 1260, 1291,
+ 1322, 1353, 1384, 1415, 1446, 1477, 1508, 1539, 1570, 1601, 1632, 703, 734,
+ 765, 796, 827, 858, 889, 920, 951, 982, 1013, 1044, 1075, 1106, 1137,
+ 1168, 1199, 1230, 1261, 1292, 1323, 1354, 1385, 1416, 1447, 1478, 1509, 1540,
+ 1571, 1602, 1633, 1664, 735, 766, 797, 828, 859, 890, 921, 952, 983,
+ 1014, 1045, 1076, 1107, 1138, 1169, 1200, 1231, 1262, 1293, 1324, 1355, 1386,
+ 1417, 1448, 1479, 1510, 1541, 1572, 1603, 1634, 1665, 1696, 767, 798, 829,
+ 860, 891, 922, 953, 984, 1015, 1046, 1077, 1108, 1139, 1170, 1201, 1232,
+ 1263, 1294, 1325, 1356, 1387, 1418, 1449, 1480, 1511, 1542, 1573, 1604, 1635,
+ 1666, 1697, 1728, 799, 830, 861, 892, 923, 954, 985, 1016, 1047, 1078,
+ 1109, 1140, 1171, 1202, 1233, 1264, 1295, 1326, 1357, 1388, 1419, 1450, 1481,
+ 1512, 1543, 1574, 1605, 1636, 1667, 1698, 1729, 1760, 831, 862, 893, 924,
+ 955, 986, 1017, 1048, 1079, 1110, 1141, 1172, 1203, 1234, 1265, 1296, 1327,
+ 1358, 1389, 1420, 1451, 1482, 1513, 1544, 1575, 1606, 1637, 1668, 1699, 1730,
+ 1761, 1792, 863, 894, 925, 956, 987, 1018, 1049, 1080, 1111, 1142, 1173,
+ 1204, 1235, 1266, 1297, 1328, 1359, 1390, 1421, 1452, 1483, 1514, 1545, 1576,
+ 1607, 1638, 1669, 1700, 1731, 1762, 1793, 1824, 895, 926, 957, 988, 1019,
+ 1050, 1081, 1112, 1143, 1174, 1205, 1236, 1267, 1298, 1329, 1360, 1391, 1422,
+ 1453, 1484, 1515, 1546, 1577, 1608, 1639, 1670, 1701, 1732, 1763, 1794, 1825,
+ 1856, 927, 958, 989, 1020, 1051, 1082, 1113, 1144, 1175, 1206, 1237, 1268,
+ 1299, 1330, 1361, 1392, 1423, 1454, 1485, 1516, 1547, 1578, 1609, 1640, 1671,
+ 1702, 1733, 1764, 1795, 1826, 1857, 1888, 959, 990, 1021, 1052, 1083, 1114,
+ 1145, 1176, 1207, 1238, 1269, 1300, 1331, 1362, 1393, 1424, 1455, 1486, 1517,
+ 1548, 1579, 1610, 1641, 1672, 1703, 1734, 1765, 1796, 1827, 1858, 1889, 1920,
+ 991, 1022, 1053, 1084, 1115, 1146, 1177, 1208, 1239, 1270, 1301, 1332, 1363,
+ 1394, 1425, 1456, 1487, 1518, 1549, 1580, 1611, 1642, 1673, 1704, 1735, 1766,
+ 1797, 1828, 1859, 1890, 1921, 1952, 1023, 1054, 1085, 1116, 1147, 1178, 1209,
+ 1240, 1271, 1302, 1333, 1364, 1395, 1426, 1457, 1488, 1519, 1550, 1581, 1612,
+ 1643, 1674, 1705, 1736, 1767, 1798, 1829, 1860, 1891, 1922, 1953, 1984, 1055,
+ 1086, 1117, 1148, 1179, 1210, 1241, 1272, 1303, 1334, 1365, 1396, 1427, 1458,
+ 1489, 1520, 1551, 1582, 1613, 1644, 1675, 1706, 1737, 1768, 1799, 1830, 1861,
+ 1892, 1923, 1954, 1985, 2016, 1087, 1118, 1149, 1180, 1211, 1242, 1273, 1304,
+ 1335, 1366, 1397, 1428, 1459, 1490, 1521, 1552, 1583, 1614, 1645, 1676, 1707,
+ 1738, 1769, 1800, 1831, 1862, 1893, 1924, 1955, 1986, 2017, 1119, 1150, 1181,
+ 1212, 1243, 1274, 1305, 1336, 1367, 1398, 1429, 1460, 1491, 1522, 1553, 1584,
+ 1615, 1646, 1677, 1708, 1739, 1770, 1801, 1832, 1863, 1894, 1925, 1956, 1987,
+ 2018, 1151, 1182, 1213, 1244, 1275, 1306, 1337, 1368, 1399, 1430, 1461, 1492,
+ 1523, 1554, 1585, 1616, 1647, 1678, 1709, 1740, 1771, 1802, 1833, 1864, 1895,
+ 1926, 1957, 1988, 2019, 1183, 1214, 1245, 1276, 1307, 1338, 1369, 1400, 1431,
+ 1462, 1493, 1524, 1555, 1586, 1617, 1648, 1679, 1710, 1741, 1772, 1803, 1834,
+ 1865, 1896, 1927, 1958, 1989, 2020, 1215, 1246, 1277, 1308, 1339, 1370, 1401,
+ 1432, 1463, 1494, 1525, 1556, 1587, 1618, 1649, 1680, 1711, 1742, 1773, 1804,
+ 1835, 1866, 1897, 1928, 1959, 1990, 2021, 1247, 1278, 1309, 1340, 1371, 1402,
+ 1433, 1464, 1495, 1526, 1557, 1588, 1619, 1650, 1681, 1712, 1743, 1774, 1805,
+ 1836, 1867, 1898, 1929, 1960, 1991, 2022, 1279, 1310, 1341, 1372, 1403, 1434,
+ 1465, 1496, 1527, 1558, 1589, 1620, 1651, 1682, 1713, 1744, 1775, 1806, 1837,
+ 1868, 1899, 1930, 1961, 1992, 2023, 1311, 1342, 1373, 1404, 1435, 1466, 1497,
+ 1528, 1559, 1590, 1621, 1652, 1683, 1714, 1745, 1776, 1807, 1838, 1869, 1900,
+ 1931, 1962, 1993, 2024, 1343, 1374, 1405, 1436, 1467, 1498, 1529, 1560, 1591,
+ 1622, 1653, 1684, 1715, 1746, 1777, 1808, 1839, 1870, 1901, 1932, 1963, 1994,
+ 2025, 1375, 1406, 1437, 1468, 1499, 1530, 1561, 1592, 1623, 1654, 1685, 1716,
+ 1747, 1778, 1809, 1840, 1871, 1902, 1933, 1964, 1995, 2026, 1407, 1438, 1469,
+ 1500, 1531, 1562, 1593, 1624, 1655, 1686, 1717, 1748, 1779, 1810, 1841, 1872,
+ 1903, 1934, 1965, 1996, 2027, 1439, 1470, 1501, 1532, 1563, 1594, 1625, 1656,
+ 1687, 1718, 1749, 1780, 1811, 1842, 1873, 1904, 1935, 1966, 1997, 2028, 1471,
+ 1502, 1533, 1564, 1595, 1626, 1657, 1688, 1719, 1750, 1781, 1812, 1843, 1874,
+ 1905, 1936, 1967, 1998, 2029, 1503, 1534, 1565, 1596, 1627, 1658, 1689, 1720,
+ 1751, 1782, 1813, 1844, 1875, 1906, 1937, 1968, 1999, 2030, 1535, 1566, 1597,
+ 1628, 1659, 1690, 1721, 1752, 1783, 1814, 1845, 1876, 1907, 1938, 1969, 2000,
+ 2031, 1567, 1598, 1629, 1660, 1691, 1722, 1753, 1784, 1815, 1846, 1877, 1908,
+ 1939, 1970, 2001, 2032, 1599, 1630, 1661, 1692, 1723, 1754, 1785, 1816, 1847,
+ 1878, 1909, 1940, 1971, 2002, 2033, 1631, 1662, 1693, 1724, 1755, 1786, 1817,
+ 1848, 1879, 1910, 1941, 1972, 2003, 2034, 1663, 1694, 1725, 1756, 1787, 1818,
+ 1849, 1880, 1911, 1942, 1973, 2004, 2035, 1695, 1726, 1757, 1788, 1819, 1850,
+ 1881, 1912, 1943, 1974, 2005, 2036, 1727, 1758, 1789, 1820, 1851, 1882, 1913,
+ 1944, 1975, 2006, 2037, 1759, 1790, 1821, 1852, 1883, 1914, 1945, 1976, 2007,
+ 2038, 1791, 1822, 1853, 1884, 1915, 1946, 1977, 2008, 2039, 1823, 1854, 1885,
+ 1916, 1947, 1978, 2009, 2040, 1855, 1886, 1917, 1948, 1979, 2010, 2041, 1887,
+ 1918, 1949, 1980, 2011, 2042, 1919, 1950, 1981, 2012, 2043, 1951, 1982, 2013,
+ 2044, 1983, 2014, 2045, 2015, 2046, 2047,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_64x32[2048]) = {
+ 0, 1, 64, 2, 65, 128, 3, 66, 129, 192, 4, 67, 130,
+ 193, 256, 5, 68, 131, 194, 257, 320, 6, 69, 132, 195, 258,
+ 321, 384, 7, 70, 133, 196, 259, 322, 385, 448, 8, 71, 134,
+ 197, 260, 323, 386, 449, 512, 9, 72, 135, 198, 261, 324, 387,
+ 450, 513, 576, 10, 73, 136, 199, 262, 325, 388, 451, 514, 577,
+ 640, 11, 74, 137, 200, 263, 326, 389, 452, 515, 578, 641, 704,
+ 12, 75, 138, 201, 264, 327, 390, 453, 516, 579, 642, 705, 768,
+ 13, 76, 139, 202, 265, 328, 391, 454, 517, 580, 643, 706, 769,
+ 832, 14, 77, 140, 203, 266, 329, 392, 455, 518, 581, 644, 707,
+ 770, 833, 896, 15, 78, 141, 204, 267, 330, 393, 456, 519, 582,
+ 645, 708, 771, 834, 897, 960, 16, 79, 142, 205, 268, 331, 394,
+ 457, 520, 583, 646, 709, 772, 835, 898, 961, 1024, 17, 80, 143,
+ 206, 269, 332, 395, 458, 521, 584, 647, 710, 773, 836, 899, 962,
+ 1025, 1088, 18, 81, 144, 207, 270, 333, 396, 459, 522, 585, 648,
+ 711, 774, 837, 900, 963, 1026, 1089, 1152, 19, 82, 145, 208, 271,
+ 334, 397, 460, 523, 586, 649, 712, 775, 838, 901, 964, 1027, 1090,
+ 1153, 1216, 20, 83, 146, 209, 272, 335, 398, 461, 524, 587, 650,
+ 713, 776, 839, 902, 965, 1028, 1091, 1154, 1217, 1280, 21, 84, 147,
+ 210, 273, 336, 399, 462, 525, 588, 651, 714, 777, 840, 903, 966,
+ 1029, 1092, 1155, 1218, 1281, 1344, 22, 85, 148, 211, 274, 337, 400,
+ 463, 526, 589, 652, 715, 778, 841, 904, 967, 1030, 1093, 1156, 1219,
+ 1282, 1345, 1408, 23, 86, 149, 212, 275, 338, 401, 464, 527, 590,
+ 653, 716, 779, 842, 905, 968, 1031, 1094, 1157, 1220, 1283, 1346, 1409,
+ 1472, 24, 87, 150, 213, 276, 339, 402, 465, 528, 591, 654, 717,
+ 780, 843, 906, 969, 1032, 1095, 1158, 1221, 1284, 1347, 1410, 1473, 1536,
+ 25, 88, 151, 214, 277, 340, 403, 466, 529, 592, 655, 718, 781,
+ 844, 907, 970, 1033, 1096, 1159, 1222, 1285, 1348, 1411, 1474, 1537, 1600,
+ 26, 89, 152, 215, 278, 341, 404, 467, 530, 593, 656, 719, 782,
+ 845, 908, 971, 1034, 1097, 1160, 1223, 1286, 1349, 1412, 1475, 1538, 1601,
+ 1664, 27, 90, 153, 216, 279, 342, 405, 468, 531, 594, 657, 720,
+ 783, 846, 909, 972, 1035, 1098, 1161, 1224, 1287, 1350, 1413, 1476, 1539,
+ 1602, 1665, 1728, 28, 91, 154, 217, 280, 343, 406, 469, 532, 595,
+ 658, 721, 784, 847, 910, 973, 1036, 1099, 1162, 1225, 1288, 1351, 1414,
+ 1477, 1540, 1603, 1666, 1729, 1792, 29, 92, 155, 218, 281, 344, 407,
+ 470, 533, 596, 659, 722, 785, 848, 911, 974, 1037, 1100, 1163, 1226,
+ 1289, 1352, 1415, 1478, 1541, 1604, 1667, 1730, 1793, 1856, 30, 93, 156,
+ 219, 282, 345, 408, 471, 534, 597, 660, 723, 786, 849, 912, 975,
+ 1038, 1101, 1164, 1227, 1290, 1353, 1416, 1479, 1542, 1605, 1668, 1731, 1794,
+ 1857, 1920, 31, 94, 157, 220, 283, 346, 409, 472, 535, 598, 661,
+ 724, 787, 850, 913, 976, 1039, 1102, 1165, 1228, 1291, 1354, 1417, 1480,
+ 1543, 1606, 1669, 1732, 1795, 1858, 1921, 1984, 32, 95, 158, 221, 284,
+ 347, 410, 473, 536, 599, 662, 725, 788, 851, 914, 977, 1040, 1103,
+ 1166, 1229, 1292, 1355, 1418, 1481, 1544, 1607, 1670, 1733, 1796, 1859, 1922,
+ 1985, 33, 96, 159, 222, 285, 348, 411, 474, 537, 600, 663, 726,
+ 789, 852, 915, 978, 1041, 1104, 1167, 1230, 1293, 1356, 1419, 1482, 1545,
+ 1608, 1671, 1734, 1797, 1860, 1923, 1986, 34, 97, 160, 223, 286, 349,
+ 412, 475, 538, 601, 664, 727, 790, 853, 916, 979, 1042, 1105, 1168,
+ 1231, 1294, 1357, 1420, 1483, 1546, 1609, 1672, 1735, 1798, 1861, 1924, 1987,
+ 35, 98, 161, 224, 287, 350, 413, 476, 539, 602, 665, 728, 791,
+ 854, 917, 980, 1043, 1106, 1169, 1232, 1295, 1358, 1421, 1484, 1547, 1610,
+ 1673, 1736, 1799, 1862, 1925, 1988, 36, 99, 162, 225, 288, 351, 414,
+ 477, 540, 603, 666, 729, 792, 855, 918, 981, 1044, 1107, 1170, 1233,
+ 1296, 1359, 1422, 1485, 1548, 1611, 1674, 1737, 1800, 1863, 1926, 1989, 37,
+ 100, 163, 226, 289, 352, 415, 478, 541, 604, 667, 730, 793, 856,
+ 919, 982, 1045, 1108, 1171, 1234, 1297, 1360, 1423, 1486, 1549, 1612, 1675,
+ 1738, 1801, 1864, 1927, 1990, 38, 101, 164, 227, 290, 353, 416, 479,
+ 542, 605, 668, 731, 794, 857, 920, 983, 1046, 1109, 1172, 1235, 1298,
+ 1361, 1424, 1487, 1550, 1613, 1676, 1739, 1802, 1865, 1928, 1991, 39, 102,
+ 165, 228, 291, 354, 417, 480, 543, 606, 669, 732, 795, 858, 921,
+ 984, 1047, 1110, 1173, 1236, 1299, 1362, 1425, 1488, 1551, 1614, 1677, 1740,
+ 1803, 1866, 1929, 1992, 40, 103, 166, 229, 292, 355, 418, 481, 544,
+ 607, 670, 733, 796, 859, 922, 985, 1048, 1111, 1174, 1237, 1300, 1363,
+ 1426, 1489, 1552, 1615, 1678, 1741, 1804, 1867, 1930, 1993, 41, 104, 167,
+ 230, 293, 356, 419, 482, 545, 608, 671, 734, 797, 860, 923, 986,
+ 1049, 1112, 1175, 1238, 1301, 1364, 1427, 1490, 1553, 1616, 1679, 1742, 1805,
+ 1868, 1931, 1994, 42, 105, 168, 231, 294, 357, 420, 483, 546, 609,
+ 672, 735, 798, 861, 924, 987, 1050, 1113, 1176, 1239, 1302, 1365, 1428,
+ 1491, 1554, 1617, 1680, 1743, 1806, 1869, 1932, 1995, 43, 106, 169, 232,
+ 295, 358, 421, 484, 547, 610, 673, 736, 799, 862, 925, 988, 1051,
+ 1114, 1177, 1240, 1303, 1366, 1429, 1492, 1555, 1618, 1681, 1744, 1807, 1870,
+ 1933, 1996, 44, 107, 170, 233, 296, 359, 422, 485, 548, 611, 674,
+ 737, 800, 863, 926, 989, 1052, 1115, 1178, 1241, 1304, 1367, 1430, 1493,
+ 1556, 1619, 1682, 1745, 1808, 1871, 1934, 1997, 45, 108, 171, 234, 297,
+ 360, 423, 486, 549, 612, 675, 738, 801, 864, 927, 990, 1053, 1116,
+ 1179, 1242, 1305, 1368, 1431, 1494, 1557, 1620, 1683, 1746, 1809, 1872, 1935,
+ 1998, 46, 109, 172, 235, 298, 361, 424, 487, 550, 613, 676, 739,
+ 802, 865, 928, 991, 1054, 1117, 1180, 1243, 1306, 1369, 1432, 1495, 1558,
+ 1621, 1684, 1747, 1810, 1873, 1936, 1999, 47, 110, 173, 236, 299, 362,
+ 425, 488, 551, 614, 677, 740, 803, 866, 929, 992, 1055, 1118, 1181,
+ 1244, 1307, 1370, 1433, 1496, 1559, 1622, 1685, 1748, 1811, 1874, 1937, 2000,
+ 48, 111, 174, 237, 300, 363, 426, 489, 552, 615, 678, 741, 804,
+ 867, 930, 993, 1056, 1119, 1182, 1245, 1308, 1371, 1434, 1497, 1560, 1623,
+ 1686, 1749, 1812, 1875, 1938, 2001, 49, 112, 175, 238, 301, 364, 427,
+ 490, 553, 616, 679, 742, 805, 868, 931, 994, 1057, 1120, 1183, 1246,
+ 1309, 1372, 1435, 1498, 1561, 1624, 1687, 1750, 1813, 1876, 1939, 2002, 50,
+ 113, 176, 239, 302, 365, 428, 491, 554, 617, 680, 743, 806, 869,
+ 932, 995, 1058, 1121, 1184, 1247, 1310, 1373, 1436, 1499, 1562, 1625, 1688,
+ 1751, 1814, 1877, 1940, 2003, 51, 114, 177, 240, 303, 366, 429, 492,
+ 555, 618, 681, 744, 807, 870, 933, 996, 1059, 1122, 1185, 1248, 1311,
+ 1374, 1437, 1500, 1563, 1626, 1689, 1752, 1815, 1878, 1941, 2004, 52, 115,
+ 178, 241, 304, 367, 430, 493, 556, 619, 682, 745, 808, 871, 934,
+ 997, 1060, 1123, 1186, 1249, 1312, 1375, 1438, 1501, 1564, 1627, 1690, 1753,
+ 1816, 1879, 1942, 2005, 53, 116, 179, 242, 305, 368, 431, 494, 557,
+ 620, 683, 746, 809, 872, 935, 998, 1061, 1124, 1187, 1250, 1313, 1376,
+ 1439, 1502, 1565, 1628, 1691, 1754, 1817, 1880, 1943, 2006, 54, 117, 180,
+ 243, 306, 369, 432, 495, 558, 621, 684, 747, 810, 873, 936, 999,
+ 1062, 1125, 1188, 1251, 1314, 1377, 1440, 1503, 1566, 1629, 1692, 1755, 1818,
+ 1881, 1944, 2007, 55, 118, 181, 244, 307, 370, 433, 496, 559, 622,
+ 685, 748, 811, 874, 937, 1000, 1063, 1126, 1189, 1252, 1315, 1378, 1441,
+ 1504, 1567, 1630, 1693, 1756, 1819, 1882, 1945, 2008, 56, 119, 182, 245,
+ 308, 371, 434, 497, 560, 623, 686, 749, 812, 875, 938, 1001, 1064,
+ 1127, 1190, 1253, 1316, 1379, 1442, 1505, 1568, 1631, 1694, 1757, 1820, 1883,
+ 1946, 2009, 57, 120, 183, 246, 309, 372, 435, 498, 561, 624, 687,
+ 750, 813, 876, 939, 1002, 1065, 1128, 1191, 1254, 1317, 1380, 1443, 1506,
+ 1569, 1632, 1695, 1758, 1821, 1884, 1947, 2010, 58, 121, 184, 247, 310,
+ 373, 436, 499, 562, 625, 688, 751, 814, 877, 940, 1003, 1066, 1129,
+ 1192, 1255, 1318, 1381, 1444, 1507, 1570, 1633, 1696, 1759, 1822, 1885, 1948,
+ 2011, 59, 122, 185, 248, 311, 374, 437, 500, 563, 626, 689, 752,
+ 815, 878, 941, 1004, 1067, 1130, 1193, 1256, 1319, 1382, 1445, 1508, 1571,
+ 1634, 1697, 1760, 1823, 1886, 1949, 2012, 60, 123, 186, 249, 312, 375,
+ 438, 501, 564, 627, 690, 753, 816, 879, 942, 1005, 1068, 1131, 1194,
+ 1257, 1320, 1383, 1446, 1509, 1572, 1635, 1698, 1761, 1824, 1887, 1950, 2013,
+ 61, 124, 187, 250, 313, 376, 439, 502, 565, 628, 691, 754, 817,
+ 880, 943, 1006, 1069, 1132, 1195, 1258, 1321, 1384, 1447, 1510, 1573, 1636,
+ 1699, 1762, 1825, 1888, 1951, 2014, 62, 125, 188, 251, 314, 377, 440,
+ 503, 566, 629, 692, 755, 818, 881, 944, 1007, 1070, 1133, 1196, 1259,
+ 1322, 1385, 1448, 1511, 1574, 1637, 1700, 1763, 1826, 1889, 1952, 2015, 63,
+ 126, 189, 252, 315, 378, 441, 504, 567, 630, 693, 756, 819, 882,
+ 945, 1008, 1071, 1134, 1197, 1260, 1323, 1386, 1449, 1512, 1575, 1638, 1701,
+ 1764, 1827, 1890, 1953, 2016, 127, 190, 253, 316, 379, 442, 505, 568,
+ 631, 694, 757, 820, 883, 946, 1009, 1072, 1135, 1198, 1261, 1324, 1387,
+ 1450, 1513, 1576, 1639, 1702, 1765, 1828, 1891, 1954, 2017, 191, 254, 317,
+ 380, 443, 506, 569, 632, 695, 758, 821, 884, 947, 1010, 1073, 1136,
+ 1199, 1262, 1325, 1388, 1451, 1514, 1577, 1640, 1703, 1766, 1829, 1892, 1955,
+ 2018, 255, 318, 381, 444, 507, 570, 633, 696, 759, 822, 885, 948,
+ 1011, 1074, 1137, 1200, 1263, 1326, 1389, 1452, 1515, 1578, 1641, 1704, 1767,
+ 1830, 1893, 1956, 2019, 319, 382, 445, 508, 571, 634, 697, 760, 823,
+ 886, 949, 1012, 1075, 1138, 1201, 1264, 1327, 1390, 1453, 1516, 1579, 1642,
+ 1705, 1768, 1831, 1894, 1957, 2020, 383, 446, 509, 572, 635, 698, 761,
+ 824, 887, 950, 1013, 1076, 1139, 1202, 1265, 1328, 1391, 1454, 1517, 1580,
+ 1643, 1706, 1769, 1832, 1895, 1958, 2021, 447, 510, 573, 636, 699, 762,
+ 825, 888, 951, 1014, 1077, 1140, 1203, 1266, 1329, 1392, 1455, 1518, 1581,
+ 1644, 1707, 1770, 1833, 1896, 1959, 2022, 511, 574, 637, 700, 763, 826,
+ 889, 952, 1015, 1078, 1141, 1204, 1267, 1330, 1393, 1456, 1519, 1582, 1645,
+ 1708, 1771, 1834, 1897, 1960, 2023, 575, 638, 701, 764, 827, 890, 953,
+ 1016, 1079, 1142, 1205, 1268, 1331, 1394, 1457, 1520, 1583, 1646, 1709, 1772,
+ 1835, 1898, 1961, 2024, 639, 702, 765, 828, 891, 954, 1017, 1080, 1143,
+ 1206, 1269, 1332, 1395, 1458, 1521, 1584, 1647, 1710, 1773, 1836, 1899, 1962,
+ 2025, 703, 766, 829, 892, 955, 1018, 1081, 1144, 1207, 1270, 1333, 1396,
+ 1459, 1522, 1585, 1648, 1711, 1774, 1837, 1900, 1963, 2026, 767, 830, 893,
+ 956, 1019, 1082, 1145, 1208, 1271, 1334, 1397, 1460, 1523, 1586, 1649, 1712,
+ 1775, 1838, 1901, 1964, 2027, 831, 894, 957, 1020, 1083, 1146, 1209, 1272,
+ 1335, 1398, 1461, 1524, 1587, 1650, 1713, 1776, 1839, 1902, 1965, 2028, 895,
+ 958, 1021, 1084, 1147, 1210, 1273, 1336, 1399, 1462, 1525, 1588, 1651, 1714,
+ 1777, 1840, 1903, 1966, 2029, 959, 1022, 1085, 1148, 1211, 1274, 1337, 1400,
+ 1463, 1526, 1589, 1652, 1715, 1778, 1841, 1904, 1967, 2030, 1023, 1086, 1149,
+ 1212, 1275, 1338, 1401, 1464, 1527, 1590, 1653, 1716, 1779, 1842, 1905, 1968,
+ 2031, 1087, 1150, 1213, 1276, 1339, 1402, 1465, 1528, 1591, 1654, 1717, 1780,
+ 1843, 1906, 1969, 2032, 1151, 1214, 1277, 1340, 1403, 1466, 1529, 1592, 1655,
+ 1718, 1781, 1844, 1907, 1970, 2033, 1215, 1278, 1341, 1404, 1467, 1530, 1593,
+ 1656, 1719, 1782, 1845, 1908, 1971, 2034, 1279, 1342, 1405, 1468, 1531, 1594,
+ 1657, 1720, 1783, 1846, 1909, 1972, 2035, 1343, 1406, 1469, 1532, 1595, 1658,
+ 1721, 1784, 1847, 1910, 1973, 2036, 1407, 1470, 1533, 1596, 1659, 1722, 1785,
+ 1848, 1911, 1974, 2037, 1471, 1534, 1597, 1660, 1723, 1786, 1849, 1912, 1975,
+ 2038, 1535, 1598, 1661, 1724, 1787, 1850, 1913, 1976, 2039, 1599, 1662, 1725,
+ 1788, 1851, 1914, 1977, 2040, 1663, 1726, 1789, 1852, 1915, 1978, 2041, 1727,
+ 1790, 1853, 1916, 1979, 2042, 1791, 1854, 1917, 1980, 2043, 1855, 1918, 1981,
+ 2044, 1919, 1982, 2045, 1983, 2046, 2047,
+};
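+
+/* Illustrative sketch of how a scan table of this shape is consumed; the
+ * `visit` callback is hypothetical, not an aom API, and the constants
+ * follow the 64-column layout of the table above:
+ *
+ *   static void walk_64x32(const tran_low_t *coeffs,
+ *                          void (*visit)(tran_low_t v, int row, int col)) {
+ *     for (int k = 0; k < 64 * 32; ++k) {
+ *       const int raster = default_scan_64x32[k];  // scan pos -> raster idx
+ *       visit(coeffs[raster], raster / 64, raster % 64);
+ *     }
+ *   }
+ */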
+
DECLARE_ALIGNED(16, static const int16_t, default_scan_64x64[4096]) = {
0, 1, 64, 65, 2, 128, 66, 129, 130, 3, 192, 67, 193,
131, 194, 4, 256, 68, 257, 195, 132, 258, 5, 196, 259, 320,
@@ -3614,6 +3936,646 @@ DECLARE_ALIGNED(16, static const int16_t,
#if CONFIG_TX64X64
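+/* Neighbor tables for the rectangular 64-point scans: each of the 2048
+ * scan positions stores MAX_NEIGHBORS (= 2) raster indices, the above and
+ * left neighbors of that coefficient, duplicated along the top row and
+ * first column where only one of the two exists. The 2049th pair is a
+ * (0, 0) terminator. These feed the entropy-context lookup during
+ * coefficient coding. */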
DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_32x64_neighbors[2049 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 32, 32, 32, 2,
+ 2, 2, 33, 33, 64, 64, 64, 3, 3, 3, 34, 34, 65,
+ 65, 96, 96, 96, 4, 4, 4, 35, 35, 66, 66, 97, 97,
+ 128, 128, 128, 5, 5, 5, 36, 36, 67, 67, 98, 98, 129,
+ 129, 160, 160, 160, 6, 6, 6, 37, 37, 68, 68, 99, 99,
+ 130, 130, 161, 161, 192, 192, 192, 7, 7, 7, 38, 38, 69,
+ 69, 100, 100, 131, 131, 162, 162, 193, 193, 224, 224, 224, 8,
+ 8, 8, 39, 39, 70, 70, 101, 101, 132, 132, 163, 163, 194,
+ 194, 225, 225, 256, 256, 256, 9, 9, 9, 40, 40, 71, 71,
+ 102, 102, 133, 133, 164, 164, 195, 195, 226, 226, 257, 257, 288,
+ 288, 288, 10, 10, 10, 41, 41, 72, 72, 103, 103, 134, 134,
+ 165, 165, 196, 196, 227, 227, 258, 258, 289, 289, 320, 320, 320,
+ 11, 11, 11, 42, 42, 73, 73, 104, 104, 135, 135, 166, 166,
+ 197, 197, 228, 228, 259, 259, 290, 290, 321, 321, 352, 352, 352,
+ 12, 12, 12, 43, 43, 74, 74, 105, 105, 136, 136, 167, 167,
+ 198, 198, 229, 229, 260, 260, 291, 291, 322, 322, 353, 353, 384,
+ 384, 384, 13, 13, 13, 44, 44, 75, 75, 106, 106, 137, 137,
+ 168, 168, 199, 199, 230, 230, 261, 261, 292, 292, 323, 323, 354,
+ 354, 385, 385, 416, 416, 416, 14, 14, 14, 45, 45, 76, 76,
+ 107, 107, 138, 138, 169, 169, 200, 200, 231, 231, 262, 262, 293,
+ 293, 324, 324, 355, 355, 386, 386, 417, 417, 448, 448, 448, 15,
+ 15, 15, 46, 46, 77, 77, 108, 108, 139, 139, 170, 170, 201,
+ 201, 232, 232, 263, 263, 294, 294, 325, 325, 356, 356, 387, 387,
+ 418, 418, 449, 449, 480, 480, 480, 16, 16, 16, 47, 47, 78,
+ 78, 109, 109, 140, 140, 171, 171, 202, 202, 233, 233, 264, 264,
+ 295, 295, 326, 326, 357, 357, 388, 388, 419, 419, 450, 450, 481,
+ 481, 512, 512, 512, 17, 17, 17, 48, 48, 79, 79, 110, 110,
+ 141, 141, 172, 172, 203, 203, 234, 234, 265, 265, 296, 296, 327,
+ 327, 358, 358, 389, 389, 420, 420, 451, 451, 482, 482, 513, 513,
+ 544, 544, 544, 18, 18, 18, 49, 49, 80, 80, 111, 111, 142,
+ 142, 173, 173, 204, 204, 235, 235, 266, 266, 297, 297, 328, 328,
+ 359, 359, 390, 390, 421, 421, 452, 452, 483, 483, 514, 514, 545,
+ 545, 576, 576, 576, 19, 19, 19, 50, 50, 81, 81, 112, 112,
+ 143, 143, 174, 174, 205, 205, 236, 236, 267, 267, 298, 298, 329,
+ 329, 360, 360, 391, 391, 422, 422, 453, 453, 484, 484, 515, 515,
+ 546, 546, 577, 577, 608, 608, 608, 20, 20, 20, 51, 51, 82,
+ 82, 113, 113, 144, 144, 175, 175, 206, 206, 237, 237, 268, 268,
+ 299, 299, 330, 330, 361, 361, 392, 392, 423, 423, 454, 454, 485,
+ 485, 516, 516, 547, 547, 578, 578, 609, 609, 640, 640, 640, 21,
+ 21, 21, 52, 52, 83, 83, 114, 114, 145, 145, 176, 176, 207,
+ 207, 238, 238, 269, 269, 300, 300, 331, 331, 362, 362, 393, 393,
+ 424, 424, 455, 455, 486, 486, 517, 517, 548, 548, 579, 579, 610,
+ 610, 641, 641, 672, 672, 672, 22, 22, 22, 53, 53, 84, 84,
+ 115, 115, 146, 146, 177, 177, 208, 208, 239, 239, 270, 270, 301,
+ 301, 332, 332, 363, 363, 394, 394, 425, 425, 456, 456, 487, 487,
+ 518, 518, 549, 549, 580, 580, 611, 611, 642, 642, 673, 673, 704,
+ 704, 704, 23, 23, 23, 54, 54, 85, 85, 116, 116, 147, 147,
+ 178, 178, 209, 209, 240, 240, 271, 271, 302, 302, 333, 333, 364,
+ 364, 395, 395, 426, 426, 457, 457, 488, 488, 519, 519, 550, 550,
+ 581, 581, 612, 612, 643, 643, 674, 674, 705, 705, 736, 736, 736,
+ 24, 24, 24, 55, 55, 86, 86, 117, 117, 148, 148, 179, 179,
+ 210, 210, 241, 241, 272, 272, 303, 303, 334, 334, 365, 365, 396,
+ 396, 427, 427, 458, 458, 489, 489, 520, 520, 551, 551, 582, 582,
+ 613, 613, 644, 644, 675, 675, 706, 706, 737, 737, 768, 768, 768,
+ 25, 25, 25, 56, 56, 87, 87, 118, 118, 149, 149, 180, 180,
+ 211, 211, 242, 242, 273, 273, 304, 304, 335, 335, 366, 366, 397,
+ 397, 428, 428, 459, 459, 490, 490, 521, 521, 552, 552, 583, 583,
+ 614, 614, 645, 645, 676, 676, 707, 707, 738, 738, 769, 769, 800,
+ 800, 800, 26, 26, 26, 57, 57, 88, 88, 119, 119, 150, 150,
+ 181, 181, 212, 212, 243, 243, 274, 274, 305, 305, 336, 336, 367,
+ 367, 398, 398, 429, 429, 460, 460, 491, 491, 522, 522, 553, 553,
+ 584, 584, 615, 615, 646, 646, 677, 677, 708, 708, 739, 739, 770,
+ 770, 801, 801, 832, 832, 832, 27, 27, 27, 58, 58, 89, 89,
+ 120, 120, 151, 151, 182, 182, 213, 213, 244, 244, 275, 275, 306,
+ 306, 337, 337, 368, 368, 399, 399, 430, 430, 461, 461, 492, 492,
+ 523, 523, 554, 554, 585, 585, 616, 616, 647, 647, 678, 678, 709,
+ 709, 740, 740, 771, 771, 802, 802, 833, 833, 864, 864, 864, 28,
+ 28, 28, 59, 59, 90, 90, 121, 121, 152, 152, 183, 183, 214,
+ 214, 245, 245, 276, 276, 307, 307, 338, 338, 369, 369, 400, 400,
+ 431, 431, 462, 462, 493, 493, 524, 524, 555, 555, 586, 586, 617,
+ 617, 648, 648, 679, 679, 710, 710, 741, 741, 772, 772, 803, 803,
+ 834, 834, 865, 865, 896, 896, 896, 29, 29, 29, 60, 60, 91,
+ 91, 122, 122, 153, 153, 184, 184, 215, 215, 246, 246, 277, 277,
+ 308, 308, 339, 339, 370, 370, 401, 401, 432, 432, 463, 463, 494,
+ 494, 525, 525, 556, 556, 587, 587, 618, 618, 649, 649, 680, 680,
+ 711, 711, 742, 742, 773, 773, 804, 804, 835, 835, 866, 866, 897,
+ 897, 928, 928, 928, 30, 30, 30, 61, 61, 92, 92, 123, 123,
+ 154, 154, 185, 185, 216, 216, 247, 247, 278, 278, 309, 309, 340,
+ 340, 371, 371, 402, 402, 433, 433, 464, 464, 495, 495, 526, 526,
+ 557, 557, 588, 588, 619, 619, 650, 650, 681, 681, 712, 712, 743,
+ 743, 774, 774, 805, 805, 836, 836, 867, 867, 898, 898, 929, 929,
+ 960, 960, 960, 31, 62, 62, 93, 93, 124, 124, 155, 155, 186,
+ 186, 217, 217, 248, 248, 279, 279, 310, 310, 341, 341, 372, 372,
+ 403, 403, 434, 434, 465, 465, 496, 496, 527, 527, 558, 558, 589,
+ 589, 620, 620, 651, 651, 682, 682, 713, 713, 744, 744, 775, 775,
+ 806, 806, 837, 837, 868, 868, 899, 899, 930, 930, 961, 961, 992,
+ 992, 992, 63, 94, 94, 125, 125, 156, 156, 187, 187, 218, 218,
+ 249, 249, 280, 280, 311, 311, 342, 342, 373, 373, 404, 404, 435,
+ 435, 466, 466, 497, 497, 528, 528, 559, 559, 590, 590, 621, 621,
+ 652, 652, 683, 683, 714, 714, 745, 745, 776, 776, 807, 807, 838,
+ 838, 869, 869, 900, 900, 931, 931, 962, 962, 993, 993, 1024, 1024,
+ 1024, 95, 126, 126, 157, 157, 188, 188, 219, 219, 250, 250, 281,
+ 281, 312, 312, 343, 343, 374, 374, 405, 405, 436, 436, 467, 467,
+ 498, 498, 529, 529, 560, 560, 591, 591, 622, 622, 653, 653, 684,
+ 684, 715, 715, 746, 746, 777, 777, 808, 808, 839, 839, 870, 870,
+ 901, 901, 932, 932, 963, 963, 994, 994, 1025, 1025, 1056, 1056, 1056,
+ 127, 158, 158, 189, 189, 220, 220, 251, 251, 282, 282, 313, 313,
+ 344, 344, 375, 375, 406, 406, 437, 437, 468, 468, 499, 499, 530,
+ 530, 561, 561, 592, 592, 623, 623, 654, 654, 685, 685, 716, 716,
+ 747, 747, 778, 778, 809, 809, 840, 840, 871, 871, 902, 902, 933,
+ 933, 964, 964, 995, 995, 1026, 1026, 1057, 1057, 1088, 1088, 1088, 159,
+ 190, 190, 221, 221, 252, 252, 283, 283, 314, 314, 345, 345, 376,
+ 376, 407, 407, 438, 438, 469, 469, 500, 500, 531, 531, 562, 562,
+ 593, 593, 624, 624, 655, 655, 686, 686, 717, 717, 748, 748, 779,
+ 779, 810, 810, 841, 841, 872, 872, 903, 903, 934, 934, 965, 965,
+ 996, 996, 1027, 1027, 1058, 1058, 1089, 1089, 1120, 1120, 1120, 191, 222,
+ 222, 253, 253, 284, 284, 315, 315, 346, 346, 377, 377, 408, 408,
+ 439, 439, 470, 470, 501, 501, 532, 532, 563, 563, 594, 594, 625,
+ 625, 656, 656, 687, 687, 718, 718, 749, 749, 780, 780, 811, 811,
+ 842, 842, 873, 873, 904, 904, 935, 935, 966, 966, 997, 997, 1028,
+ 1028, 1059, 1059, 1090, 1090, 1121, 1121, 1152, 1152, 1152, 223, 254, 254,
+ 285, 285, 316, 316, 347, 347, 378, 378, 409, 409, 440, 440, 471,
+ 471, 502, 502, 533, 533, 564, 564, 595, 595, 626, 626, 657, 657,
+ 688, 688, 719, 719, 750, 750, 781, 781, 812, 812, 843, 843, 874,
+ 874, 905, 905, 936, 936, 967, 967, 998, 998, 1029, 1029, 1060, 1060,
+ 1091, 1091, 1122, 1122, 1153, 1153, 1184, 1184, 1184, 255, 286, 286, 317,
+ 317, 348, 348, 379, 379, 410, 410, 441, 441, 472, 472, 503, 503,
+ 534, 534, 565, 565, 596, 596, 627, 627, 658, 658, 689, 689, 720,
+ 720, 751, 751, 782, 782, 813, 813, 844, 844, 875, 875, 906, 906,
+ 937, 937, 968, 968, 999, 999, 1030, 1030, 1061, 1061, 1092, 1092, 1123,
+ 1123, 1154, 1154, 1185, 1185, 1216, 1216, 1216, 287, 318, 318, 349, 349,
+ 380, 380, 411, 411, 442, 442, 473, 473, 504, 504, 535, 535, 566,
+ 566, 597, 597, 628, 628, 659, 659, 690, 690, 721, 721, 752, 752,
+ 783, 783, 814, 814, 845, 845, 876, 876, 907, 907, 938, 938, 969,
+ 969, 1000, 1000, 1031, 1031, 1062, 1062, 1093, 1093, 1124, 1124, 1155, 1155,
+ 1186, 1186, 1217, 1217, 1248, 1248, 1248, 319, 350, 350, 381, 381, 412,
+ 412, 443, 443, 474, 474, 505, 505, 536, 536, 567, 567, 598, 598,
+ 629, 629, 660, 660, 691, 691, 722, 722, 753, 753, 784, 784, 815,
+ 815, 846, 846, 877, 877, 908, 908, 939, 939, 970, 970, 1001, 1001,
+ 1032, 1032, 1063, 1063, 1094, 1094, 1125, 1125, 1156, 1156, 1187, 1187, 1218,
+ 1218, 1249, 1249, 1280, 1280, 1280, 351, 382, 382, 413, 413, 444, 444,
+ 475, 475, 506, 506, 537, 537, 568, 568, 599, 599, 630, 630, 661,
+ 661, 692, 692, 723, 723, 754, 754, 785, 785, 816, 816, 847, 847,
+ 878, 878, 909, 909, 940, 940, 971, 971, 1002, 1002, 1033, 1033, 1064,
+ 1064, 1095, 1095, 1126, 1126, 1157, 1157, 1188, 1188, 1219, 1219, 1250, 1250,
+ 1281, 1281, 1312, 1312, 1312, 383, 414, 414, 445, 445, 476, 476, 507,
+ 507, 538, 538, 569, 569, 600, 600, 631, 631, 662, 662, 693, 693,
+ 724, 724, 755, 755, 786, 786, 817, 817, 848, 848, 879, 879, 910,
+ 910, 941, 941, 972, 972, 1003, 1003, 1034, 1034, 1065, 1065, 1096, 1096,
+ 1127, 1127, 1158, 1158, 1189, 1189, 1220, 1220, 1251, 1251, 1282, 1282, 1313,
+ 1313, 1344, 1344, 1344, 415, 446, 446, 477, 477, 508, 508, 539, 539,
+ 570, 570, 601, 601, 632, 632, 663, 663, 694, 694, 725, 725, 756,
+ 756, 787, 787, 818, 818, 849, 849, 880, 880, 911, 911, 942, 942,
+ 973, 973, 1004, 1004, 1035, 1035, 1066, 1066, 1097, 1097, 1128, 1128, 1159,
+ 1159, 1190, 1190, 1221, 1221, 1252, 1252, 1283, 1283, 1314, 1314, 1345, 1345,
+ 1376, 1376, 1376, 447, 478, 478, 509, 509, 540, 540, 571, 571, 602,
+ 602, 633, 633, 664, 664, 695, 695, 726, 726, 757, 757, 788, 788,
+ 819, 819, 850, 850, 881, 881, 912, 912, 943, 943, 974, 974, 1005,
+ 1005, 1036, 1036, 1067, 1067, 1098, 1098, 1129, 1129, 1160, 1160, 1191, 1191,
+ 1222, 1222, 1253, 1253, 1284, 1284, 1315, 1315, 1346, 1346, 1377, 1377, 1408,
+ 1408, 1408, 479, 510, 510, 541, 541, 572, 572, 603, 603, 634, 634,
+ 665, 665, 696, 696, 727, 727, 758, 758, 789, 789, 820, 820, 851,
+ 851, 882, 882, 913, 913, 944, 944, 975, 975, 1006, 1006, 1037, 1037,
+ 1068, 1068, 1099, 1099, 1130, 1130, 1161, 1161, 1192, 1192, 1223, 1223, 1254,
+ 1254, 1285, 1285, 1316, 1316, 1347, 1347, 1378, 1378, 1409, 1409, 1440, 1440,
+ 1440, 511, 542, 542, 573, 573, 604, 604, 635, 635, 666, 666, 697,
+ 697, 728, 728, 759, 759, 790, 790, 821, 821, 852, 852, 883, 883,
+ 914, 914, 945, 945, 976, 976, 1007, 1007, 1038, 1038, 1069, 1069, 1100,
+ 1100, 1131, 1131, 1162, 1162, 1193, 1193, 1224, 1224, 1255, 1255, 1286, 1286,
+ 1317, 1317, 1348, 1348, 1379, 1379, 1410, 1410, 1441, 1441, 1472, 1472, 1472,
+ 543, 574, 574, 605, 605, 636, 636, 667, 667, 698, 698, 729, 729,
+ 760, 760, 791, 791, 822, 822, 853, 853, 884, 884, 915, 915, 946,
+ 946, 977, 977, 1008, 1008, 1039, 1039, 1070, 1070, 1101, 1101, 1132, 1132,
+ 1163, 1163, 1194, 1194, 1225, 1225, 1256, 1256, 1287, 1287, 1318, 1318, 1349,
+ 1349, 1380, 1380, 1411, 1411, 1442, 1442, 1473, 1473, 1504, 1504, 1504, 575,
+ 606, 606, 637, 637, 668, 668, 699, 699, 730, 730, 761, 761, 792,
+ 792, 823, 823, 854, 854, 885, 885, 916, 916, 947, 947, 978, 978,
+ 1009, 1009, 1040, 1040, 1071, 1071, 1102, 1102, 1133, 1133, 1164, 1164, 1195,
+ 1195, 1226, 1226, 1257, 1257, 1288, 1288, 1319, 1319, 1350, 1350, 1381, 1381,
+ 1412, 1412, 1443, 1443, 1474, 1474, 1505, 1505, 1536, 1536, 1536, 607, 638,
+ 638, 669, 669, 700, 700, 731, 731, 762, 762, 793, 793, 824, 824,
+ 855, 855, 886, 886, 917, 917, 948, 948, 979, 979, 1010, 1010, 1041,
+ 1041, 1072, 1072, 1103, 1103, 1134, 1134, 1165, 1165, 1196, 1196, 1227, 1227,
+ 1258, 1258, 1289, 1289, 1320, 1320, 1351, 1351, 1382, 1382, 1413, 1413, 1444,
+ 1444, 1475, 1475, 1506, 1506, 1537, 1537, 1568, 1568, 1568, 639, 670, 670,
+ 701, 701, 732, 732, 763, 763, 794, 794, 825, 825, 856, 856, 887,
+ 887, 918, 918, 949, 949, 980, 980, 1011, 1011, 1042, 1042, 1073, 1073,
+ 1104, 1104, 1135, 1135, 1166, 1166, 1197, 1197, 1228, 1228, 1259, 1259, 1290,
+ 1290, 1321, 1321, 1352, 1352, 1383, 1383, 1414, 1414, 1445, 1445, 1476, 1476,
+ 1507, 1507, 1538, 1538, 1569, 1569, 1600, 1600, 1600, 671, 702, 702, 733,
+ 733, 764, 764, 795, 795, 826, 826, 857, 857, 888, 888, 919, 919,
+ 950, 950, 981, 981, 1012, 1012, 1043, 1043, 1074, 1074, 1105, 1105, 1136,
+ 1136, 1167, 1167, 1198, 1198, 1229, 1229, 1260, 1260, 1291, 1291, 1322, 1322,
+ 1353, 1353, 1384, 1384, 1415, 1415, 1446, 1446, 1477, 1477, 1508, 1508, 1539,
+ 1539, 1570, 1570, 1601, 1601, 1632, 1632, 1632, 703, 734, 734, 765, 765,
+ 796, 796, 827, 827, 858, 858, 889, 889, 920, 920, 951, 951, 982,
+ 982, 1013, 1013, 1044, 1044, 1075, 1075, 1106, 1106, 1137, 1137, 1168, 1168,
+ 1199, 1199, 1230, 1230, 1261, 1261, 1292, 1292, 1323, 1323, 1354, 1354, 1385,
+ 1385, 1416, 1416, 1447, 1447, 1478, 1478, 1509, 1509, 1540, 1540, 1571, 1571,
+ 1602, 1602, 1633, 1633, 1664, 1664, 1664, 735, 766, 766, 797, 797, 828,
+ 828, 859, 859, 890, 890, 921, 921, 952, 952, 983, 983, 1014, 1014,
+ 1045, 1045, 1076, 1076, 1107, 1107, 1138, 1138, 1169, 1169, 1200, 1200, 1231,
+ 1231, 1262, 1262, 1293, 1293, 1324, 1324, 1355, 1355, 1386, 1386, 1417, 1417,
+ 1448, 1448, 1479, 1479, 1510, 1510, 1541, 1541, 1572, 1572, 1603, 1603, 1634,
+ 1634, 1665, 1665, 1696, 1696, 1696, 767, 798, 798, 829, 829, 860, 860,
+ 891, 891, 922, 922, 953, 953, 984, 984, 1015, 1015, 1046, 1046, 1077,
+ 1077, 1108, 1108, 1139, 1139, 1170, 1170, 1201, 1201, 1232, 1232, 1263, 1263,
+ 1294, 1294, 1325, 1325, 1356, 1356, 1387, 1387, 1418, 1418, 1449, 1449, 1480,
+ 1480, 1511, 1511, 1542, 1542, 1573, 1573, 1604, 1604, 1635, 1635, 1666, 1666,
+ 1697, 1697, 1728, 1728, 1728, 799, 830, 830, 861, 861, 892, 892, 923,
+ 923, 954, 954, 985, 985, 1016, 1016, 1047, 1047, 1078, 1078, 1109, 1109,
+ 1140, 1140, 1171, 1171, 1202, 1202, 1233, 1233, 1264, 1264, 1295, 1295, 1326,
+ 1326, 1357, 1357, 1388, 1388, 1419, 1419, 1450, 1450, 1481, 1481, 1512, 1512,
+ 1543, 1543, 1574, 1574, 1605, 1605, 1636, 1636, 1667, 1667, 1698, 1698, 1729,
+ 1729, 1760, 1760, 1760, 831, 862, 862, 893, 893, 924, 924, 955, 955,
+ 986, 986, 1017, 1017, 1048, 1048, 1079, 1079, 1110, 1110, 1141, 1141, 1172,
+ 1172, 1203, 1203, 1234, 1234, 1265, 1265, 1296, 1296, 1327, 1327, 1358, 1358,
+ 1389, 1389, 1420, 1420, 1451, 1451, 1482, 1482, 1513, 1513, 1544, 1544, 1575,
+ 1575, 1606, 1606, 1637, 1637, 1668, 1668, 1699, 1699, 1730, 1730, 1761, 1761,
+ 1792, 1792, 1792, 863, 894, 894, 925, 925, 956, 956, 987, 987, 1018,
+ 1018, 1049, 1049, 1080, 1080, 1111, 1111, 1142, 1142, 1173, 1173, 1204, 1204,
+ 1235, 1235, 1266, 1266, 1297, 1297, 1328, 1328, 1359, 1359, 1390, 1390, 1421,
+ 1421, 1452, 1452, 1483, 1483, 1514, 1514, 1545, 1545, 1576, 1576, 1607, 1607,
+ 1638, 1638, 1669, 1669, 1700, 1700, 1731, 1731, 1762, 1762, 1793, 1793, 1824,
+ 1824, 1824, 895, 926, 926, 957, 957, 988, 988, 1019, 1019, 1050, 1050,
+ 1081, 1081, 1112, 1112, 1143, 1143, 1174, 1174, 1205, 1205, 1236, 1236, 1267,
+ 1267, 1298, 1298, 1329, 1329, 1360, 1360, 1391, 1391, 1422, 1422, 1453, 1453,
+ 1484, 1484, 1515, 1515, 1546, 1546, 1577, 1577, 1608, 1608, 1639, 1639, 1670,
+ 1670, 1701, 1701, 1732, 1732, 1763, 1763, 1794, 1794, 1825, 1825, 1856, 1856,
+ 1856, 927, 958, 958, 989, 989, 1020, 1020, 1051, 1051, 1082, 1082, 1113,
+ 1113, 1144, 1144, 1175, 1175, 1206, 1206, 1237, 1237, 1268, 1268, 1299, 1299,
+ 1330, 1330, 1361, 1361, 1392, 1392, 1423, 1423, 1454, 1454, 1485, 1485, 1516,
+ 1516, 1547, 1547, 1578, 1578, 1609, 1609, 1640, 1640, 1671, 1671, 1702, 1702,
+ 1733, 1733, 1764, 1764, 1795, 1795, 1826, 1826, 1857, 1857, 1888, 1888, 1888,
+ 959, 990, 990, 1021, 1021, 1052, 1052, 1083, 1083, 1114, 1114, 1145, 1145,
+ 1176, 1176, 1207, 1207, 1238, 1238, 1269, 1269, 1300, 1300, 1331, 1331, 1362,
+ 1362, 1393, 1393, 1424, 1424, 1455, 1455, 1486, 1486, 1517, 1517, 1548, 1548,
+ 1579, 1579, 1610, 1610, 1641, 1641, 1672, 1672, 1703, 1703, 1734, 1734, 1765,
+ 1765, 1796, 1796, 1827, 1827, 1858, 1858, 1889, 1889, 1920, 1920, 1920, 991,
+ 1022, 1022, 1053, 1053, 1084, 1084, 1115, 1115, 1146, 1146, 1177, 1177, 1208,
+ 1208, 1239, 1239, 1270, 1270, 1301, 1301, 1332, 1332, 1363, 1363, 1394, 1394,
+ 1425, 1425, 1456, 1456, 1487, 1487, 1518, 1518, 1549, 1549, 1580, 1580, 1611,
+ 1611, 1642, 1642, 1673, 1673, 1704, 1704, 1735, 1735, 1766, 1766, 1797, 1797,
+ 1828, 1828, 1859, 1859, 1890, 1890, 1921, 1921, 1952, 1952, 1952, 1023, 1054,
+ 1054, 1085, 1085, 1116, 1116, 1147, 1147, 1178, 1178, 1209, 1209, 1240, 1240,
+ 1271, 1271, 1302, 1302, 1333, 1333, 1364, 1364, 1395, 1395, 1426, 1426, 1457,
+ 1457, 1488, 1488, 1519, 1519, 1550, 1550, 1581, 1581, 1612, 1612, 1643, 1643,
+ 1674, 1674, 1705, 1705, 1736, 1736, 1767, 1767, 1798, 1798, 1829, 1829, 1860,
+ 1860, 1891, 1891, 1922, 1922, 1953, 1953, 1984, 1984, 1984, 1055, 1086, 1086,
+ 1117, 1117, 1148, 1148, 1179, 1179, 1210, 1210, 1241, 1241, 1272, 1272, 1303,
+ 1303, 1334, 1334, 1365, 1365, 1396, 1396, 1427, 1427, 1458, 1458, 1489, 1489,
+ 1520, 1520, 1551, 1551, 1582, 1582, 1613, 1613, 1644, 1644, 1675, 1675, 1706,
+ 1706, 1737, 1737, 1768, 1768, 1799, 1799, 1830, 1830, 1861, 1861, 1892, 1892,
+ 1923, 1923, 1954, 1954, 1985, 1985, 2016, 1087, 1118, 1118, 1149, 1149, 1180,
+ 1180, 1211, 1211, 1242, 1242, 1273, 1273, 1304, 1304, 1335, 1335, 1366, 1366,
+ 1397, 1397, 1428, 1428, 1459, 1459, 1490, 1490, 1521, 1521, 1552, 1552, 1583,
+ 1583, 1614, 1614, 1645, 1645, 1676, 1676, 1707, 1707, 1738, 1738, 1769, 1769,
+ 1800, 1800, 1831, 1831, 1862, 1862, 1893, 1893, 1924, 1924, 1955, 1955, 1986,
+ 1986, 2017, 1119, 1150, 1150, 1181, 1181, 1212, 1212, 1243, 1243, 1274, 1274,
+ 1305, 1305, 1336, 1336, 1367, 1367, 1398, 1398, 1429, 1429, 1460, 1460, 1491,
+ 1491, 1522, 1522, 1553, 1553, 1584, 1584, 1615, 1615, 1646, 1646, 1677, 1677,
+ 1708, 1708, 1739, 1739, 1770, 1770, 1801, 1801, 1832, 1832, 1863, 1863, 1894,
+ 1894, 1925, 1925, 1956, 1956, 1987, 1987, 2018, 1151, 1182, 1182, 1213, 1213,
+ 1244, 1244, 1275, 1275, 1306, 1306, 1337, 1337, 1368, 1368, 1399, 1399, 1430,
+ 1430, 1461, 1461, 1492, 1492, 1523, 1523, 1554, 1554, 1585, 1585, 1616, 1616,
+ 1647, 1647, 1678, 1678, 1709, 1709, 1740, 1740, 1771, 1771, 1802, 1802, 1833,
+ 1833, 1864, 1864, 1895, 1895, 1926, 1926, 1957, 1957, 1988, 1988, 2019, 1183,
+ 1214, 1214, 1245, 1245, 1276, 1276, 1307, 1307, 1338, 1338, 1369, 1369, 1400,
+ 1400, 1431, 1431, 1462, 1462, 1493, 1493, 1524, 1524, 1555, 1555, 1586, 1586,
+ 1617, 1617, 1648, 1648, 1679, 1679, 1710, 1710, 1741, 1741, 1772, 1772, 1803,
+ 1803, 1834, 1834, 1865, 1865, 1896, 1896, 1927, 1927, 1958, 1958, 1989, 1989,
+ 2020, 1215, 1246, 1246, 1277, 1277, 1308, 1308, 1339, 1339, 1370, 1370, 1401,
+ 1401, 1432, 1432, 1463, 1463, 1494, 1494, 1525, 1525, 1556, 1556, 1587, 1587,
+ 1618, 1618, 1649, 1649, 1680, 1680, 1711, 1711, 1742, 1742, 1773, 1773, 1804,
+ 1804, 1835, 1835, 1866, 1866, 1897, 1897, 1928, 1928, 1959, 1959, 1990, 1990,
+ 2021, 1247, 1278, 1278, 1309, 1309, 1340, 1340, 1371, 1371, 1402, 1402, 1433,
+ 1433, 1464, 1464, 1495, 1495, 1526, 1526, 1557, 1557, 1588, 1588, 1619, 1619,
+ 1650, 1650, 1681, 1681, 1712, 1712, 1743, 1743, 1774, 1774, 1805, 1805, 1836,
+ 1836, 1867, 1867, 1898, 1898, 1929, 1929, 1960, 1960, 1991, 1991, 2022, 1279,
+ 1310, 1310, 1341, 1341, 1372, 1372, 1403, 1403, 1434, 1434, 1465, 1465, 1496,
+ 1496, 1527, 1527, 1558, 1558, 1589, 1589, 1620, 1620, 1651, 1651, 1682, 1682,
+ 1713, 1713, 1744, 1744, 1775, 1775, 1806, 1806, 1837, 1837, 1868, 1868, 1899,
+ 1899, 1930, 1930, 1961, 1961, 1992, 1992, 2023, 1311, 1342, 1342, 1373, 1373,
+ 1404, 1404, 1435, 1435, 1466, 1466, 1497, 1497, 1528, 1528, 1559, 1559, 1590,
+ 1590, 1621, 1621, 1652, 1652, 1683, 1683, 1714, 1714, 1745, 1745, 1776, 1776,
+ 1807, 1807, 1838, 1838, 1869, 1869, 1900, 1900, 1931, 1931, 1962, 1962, 1993,
+ 1993, 2024, 1343, 1374, 1374, 1405, 1405, 1436, 1436, 1467, 1467, 1498, 1498,
+ 1529, 1529, 1560, 1560, 1591, 1591, 1622, 1622, 1653, 1653, 1684, 1684, 1715,
+ 1715, 1746, 1746, 1777, 1777, 1808, 1808, 1839, 1839, 1870, 1870, 1901, 1901,
+ 1932, 1932, 1963, 1963, 1994, 1994, 2025, 1375, 1406, 1406, 1437, 1437, 1468,
+ 1468, 1499, 1499, 1530, 1530, 1561, 1561, 1592, 1592, 1623, 1623, 1654, 1654,
+ 1685, 1685, 1716, 1716, 1747, 1747, 1778, 1778, 1809, 1809, 1840, 1840, 1871,
+ 1871, 1902, 1902, 1933, 1933, 1964, 1964, 1995, 1995, 2026, 1407, 1438, 1438,
+ 1469, 1469, 1500, 1500, 1531, 1531, 1562, 1562, 1593, 1593, 1624, 1624, 1655,
+ 1655, 1686, 1686, 1717, 1717, 1748, 1748, 1779, 1779, 1810, 1810, 1841, 1841,
+ 1872, 1872, 1903, 1903, 1934, 1934, 1965, 1965, 1996, 1996, 2027, 1439, 1470,
+ 1470, 1501, 1501, 1532, 1532, 1563, 1563, 1594, 1594, 1625, 1625, 1656, 1656,
+ 1687, 1687, 1718, 1718, 1749, 1749, 1780, 1780, 1811, 1811, 1842, 1842, 1873,
+ 1873, 1904, 1904, 1935, 1935, 1966, 1966, 1997, 1997, 2028, 1471, 1502, 1502,
+ 1533, 1533, 1564, 1564, 1595, 1595, 1626, 1626, 1657, 1657, 1688, 1688, 1719,
+ 1719, 1750, 1750, 1781, 1781, 1812, 1812, 1843, 1843, 1874, 1874, 1905, 1905,
+ 1936, 1936, 1967, 1967, 1998, 1998, 2029, 1503, 1534, 1534, 1565, 1565, 1596,
+ 1596, 1627, 1627, 1658, 1658, 1689, 1689, 1720, 1720, 1751, 1751, 1782, 1782,
+ 1813, 1813, 1844, 1844, 1875, 1875, 1906, 1906, 1937, 1937, 1968, 1968, 1999,
+ 1999, 2030, 1535, 1566, 1566, 1597, 1597, 1628, 1628, 1659, 1659, 1690, 1690,
+ 1721, 1721, 1752, 1752, 1783, 1783, 1814, 1814, 1845, 1845, 1876, 1876, 1907,
+ 1907, 1938, 1938, 1969, 1969, 2000, 2000, 2031, 1567, 1598, 1598, 1629, 1629,
+ 1660, 1660, 1691, 1691, 1722, 1722, 1753, 1753, 1784, 1784, 1815, 1815, 1846,
+ 1846, 1877, 1877, 1908, 1908, 1939, 1939, 1970, 1970, 2001, 2001, 2032, 1599,
+ 1630, 1630, 1661, 1661, 1692, 1692, 1723, 1723, 1754, 1754, 1785, 1785, 1816,
+ 1816, 1847, 1847, 1878, 1878, 1909, 1909, 1940, 1940, 1971, 1971, 2002, 2002,
+ 2033, 1631, 1662, 1662, 1693, 1693, 1724, 1724, 1755, 1755, 1786, 1786, 1817,
+ 1817, 1848, 1848, 1879, 1879, 1910, 1910, 1941, 1941, 1972, 1972, 2003, 2003,
+ 2034, 1663, 1694, 1694, 1725, 1725, 1756, 1756, 1787, 1787, 1818, 1818, 1849,
+ 1849, 1880, 1880, 1911, 1911, 1942, 1942, 1973, 1973, 2004, 2004, 2035, 1695,
+ 1726, 1726, 1757, 1757, 1788, 1788, 1819, 1819, 1850, 1850, 1881, 1881, 1912,
+ 1912, 1943, 1943, 1974, 1974, 2005, 2005, 2036, 1727, 1758, 1758, 1789, 1789,
+ 1820, 1820, 1851, 1851, 1882, 1882, 1913, 1913, 1944, 1944, 1975, 1975, 2006,
+ 2006, 2037, 1759, 1790, 1790, 1821, 1821, 1852, 1852, 1883, 1883, 1914, 1914,
+ 1945, 1945, 1976, 1976, 2007, 2007, 2038, 1791, 1822, 1822, 1853, 1853, 1884,
+ 1884, 1915, 1915, 1946, 1946, 1977, 1977, 2008, 2008, 2039, 1823, 1854, 1854,
+ 1885, 1885, 1916, 1916, 1947, 1947, 1978, 1978, 2009, 2009, 2040, 1855, 1886,
+ 1886, 1917, 1917, 1948, 1948, 1979, 1979, 2010, 2010, 2041, 1887, 1918, 1918,
+ 1949, 1949, 1980, 1980, 2011, 2011, 2042, 1919, 1950, 1950, 1981, 1981, 2012,
+ 2012, 2043, 1951, 1982, 1982, 2013, 2013, 2044, 1983, 2014, 2014, 2045, 2015,
+ 2046, 0, 0
+};
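+
+/* Rough sketch of how the neighbor pairs are consumed, in the spirit of
+ * the VP9-lineage get_coef_context() helper (names here are illustrative,
+ * not a definitive copy of this tree's code): the two entries for scan
+ * position c index a per-coefficient token cache, and their rounded
+ * average selects the entropy context for the next token.
+ *
+ *   static INLINE int ctx_from_neighbors(const int16_t *neighbors,
+ *                                        const uint8_t *token_cache,
+ *                                        int c) {
+ *     return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
+ *             token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
+ *   }
+ */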
+
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_64x32_neighbors[2049 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 64, 64, 64, 2,
+ 2, 2, 65, 65, 128, 128, 128, 3, 3, 3, 66, 66, 129,
+ 129, 192, 192, 192, 4, 4, 4, 67, 67, 130, 130, 193, 193,
+ 256, 256, 256, 5, 5, 5, 68, 68, 131, 131, 194, 194, 257,
+ 257, 320, 320, 320, 6, 6, 6, 69, 69, 132, 132, 195, 195,
+ 258, 258, 321, 321, 384, 384, 384, 7, 7, 7, 70, 70, 133,
+ 133, 196, 196, 259, 259, 322, 322, 385, 385, 448, 448, 448, 8,
+ 8, 8, 71, 71, 134, 134, 197, 197, 260, 260, 323, 323, 386,
+ 386, 449, 449, 512, 512, 512, 9, 9, 9, 72, 72, 135, 135,
+ 198, 198, 261, 261, 324, 324, 387, 387, 450, 450, 513, 513, 576,
+ 576, 576, 10, 10, 10, 73, 73, 136, 136, 199, 199, 262, 262,
+ 325, 325, 388, 388, 451, 451, 514, 514, 577, 577, 640, 640, 640,
+ 11, 11, 11, 74, 74, 137, 137, 200, 200, 263, 263, 326, 326,
+ 389, 389, 452, 452, 515, 515, 578, 578, 641, 641, 704, 704, 704,
+ 12, 12, 12, 75, 75, 138, 138, 201, 201, 264, 264, 327, 327,
+ 390, 390, 453, 453, 516, 516, 579, 579, 642, 642, 705, 705, 768,
+ 768, 768, 13, 13, 13, 76, 76, 139, 139, 202, 202, 265, 265,
+ 328, 328, 391, 391, 454, 454, 517, 517, 580, 580, 643, 643, 706,
+ 706, 769, 769, 832, 832, 832, 14, 14, 14, 77, 77, 140, 140,
+ 203, 203, 266, 266, 329, 329, 392, 392, 455, 455, 518, 518, 581,
+ 581, 644, 644, 707, 707, 770, 770, 833, 833, 896, 896, 896, 15,
+ 15, 15, 78, 78, 141, 141, 204, 204, 267, 267, 330, 330, 393,
+ 393, 456, 456, 519, 519, 582, 582, 645, 645, 708, 708, 771, 771,
+ 834, 834, 897, 897, 960, 960, 960, 16, 16, 16, 79, 79, 142,
+ 142, 205, 205, 268, 268, 331, 331, 394, 394, 457, 457, 520, 520,
+ 583, 583, 646, 646, 709, 709, 772, 772, 835, 835, 898, 898, 961,
+ 961, 1024, 1024, 1024, 17, 17, 17, 80, 80, 143, 143, 206, 206,
+ 269, 269, 332, 332, 395, 395, 458, 458, 521, 521, 584, 584, 647,
+ 647, 710, 710, 773, 773, 836, 836, 899, 899, 962, 962, 1025, 1025,
+ 1088, 1088, 1088, 18, 18, 18, 81, 81, 144, 144, 207, 207, 270,
+ 270, 333, 333, 396, 396, 459, 459, 522, 522, 585, 585, 648, 648,
+ 711, 711, 774, 774, 837, 837, 900, 900, 963, 963, 1026, 1026, 1089,
+ 1089, 1152, 1152, 1152, 19, 19, 19, 82, 82, 145, 145, 208, 208,
+ 271, 271, 334, 334, 397, 397, 460, 460, 523, 523, 586, 586, 649,
+ 649, 712, 712, 775, 775, 838, 838, 901, 901, 964, 964, 1027, 1027,
+ 1090, 1090, 1153, 1153, 1216, 1216, 1216, 20, 20, 20, 83, 83, 146,
+ 146, 209, 209, 272, 272, 335, 335, 398, 398, 461, 461, 524, 524,
+ 587, 587, 650, 650, 713, 713, 776, 776, 839, 839, 902, 902, 965,
+ 965, 1028, 1028, 1091, 1091, 1154, 1154, 1217, 1217, 1280, 1280, 1280, 21,
+ 21, 21, 84, 84, 147, 147, 210, 210, 273, 273, 336, 336, 399,
+ 399, 462, 462, 525, 525, 588, 588, 651, 651, 714, 714, 777, 777,
+ 840, 840, 903, 903, 966, 966, 1029, 1029, 1092, 1092, 1155, 1155, 1218,
+ 1218, 1281, 1281, 1344, 1344, 1344, 22, 22, 22, 85, 85, 148, 148,
+ 211, 211, 274, 274, 337, 337, 400, 400, 463, 463, 526, 526, 589,
+ 589, 652, 652, 715, 715, 778, 778, 841, 841, 904, 904, 967, 967,
+ 1030, 1030, 1093, 1093, 1156, 1156, 1219, 1219, 1282, 1282, 1345, 1345, 1408,
+ 1408, 1408, 23, 23, 23, 86, 86, 149, 149, 212, 212, 275, 275,
+ 338, 338, 401, 401, 464, 464, 527, 527, 590, 590, 653, 653, 716,
+ 716, 779, 779, 842, 842, 905, 905, 968, 968, 1031, 1031, 1094, 1094,
+ 1157, 1157, 1220, 1220, 1283, 1283, 1346, 1346, 1409, 1409, 1472, 1472, 1472,
+ 24, 24, 24, 87, 87, 150, 150, 213, 213, 276, 276, 339, 339,
+ 402, 402, 465, 465, 528, 528, 591, 591, 654, 654, 717, 717, 780,
+ 780, 843, 843, 906, 906, 969, 969, 1032, 1032, 1095, 1095, 1158, 1158,
+ 1221, 1221, 1284, 1284, 1347, 1347, 1410, 1410, 1473, 1473, 1536, 1536, 1536,
+ 25, 25, 25, 88, 88, 151, 151, 214, 214, 277, 277, 340, 340,
+ 403, 403, 466, 466, 529, 529, 592, 592, 655, 655, 718, 718, 781,
+ 781, 844, 844, 907, 907, 970, 970, 1033, 1033, 1096, 1096, 1159, 1159,
+ 1222, 1222, 1285, 1285, 1348, 1348, 1411, 1411, 1474, 1474, 1537, 1537, 1600,
+ 1600, 1600, 26, 26, 26, 89, 89, 152, 152, 215, 215, 278, 278,
+ 341, 341, 404, 404, 467, 467, 530, 530, 593, 593, 656, 656, 719,
+ 719, 782, 782, 845, 845, 908, 908, 971, 971, 1034, 1034, 1097, 1097,
+ 1160, 1160, 1223, 1223, 1286, 1286, 1349, 1349, 1412, 1412, 1475, 1475, 1538,
+ 1538, 1601, 1601, 1664, 1664, 1664, 27, 27, 27, 90, 90, 153, 153,
+ 216, 216, 279, 279, 342, 342, 405, 405, 468, 468, 531, 531, 594,
+ 594, 657, 657, 720, 720, 783, 783, 846, 846, 909, 909, 972, 972,
+ 1035, 1035, 1098, 1098, 1161, 1161, 1224, 1224, 1287, 1287, 1350, 1350, 1413,
+ 1413, 1476, 1476, 1539, 1539, 1602, 1602, 1665, 1665, 1728, 1728, 1728, 28,
+ 28, 28, 91, 91, 154, 154, 217, 217, 280, 280, 343, 343, 406,
+ 406, 469, 469, 532, 532, 595, 595, 658, 658, 721, 721, 784, 784,
+ 847, 847, 910, 910, 973, 973, 1036, 1036, 1099, 1099, 1162, 1162, 1225,
+ 1225, 1288, 1288, 1351, 1351, 1414, 1414, 1477, 1477, 1540, 1540, 1603, 1603,
+ 1666, 1666, 1729, 1729, 1792, 1792, 1792, 29, 29, 29, 92, 92, 155,
+ 155, 218, 218, 281, 281, 344, 344, 407, 407, 470, 470, 533, 533,
+ 596, 596, 659, 659, 722, 722, 785, 785, 848, 848, 911, 911, 974,
+ 974, 1037, 1037, 1100, 1100, 1163, 1163, 1226, 1226, 1289, 1289, 1352, 1352,
+ 1415, 1415, 1478, 1478, 1541, 1541, 1604, 1604, 1667, 1667, 1730, 1730, 1793,
+ 1793, 1856, 1856, 1856, 30, 30, 30, 93, 93, 156, 156, 219, 219,
+ 282, 282, 345, 345, 408, 408, 471, 471, 534, 534, 597, 597, 660,
+ 660, 723, 723, 786, 786, 849, 849, 912, 912, 975, 975, 1038, 1038,
+ 1101, 1101, 1164, 1164, 1227, 1227, 1290, 1290, 1353, 1353, 1416, 1416, 1479,
+ 1479, 1542, 1542, 1605, 1605, 1668, 1668, 1731, 1731, 1794, 1794, 1857, 1857,
+ 1920, 1920, 1920, 31, 31, 31, 94, 94, 157, 157, 220, 220, 283,
+ 283, 346, 346, 409, 409, 472, 472, 535, 535, 598, 598, 661, 661,
+ 724, 724, 787, 787, 850, 850, 913, 913, 976, 976, 1039, 1039, 1102,
+ 1102, 1165, 1165, 1228, 1228, 1291, 1291, 1354, 1354, 1417, 1417, 1480, 1480,
+ 1543, 1543, 1606, 1606, 1669, 1669, 1732, 1732, 1795, 1795, 1858, 1858, 1921,
+ 1921, 1984, 32, 32, 32, 95, 95, 158, 158, 221, 221, 284, 284,
+ 347, 347, 410, 410, 473, 473, 536, 536, 599, 599, 662, 662, 725,
+ 725, 788, 788, 851, 851, 914, 914, 977, 977, 1040, 1040, 1103, 1103,
+ 1166, 1166, 1229, 1229, 1292, 1292, 1355, 1355, 1418, 1418, 1481, 1481, 1544,
+ 1544, 1607, 1607, 1670, 1670, 1733, 1733, 1796, 1796, 1859, 1859, 1922, 1922,
+ 1985, 33, 33, 33, 96, 96, 159, 159, 222, 222, 285, 285, 348,
+ 348, 411, 411, 474, 474, 537, 537, 600, 600, 663, 663, 726, 726,
+ 789, 789, 852, 852, 915, 915, 978, 978, 1041, 1041, 1104, 1104, 1167,
+ 1167, 1230, 1230, 1293, 1293, 1356, 1356, 1419, 1419, 1482, 1482, 1545, 1545,
+ 1608, 1608, 1671, 1671, 1734, 1734, 1797, 1797, 1860, 1860, 1923, 1923, 1986,
+ 34, 34, 34, 97, 97, 160, 160, 223, 223, 286, 286, 349, 349,
+ 412, 412, 475, 475, 538, 538, 601, 601, 664, 664, 727, 727, 790,
+ 790, 853, 853, 916, 916, 979, 979, 1042, 1042, 1105, 1105, 1168, 1168,
+ 1231, 1231, 1294, 1294, 1357, 1357, 1420, 1420, 1483, 1483, 1546, 1546, 1609,
+ 1609, 1672, 1672, 1735, 1735, 1798, 1798, 1861, 1861, 1924, 1924, 1987, 35,
+ 35, 35, 98, 98, 161, 161, 224, 224, 287, 287, 350, 350, 413,
+ 413, 476, 476, 539, 539, 602, 602, 665, 665, 728, 728, 791, 791,
+ 854, 854, 917, 917, 980, 980, 1043, 1043, 1106, 1106, 1169, 1169, 1232,
+ 1232, 1295, 1295, 1358, 1358, 1421, 1421, 1484, 1484, 1547, 1547, 1610, 1610,
+ 1673, 1673, 1736, 1736, 1799, 1799, 1862, 1862, 1925, 1925, 1988, 36, 36,
+ 36, 99, 99, 162, 162, 225, 225, 288, 288, 351, 351, 414, 414,
+ 477, 477, 540, 540, 603, 603, 666, 666, 729, 729, 792, 792, 855,
+ 855, 918, 918, 981, 981, 1044, 1044, 1107, 1107, 1170, 1170, 1233, 1233,
+ 1296, 1296, 1359, 1359, 1422, 1422, 1485, 1485, 1548, 1548, 1611, 1611, 1674,
+ 1674, 1737, 1737, 1800, 1800, 1863, 1863, 1926, 1926, 1989, 37, 37, 37,
+ 100, 100, 163, 163, 226, 226, 289, 289, 352, 352, 415, 415, 478,
+ 478, 541, 541, 604, 604, 667, 667, 730, 730, 793, 793, 856, 856,
+ 919, 919, 982, 982, 1045, 1045, 1108, 1108, 1171, 1171, 1234, 1234, 1297,
+ 1297, 1360, 1360, 1423, 1423, 1486, 1486, 1549, 1549, 1612, 1612, 1675, 1675,
+ 1738, 1738, 1801, 1801, 1864, 1864, 1927, 1927, 1990, 38, 38, 38, 101,
+ 101, 164, 164, 227, 227, 290, 290, 353, 353, 416, 416, 479, 479,
+ 542, 542, 605, 605, 668, 668, 731, 731, 794, 794, 857, 857, 920,
+ 920, 983, 983, 1046, 1046, 1109, 1109, 1172, 1172, 1235, 1235, 1298, 1298,
+ 1361, 1361, 1424, 1424, 1487, 1487, 1550, 1550, 1613, 1613, 1676, 1676, 1739,
+ 1739, 1802, 1802, 1865, 1865, 1928, 1928, 1991, 39, 39, 39, 102, 102,
+ 165, 165, 228, 228, 291, 291, 354, 354, 417, 417, 480, 480, 543,
+ 543, 606, 606, 669, 669, 732, 732, 795, 795, 858, 858, 921, 921,
+ 984, 984, 1047, 1047, 1110, 1110, 1173, 1173, 1236, 1236, 1299, 1299, 1362,
+ 1362, 1425, 1425, 1488, 1488, 1551, 1551, 1614, 1614, 1677, 1677, 1740, 1740,
+ 1803, 1803, 1866, 1866, 1929, 1929, 1992, 40, 40, 40, 103, 103, 166,
+ 166, 229, 229, 292, 292, 355, 355, 418, 418, 481, 481, 544, 544,
+ 607, 607, 670, 670, 733, 733, 796, 796, 859, 859, 922, 922, 985,
+ 985, 1048, 1048, 1111, 1111, 1174, 1174, 1237, 1237, 1300, 1300, 1363, 1363,
+ 1426, 1426, 1489, 1489, 1552, 1552, 1615, 1615, 1678, 1678, 1741, 1741, 1804,
+ 1804, 1867, 1867, 1930, 1930, 1993, 41, 41, 41, 104, 104, 167, 167,
+ 230, 230, 293, 293, 356, 356, 419, 419, 482, 482, 545, 545, 608,
+ 608, 671, 671, 734, 734, 797, 797, 860, 860, 923, 923, 986, 986,
+ 1049, 1049, 1112, 1112, 1175, 1175, 1238, 1238, 1301, 1301, 1364, 1364, 1427,
+ 1427, 1490, 1490, 1553, 1553, 1616, 1616, 1679, 1679, 1742, 1742, 1805, 1805,
+ 1868, 1868, 1931, 1931, 1994, 42, 42, 42, 105, 105, 168, 168, 231,
+ 231, 294, 294, 357, 357, 420, 420, 483, 483, 546, 546, 609, 609,
+ 672, 672, 735, 735, 798, 798, 861, 861, 924, 924, 987, 987, 1050,
+ 1050, 1113, 1113, 1176, 1176, 1239, 1239, 1302, 1302, 1365, 1365, 1428, 1428,
+ 1491, 1491, 1554, 1554, 1617, 1617, 1680, 1680, 1743, 1743, 1806, 1806, 1869,
+ 1869, 1932, 1932, 1995, 43, 43, 43, 106, 106, 169, 169, 232, 232,
+ 295, 295, 358, 358, 421, 421, 484, 484, 547, 547, 610, 610, 673,
+ 673, 736, 736, 799, 799, 862, 862, 925, 925, 988, 988, 1051, 1051,
+ 1114, 1114, 1177, 1177, 1240, 1240, 1303, 1303, 1366, 1366, 1429, 1429, 1492,
+ 1492, 1555, 1555, 1618, 1618, 1681, 1681, 1744, 1744, 1807, 1807, 1870, 1870,
+ 1933, 1933, 1996, 44, 44, 44, 107, 107, 170, 170, 233, 233, 296,
+ 296, 359, 359, 422, 422, 485, 485, 548, 548, 611, 611, 674, 674,
+ 737, 737, 800, 800, 863, 863, 926, 926, 989, 989, 1052, 1052, 1115,
+ 1115, 1178, 1178, 1241, 1241, 1304, 1304, 1367, 1367, 1430, 1430, 1493, 1493,
+ 1556, 1556, 1619, 1619, 1682, 1682, 1745, 1745, 1808, 1808, 1871, 1871, 1934,
+ 1934, 1997, 45, 45, 45, 108, 108, 171, 171, 234, 234, 297, 297,
+ 360, 360, 423, 423, 486, 486, 549, 549, 612, 612, 675, 675, 738,
+ 738, 801, 801, 864, 864, 927, 927, 990, 990, 1053, 1053, 1116, 1116,
+ 1179, 1179, 1242, 1242, 1305, 1305, 1368, 1368, 1431, 1431, 1494, 1494, 1557,
+ 1557, 1620, 1620, 1683, 1683, 1746, 1746, 1809, 1809, 1872, 1872, 1935, 1935,
+ 1998, 46, 46, 46, 109, 109, 172, 172, 235, 235, 298, 298, 361,
+ 361, 424, 424, 487, 487, 550, 550, 613, 613, 676, 676, 739, 739,
+ 802, 802, 865, 865, 928, 928, 991, 991, 1054, 1054, 1117, 1117, 1180,
+ 1180, 1243, 1243, 1306, 1306, 1369, 1369, 1432, 1432, 1495, 1495, 1558, 1558,
+ 1621, 1621, 1684, 1684, 1747, 1747, 1810, 1810, 1873, 1873, 1936, 1936, 1999,
+ 47, 47, 47, 110, 110, 173, 173, 236, 236, 299, 299, 362, 362,
+ 425, 425, 488, 488, 551, 551, 614, 614, 677, 677, 740, 740, 803,
+ 803, 866, 866, 929, 929, 992, 992, 1055, 1055, 1118, 1118, 1181, 1181,
+ 1244, 1244, 1307, 1307, 1370, 1370, 1433, 1433, 1496, 1496, 1559, 1559, 1622,
+ 1622, 1685, 1685, 1748, 1748, 1811, 1811, 1874, 1874, 1937, 1937, 2000, 48,
+ 48, 48, 111, 111, 174, 174, 237, 237, 300, 300, 363, 363, 426,
+ 426, 489, 489, 552, 552, 615, 615, 678, 678, 741, 741, 804, 804,
+ 867, 867, 930, 930, 993, 993, 1056, 1056, 1119, 1119, 1182, 1182, 1245,
+ 1245, 1308, 1308, 1371, 1371, 1434, 1434, 1497, 1497, 1560, 1560, 1623, 1623,
+ 1686, 1686, 1749, 1749, 1812, 1812, 1875, 1875, 1938, 1938, 2001, 49, 49,
+ 49, 112, 112, 175, 175, 238, 238, 301, 301, 364, 364, 427, 427,
+ 490, 490, 553, 553, 616, 616, 679, 679, 742, 742, 805, 805, 868,
+ 868, 931, 931, 994, 994, 1057, 1057, 1120, 1120, 1183, 1183, 1246, 1246,
+ 1309, 1309, 1372, 1372, 1435, 1435, 1498, 1498, 1561, 1561, 1624, 1624, 1687,
+ 1687, 1750, 1750, 1813, 1813, 1876, 1876, 1939, 1939, 2002, 50, 50, 50,
+ 113, 113, 176, 176, 239, 239, 302, 302, 365, 365, 428, 428, 491,
+ 491, 554, 554, 617, 617, 680, 680, 743, 743, 806, 806, 869, 869,
+ 932, 932, 995, 995, 1058, 1058, 1121, 1121, 1184, 1184, 1247, 1247, 1310,
+ 1310, 1373, 1373, 1436, 1436, 1499, 1499, 1562, 1562, 1625, 1625, 1688, 1688,
+ 1751, 1751, 1814, 1814, 1877, 1877, 1940, 1940, 2003, 51, 51, 51, 114,
+ 114, 177, 177, 240, 240, 303, 303, 366, 366, 429, 429, 492, 492,
+ 555, 555, 618, 618, 681, 681, 744, 744, 807, 807, 870, 870, 933,
+ 933, 996, 996, 1059, 1059, 1122, 1122, 1185, 1185, 1248, 1248, 1311, 1311,
+ 1374, 1374, 1437, 1437, 1500, 1500, 1563, 1563, 1626, 1626, 1689, 1689, 1752,
+ 1752, 1815, 1815, 1878, 1878, 1941, 1941, 2004, 52, 52, 52, 115, 115,
+ 178, 178, 241, 241, 304, 304, 367, 367, 430, 430, 493, 493, 556,
+ 556, 619, 619, 682, 682, 745, 745, 808, 808, 871, 871, 934, 934,
+ 997, 997, 1060, 1060, 1123, 1123, 1186, 1186, 1249, 1249, 1312, 1312, 1375,
+ 1375, 1438, 1438, 1501, 1501, 1564, 1564, 1627, 1627, 1690, 1690, 1753, 1753,
+ 1816, 1816, 1879, 1879, 1942, 1942, 2005, 53, 53, 53, 116, 116, 179,
+ 179, 242, 242, 305, 305, 368, 368, 431, 431, 494, 494, 557, 557,
+ 620, 620, 683, 683, 746, 746, 809, 809, 872, 872, 935, 935, 998,
+ 998, 1061, 1061, 1124, 1124, 1187, 1187, 1250, 1250, 1313, 1313, 1376, 1376,
+ 1439, 1439, 1502, 1502, 1565, 1565, 1628, 1628, 1691, 1691, 1754, 1754, 1817,
+ 1817, 1880, 1880, 1943, 1943, 2006, 54, 54, 54, 117, 117, 180, 180,
+ 243, 243, 306, 306, 369, 369, 432, 432, 495, 495, 558, 558, 621,
+ 621, 684, 684, 747, 747, 810, 810, 873, 873, 936, 936, 999, 999,
+ 1062, 1062, 1125, 1125, 1188, 1188, 1251, 1251, 1314, 1314, 1377, 1377, 1440,
+ 1440, 1503, 1503, 1566, 1566, 1629, 1629, 1692, 1692, 1755, 1755, 1818, 1818,
+ 1881, 1881, 1944, 1944, 2007, 55, 55, 55, 118, 118, 181, 181, 244,
+ 244, 307, 307, 370, 370, 433, 433, 496, 496, 559, 559, 622, 622,
+ 685, 685, 748, 748, 811, 811, 874, 874, 937, 937, 1000, 1000, 1063,
+ 1063, 1126, 1126, 1189, 1189, 1252, 1252, 1315, 1315, 1378, 1378, 1441, 1441,
+ 1504, 1504, 1567, 1567, 1630, 1630, 1693, 1693, 1756, 1756, 1819, 1819, 1882,
+ 1882, 1945, 1945, 2008, 56, 56, 56, 119, 119, 182, 182, 245, 245,
+ 308, 308, 371, 371, 434, 434, 497, 497, 560, 560, 623, 623, 686,
+ 686, 749, 749, 812, 812, 875, 875, 938, 938, 1001, 1001, 1064, 1064,
+ 1127, 1127, 1190, 1190, 1253, 1253, 1316, 1316, 1379, 1379, 1442, 1442, 1505,
+ 1505, 1568, 1568, 1631, 1631, 1694, 1694, 1757, 1757, 1820, 1820, 1883, 1883,
+ 1946, 1946, 2009, 57, 57, 57, 120, 120, 183, 183, 246, 246, 309,
+ 309, 372, 372, 435, 435, 498, 498, 561, 561, 624, 624, 687, 687,
+ 750, 750, 813, 813, 876, 876, 939, 939, 1002, 1002, 1065, 1065, 1128,
+ 1128, 1191, 1191, 1254, 1254, 1317, 1317, 1380, 1380, 1443, 1443, 1506, 1506,
+ 1569, 1569, 1632, 1632, 1695, 1695, 1758, 1758, 1821, 1821, 1884, 1884, 1947,
+ 1947, 2010, 58, 58, 58, 121, 121, 184, 184, 247, 247, 310, 310,
+ 373, 373, 436, 436, 499, 499, 562, 562, 625, 625, 688, 688, 751,
+ 751, 814, 814, 877, 877, 940, 940, 1003, 1003, 1066, 1066, 1129, 1129,
+ 1192, 1192, 1255, 1255, 1318, 1318, 1381, 1381, 1444, 1444, 1507, 1507, 1570,
+ 1570, 1633, 1633, 1696, 1696, 1759, 1759, 1822, 1822, 1885, 1885, 1948, 1948,
+ 2011, 59, 59, 59, 122, 122, 185, 185, 248, 248, 311, 311, 374,
+ 374, 437, 437, 500, 500, 563, 563, 626, 626, 689, 689, 752, 752,
+ 815, 815, 878, 878, 941, 941, 1004, 1004, 1067, 1067, 1130, 1130, 1193,
+ 1193, 1256, 1256, 1319, 1319, 1382, 1382, 1445, 1445, 1508, 1508, 1571, 1571,
+ 1634, 1634, 1697, 1697, 1760, 1760, 1823, 1823, 1886, 1886, 1949, 1949, 2012,
+ 60, 60, 60, 123, 123, 186, 186, 249, 249, 312, 312, 375, 375,
+ 438, 438, 501, 501, 564, 564, 627, 627, 690, 690, 753, 753, 816,
+ 816, 879, 879, 942, 942, 1005, 1005, 1068, 1068, 1131, 1131, 1194, 1194,
+ 1257, 1257, 1320, 1320, 1383, 1383, 1446, 1446, 1509, 1509, 1572, 1572, 1635,
+ 1635, 1698, 1698, 1761, 1761, 1824, 1824, 1887, 1887, 1950, 1950, 2013, 61,
+ 61, 61, 124, 124, 187, 187, 250, 250, 313, 313, 376, 376, 439,
+ 439, 502, 502, 565, 565, 628, 628, 691, 691, 754, 754, 817, 817,
+ 880, 880, 943, 943, 1006, 1006, 1069, 1069, 1132, 1132, 1195, 1195, 1258,
+ 1258, 1321, 1321, 1384, 1384, 1447, 1447, 1510, 1510, 1573, 1573, 1636, 1636,
+ 1699, 1699, 1762, 1762, 1825, 1825, 1888, 1888, 1951, 1951, 2014, 62, 62,
+ 62, 125, 125, 188, 188, 251, 251, 314, 314, 377, 377, 440, 440,
+ 503, 503, 566, 566, 629, 629, 692, 692, 755, 755, 818, 818, 881,
+ 881, 944, 944, 1007, 1007, 1070, 1070, 1133, 1133, 1196, 1196, 1259, 1259,
+ 1322, 1322, 1385, 1385, 1448, 1448, 1511, 1511, 1574, 1574, 1637, 1637, 1700,
+ 1700, 1763, 1763, 1826, 1826, 1889, 1889, 1952, 1952, 2015, 63, 126, 126,
+ 189, 189, 252, 252, 315, 315, 378, 378, 441, 441, 504, 504, 567,
+ 567, 630, 630, 693, 693, 756, 756, 819, 819, 882, 882, 945, 945,
+ 1008, 1008, 1071, 1071, 1134, 1134, 1197, 1197, 1260, 1260, 1323, 1323, 1386,
+ 1386, 1449, 1449, 1512, 1512, 1575, 1575, 1638, 1638, 1701, 1701, 1764, 1764,
+ 1827, 1827, 1890, 1890, 1953, 1953, 2016, 127, 190, 190, 253, 253, 316,
+ 316, 379, 379, 442, 442, 505, 505, 568, 568, 631, 631, 694, 694,
+ 757, 757, 820, 820, 883, 883, 946, 946, 1009, 1009, 1072, 1072, 1135,
+ 1135, 1198, 1198, 1261, 1261, 1324, 1324, 1387, 1387, 1450, 1450, 1513, 1513,
+ 1576, 1576, 1639, 1639, 1702, 1702, 1765, 1765, 1828, 1828, 1891, 1891, 1954,
+ 1954, 2017, 191, 254, 254, 317, 317, 380, 380, 443, 443, 506, 506,
+ 569, 569, 632, 632, 695, 695, 758, 758, 821, 821, 884, 884, 947,
+ 947, 1010, 1010, 1073, 1073, 1136, 1136, 1199, 1199, 1262, 1262, 1325, 1325,
+ 1388, 1388, 1451, 1451, 1514, 1514, 1577, 1577, 1640, 1640, 1703, 1703, 1766,
+ 1766, 1829, 1829, 1892, 1892, 1955, 1955, 2018, 255, 318, 318, 381, 381,
+ 444, 444, 507, 507, 570, 570, 633, 633, 696, 696, 759, 759, 822,
+ 822, 885, 885, 948, 948, 1011, 1011, 1074, 1074, 1137, 1137, 1200, 1200,
+ 1263, 1263, 1326, 1326, 1389, 1389, 1452, 1452, 1515, 1515, 1578, 1578, 1641,
+ 1641, 1704, 1704, 1767, 1767, 1830, 1830, 1893, 1893, 1956, 1956, 2019, 319,
+ 382, 382, 445, 445, 508, 508, 571, 571, 634, 634, 697, 697, 760,
+ 760, 823, 823, 886, 886, 949, 949, 1012, 1012, 1075, 1075, 1138, 1138,
+ 1201, 1201, 1264, 1264, 1327, 1327, 1390, 1390, 1453, 1453, 1516, 1516, 1579,
+ 1579, 1642, 1642, 1705, 1705, 1768, 1768, 1831, 1831, 1894, 1894, 1957, 1957,
+ 2020, 383, 446, 446, 509, 509, 572, 572, 635, 635, 698, 698, 761,
+ 761, 824, 824, 887, 887, 950, 950, 1013, 1013, 1076, 1076, 1139, 1139,
+ 1202, 1202, 1265, 1265, 1328, 1328, 1391, 1391, 1454, 1454, 1517, 1517, 1580,
+ 1580, 1643, 1643, 1706, 1706, 1769, 1769, 1832, 1832, 1895, 1895, 1958, 1958,
+ 2021, 447, 510, 510, 573, 573, 636, 636, 699, 699, 762, 762, 825,
+ 825, 888, 888, 951, 951, 1014, 1014, 1077, 1077, 1140, 1140, 1203, 1203,
+ 1266, 1266, 1329, 1329, 1392, 1392, 1455, 1455, 1518, 1518, 1581, 1581, 1644,
+ 1644, 1707, 1707, 1770, 1770, 1833, 1833, 1896, 1896, 1959, 1959, 2022, 511,
+ 574, 574, 637, 637, 700, 700, 763, 763, 826, 826, 889, 889, 952,
+ 952, 1015, 1015, 1078, 1078, 1141, 1141, 1204, 1204, 1267, 1267, 1330, 1330,
+ 1393, 1393, 1456, 1456, 1519, 1519, 1582, 1582, 1645, 1645, 1708, 1708, 1771,
+ 1771, 1834, 1834, 1897, 1897, 1960, 1960, 2023, 575, 638, 638, 701, 701,
+ 764, 764, 827, 827, 890, 890, 953, 953, 1016, 1016, 1079, 1079, 1142,
+ 1142, 1205, 1205, 1268, 1268, 1331, 1331, 1394, 1394, 1457, 1457, 1520, 1520,
+ 1583, 1583, 1646, 1646, 1709, 1709, 1772, 1772, 1835, 1835, 1898, 1898, 1961,
+ 1961, 2024, 639, 702, 702, 765, 765, 828, 828, 891, 891, 954, 954,
+ 1017, 1017, 1080, 1080, 1143, 1143, 1206, 1206, 1269, 1269, 1332, 1332, 1395,
+ 1395, 1458, 1458, 1521, 1521, 1584, 1584, 1647, 1647, 1710, 1710, 1773, 1773,
+ 1836, 1836, 1899, 1899, 1962, 1962, 2025, 703, 766, 766, 829, 829, 892,
+ 892, 955, 955, 1018, 1018, 1081, 1081, 1144, 1144, 1207, 1207, 1270, 1270,
+ 1333, 1333, 1396, 1396, 1459, 1459, 1522, 1522, 1585, 1585, 1648, 1648, 1711,
+ 1711, 1774, 1774, 1837, 1837, 1900, 1900, 1963, 1963, 2026, 767, 830, 830,
+ 893, 893, 956, 956, 1019, 1019, 1082, 1082, 1145, 1145, 1208, 1208, 1271,
+ 1271, 1334, 1334, 1397, 1397, 1460, 1460, 1523, 1523, 1586, 1586, 1649, 1649,
+ 1712, 1712, 1775, 1775, 1838, 1838, 1901, 1901, 1964, 1964, 2027, 831, 894,
+ 894, 957, 957, 1020, 1020, 1083, 1083, 1146, 1146, 1209, 1209, 1272, 1272,
+ 1335, 1335, 1398, 1398, 1461, 1461, 1524, 1524, 1587, 1587, 1650, 1650, 1713,
+ 1713, 1776, 1776, 1839, 1839, 1902, 1902, 1965, 1965, 2028, 895, 958, 958,
+ 1021, 1021, 1084, 1084, 1147, 1147, 1210, 1210, 1273, 1273, 1336, 1336, 1399,
+ 1399, 1462, 1462, 1525, 1525, 1588, 1588, 1651, 1651, 1714, 1714, 1777, 1777,
+ 1840, 1840, 1903, 1903, 1966, 1966, 2029, 959, 1022, 1022, 1085, 1085, 1148,
+ 1148, 1211, 1211, 1274, 1274, 1337, 1337, 1400, 1400, 1463, 1463, 1526, 1526,
+ 1589, 1589, 1652, 1652, 1715, 1715, 1778, 1778, 1841, 1841, 1904, 1904, 1967,
+ 1967, 2030, 1023, 1086, 1086, 1149, 1149, 1212, 1212, 1275, 1275, 1338, 1338,
+ 1401, 1401, 1464, 1464, 1527, 1527, 1590, 1590, 1653, 1653, 1716, 1716, 1779,
+ 1779, 1842, 1842, 1905, 1905, 1968, 1968, 2031, 1087, 1150, 1150, 1213, 1213,
+ 1276, 1276, 1339, 1339, 1402, 1402, 1465, 1465, 1528, 1528, 1591, 1591, 1654,
+ 1654, 1717, 1717, 1780, 1780, 1843, 1843, 1906, 1906, 1969, 1969, 2032, 1151,
+ 1214, 1214, 1277, 1277, 1340, 1340, 1403, 1403, 1466, 1466, 1529, 1529, 1592,
+ 1592, 1655, 1655, 1718, 1718, 1781, 1781, 1844, 1844, 1907, 1907, 1970, 1970,
+ 2033, 1215, 1278, 1278, 1341, 1341, 1404, 1404, 1467, 1467, 1530, 1530, 1593,
+ 1593, 1656, 1656, 1719, 1719, 1782, 1782, 1845, 1845, 1908, 1908, 1971, 1971,
+ 2034, 1279, 1342, 1342, 1405, 1405, 1468, 1468, 1531, 1531, 1594, 1594, 1657,
+ 1657, 1720, 1720, 1783, 1783, 1846, 1846, 1909, 1909, 1972, 1972, 2035, 1343,
+ 1406, 1406, 1469, 1469, 1532, 1532, 1595, 1595, 1658, 1658, 1721, 1721, 1784,
+ 1784, 1847, 1847, 1910, 1910, 1973, 1973, 2036, 1407, 1470, 1470, 1533, 1533,
+ 1596, 1596, 1659, 1659, 1722, 1722, 1785, 1785, 1848, 1848, 1911, 1911, 1974,
+ 1974, 2037, 1471, 1534, 1534, 1597, 1597, 1660, 1660, 1723, 1723, 1786, 1786,
+ 1849, 1849, 1912, 1912, 1975, 1975, 2038, 1535, 1598, 1598, 1661, 1661, 1724,
+ 1724, 1787, 1787, 1850, 1850, 1913, 1913, 1976, 1976, 2039, 1599, 1662, 1662,
+ 1725, 1725, 1788, 1788, 1851, 1851, 1914, 1914, 1977, 1977, 2040, 1663, 1726,
+ 1726, 1789, 1789, 1852, 1852, 1915, 1915, 1978, 1978, 2041, 1727, 1790, 1790,
+ 1853, 1853, 1916, 1916, 1979, 1979, 2042, 1791, 1854, 1854, 1917, 1917, 1980,
+ 1980, 2043, 1855, 1918, 1918, 1981, 1981, 2044, 1919, 1982, 1982, 2045, 1983,
+ 2046, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
default_scan_64x64_neighbors[4097 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 0, 0, 1, 64, 1, 1, 64, 64, 2,
65, 65, 128, 66, 129, 2, 2, 128, 128, 3, 66, 129, 192,
@@ -5398,6 +6360,328 @@ DECLARE_ALIGNED(16, static const int16_t, av1_qtr_iscan_32x32[1024]) = {
};
#if CONFIG_TX64X64
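+/* Inverse scan (iscan) tables for the same sizes: indexed by raster
+ * position, yielding the scan index, so that iscan[scan[k]] == k. For
+ * example, default_scan_32x64 places raster index 2 at scan position 3,
+ * and av1_default_iscan_32x64[2] is 3 accordingly. Encoder-side code uses
+ * the iscan form to find a coefficient's scan position without searching. */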
+DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x64[2048]) = {
+ 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78,
+ 91, 105, 120, 136, 153, 171, 190, 210, 231, 253, 276, 300, 325,
+ 351, 378, 406, 435, 465, 496, 2, 4, 7, 11, 16, 22, 29,
+ 37, 46, 56, 67, 79, 92, 106, 121, 137, 154, 172, 191, 211,
+ 232, 254, 277, 301, 326, 352, 379, 407, 436, 466, 497, 528, 5,
+ 8, 12, 17, 23, 30, 38, 47, 57, 68, 80, 93, 107, 122,
+ 138, 155, 173, 192, 212, 233, 255, 278, 302, 327, 353, 380, 408,
+ 437, 467, 498, 529, 560, 9, 13, 18, 24, 31, 39, 48, 58,
+ 69, 81, 94, 108, 123, 139, 156, 174, 193, 213, 234, 256, 279,
+ 303, 328, 354, 381, 409, 438, 468, 499, 530, 561, 592, 14, 19,
+ 25, 32, 40, 49, 59, 70, 82, 95, 109, 124, 140, 157, 175,
+ 194, 214, 235, 257, 280, 304, 329, 355, 382, 410, 439, 469, 500,
+ 531, 562, 593, 624, 20, 26, 33, 41, 50, 60, 71, 83, 96,
+ 110, 125, 141, 158, 176, 195, 215, 236, 258, 281, 305, 330, 356,
+ 383, 411, 440, 470, 501, 532, 563, 594, 625, 656, 27, 34, 42,
+ 51, 61, 72, 84, 97, 111, 126, 142, 159, 177, 196, 216, 237,
+ 259, 282, 306, 331, 357, 384, 412, 441, 471, 502, 533, 564, 595,
+ 626, 657, 688, 35, 43, 52, 62, 73, 85, 98, 112, 127, 143,
+ 160, 178, 197, 217, 238, 260, 283, 307, 332, 358, 385, 413, 442,
+ 472, 503, 534, 565, 596, 627, 658, 689, 720, 44, 53, 63, 74,
+ 86, 99, 113, 128, 144, 161, 179, 198, 218, 239, 261, 284, 308,
+ 333, 359, 386, 414, 443, 473, 504, 535, 566, 597, 628, 659, 690,
+ 721, 752, 54, 64, 75, 87, 100, 114, 129, 145, 162, 180, 199,
+ 219, 240, 262, 285, 309, 334, 360, 387, 415, 444, 474, 505, 536,
+ 567, 598, 629, 660, 691, 722, 753, 784, 65, 76, 88, 101, 115,
+ 130, 146, 163, 181, 200, 220, 241, 263, 286, 310, 335, 361, 388,
+ 416, 445, 475, 506, 537, 568, 599, 630, 661, 692, 723, 754, 785,
+ 816, 77, 89, 102, 116, 131, 147, 164, 182, 201, 221, 242, 264,
+ 287, 311, 336, 362, 389, 417, 446, 476, 507, 538, 569, 600, 631,
+ 662, 693, 724, 755, 786, 817, 848, 90, 103, 117, 132, 148, 165,
+ 183, 202, 222, 243, 265, 288, 312, 337, 363, 390, 418, 447, 477,
+ 508, 539, 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880,
+ 104, 118, 133, 149, 166, 184, 203, 223, 244, 266, 289, 313, 338,
+ 364, 391, 419, 448, 478, 509, 540, 571, 602, 633, 664, 695, 726,
+ 757, 788, 819, 850, 881, 912, 119, 134, 150, 167, 185, 204, 224,
+ 245, 267, 290, 314, 339, 365, 392, 420, 449, 479, 510, 541, 572,
+ 603, 634, 665, 696, 727, 758, 789, 820, 851, 882, 913, 944, 135,
+ 151, 168, 186, 205, 225, 246, 268, 291, 315, 340, 366, 393, 421,
+ 450, 480, 511, 542, 573, 604, 635, 666, 697, 728, 759, 790, 821,
+ 852, 883, 914, 945, 976, 152, 169, 187, 206, 226, 247, 269, 292,
+ 316, 341, 367, 394, 422, 451, 481, 512, 543, 574, 605, 636, 667,
+ 698, 729, 760, 791, 822, 853, 884, 915, 946, 977, 1008, 170, 188,
+ 207, 227, 248, 270, 293, 317, 342, 368, 395, 423, 452, 482, 513,
+ 544, 575, 606, 637, 668, 699, 730, 761, 792, 823, 854, 885, 916,
+ 947, 978, 1009, 1040, 189, 208, 228, 249, 271, 294, 318, 343, 369,
+ 396, 424, 453, 483, 514, 545, 576, 607, 638, 669, 700, 731, 762,
+ 793, 824, 855, 886, 917, 948, 979, 1010, 1041, 1072, 209, 229, 250,
+ 272, 295, 319, 344, 370, 397, 425, 454, 484, 515, 546, 577, 608,
+ 639, 670, 701, 732, 763, 794, 825, 856, 887, 918, 949, 980, 1011,
+ 1042, 1073, 1104, 230, 251, 273, 296, 320, 345, 371, 398, 426, 455,
+ 485, 516, 547, 578, 609, 640, 671, 702, 733, 764, 795, 826, 857,
+ 888, 919, 950, 981, 1012, 1043, 1074, 1105, 1136, 252, 274, 297, 321,
+ 346, 372, 399, 427, 456, 486, 517, 548, 579, 610, 641, 672, 703,
+ 734, 765, 796, 827, 858, 889, 920, 951, 982, 1013, 1044, 1075, 1106,
+ 1137, 1168, 275, 298, 322, 347, 373, 400, 428, 457, 487, 518, 549,
+ 580, 611, 642, 673, 704, 735, 766, 797, 828, 859, 890, 921, 952,
+ 983, 1014, 1045, 1076, 1107, 1138, 1169, 1200, 299, 323, 348, 374, 401,
+ 429, 458, 488, 519, 550, 581, 612, 643, 674, 705, 736, 767, 798,
+ 829, 860, 891, 922, 953, 984, 1015, 1046, 1077, 1108, 1139, 1170, 1201,
+ 1232, 324, 349, 375, 402, 430, 459, 489, 520, 551, 582, 613, 644,
+ 675, 706, 737, 768, 799, 830, 861, 892, 923, 954, 985, 1016, 1047,
+ 1078, 1109, 1140, 1171, 1202, 1233, 1264, 350, 376, 403, 431, 460, 490,
+ 521, 552, 583, 614, 645, 676, 707, 738, 769, 800, 831, 862, 893,
+ 924, 955, 986, 1017, 1048, 1079, 1110, 1141, 1172, 1203, 1234, 1265, 1296,
+ 377, 404, 432, 461, 491, 522, 553, 584, 615, 646, 677, 708, 739,
+ 770, 801, 832, 863, 894, 925, 956, 987, 1018, 1049, 1080, 1111, 1142,
+ 1173, 1204, 1235, 1266, 1297, 1328, 405, 433, 462, 492, 523, 554, 585,
+ 616, 647, 678, 709, 740, 771, 802, 833, 864, 895, 926, 957, 988,
+ 1019, 1050, 1081, 1112, 1143, 1174, 1205, 1236, 1267, 1298, 1329, 1360, 434,
+ 463, 493, 524, 555, 586, 617, 648, 679, 710, 741, 772, 803, 834,
+ 865, 896, 927, 958, 989, 1020, 1051, 1082, 1113, 1144, 1175, 1206, 1237,
+ 1268, 1299, 1330, 1361, 1392, 464, 494, 525, 556, 587, 618, 649, 680,
+ 711, 742, 773, 804, 835, 866, 897, 928, 959, 990, 1021, 1052, 1083,
+ 1114, 1145, 1176, 1207, 1238, 1269, 1300, 1331, 1362, 1393, 1424, 495, 526,
+ 557, 588, 619, 650, 681, 712, 743, 774, 805, 836, 867, 898, 929,
+ 960, 991, 1022, 1053, 1084, 1115, 1146, 1177, 1208, 1239, 1270, 1301, 1332,
+ 1363, 1394, 1425, 1456, 527, 558, 589, 620, 651, 682, 713, 744, 775,
+ 806, 837, 868, 899, 930, 961, 992, 1023, 1054, 1085, 1116, 1147, 1178,
+ 1209, 1240, 1271, 1302, 1333, 1364, 1395, 1426, 1457, 1488, 559, 590, 621,
+ 652, 683, 714, 745, 776, 807, 838, 869, 900, 931, 962, 993, 1024,
+ 1055, 1086, 1117, 1148, 1179, 1210, 1241, 1272, 1303, 1334, 1365, 1396, 1427,
+ 1458, 1489, 1520, 591, 622, 653, 684, 715, 746, 777, 808, 839, 870,
+ 901, 932, 963, 994, 1025, 1056, 1087, 1118, 1149, 1180, 1211, 1242, 1273,
+ 1304, 1335, 1366, 1397, 1428, 1459, 1490, 1521, 1552, 623, 654, 685, 716,
+ 747, 778, 809, 840, 871, 902, 933, 964, 995, 1026, 1057, 1088, 1119,
+ 1150, 1181, 1212, 1243, 1274, 1305, 1336, 1367, 1398, 1429, 1460, 1491, 1522,
+ 1553, 1583, 655, 686, 717, 748, 779, 810, 841, 872, 903, 934, 965,
+ 996, 1027, 1058, 1089, 1120, 1151, 1182, 1213, 1244, 1275, 1306, 1337, 1368,
+ 1399, 1430, 1461, 1492, 1523, 1554, 1584, 1613, 687, 718, 749, 780, 811,
+ 842, 873, 904, 935, 966, 997, 1028, 1059, 1090, 1121, 1152, 1183, 1214,
+ 1245, 1276, 1307, 1338, 1369, 1400, 1431, 1462, 1493, 1524, 1555, 1585, 1614,
+ 1642, 719, 750, 781, 812, 843, 874, 905, 936, 967, 998, 1029, 1060,
+ 1091, 1122, 1153, 1184, 1215, 1246, 1277, 1308, 1339, 1370, 1401, 1432, 1463,
+ 1494, 1525, 1556, 1586, 1615, 1643, 1670, 751, 782, 813, 844, 875, 906,
+ 937, 968, 999, 1030, 1061, 1092, 1123, 1154, 1185, 1216, 1247, 1278, 1309,
+ 1340, 1371, 1402, 1433, 1464, 1495, 1526, 1557, 1587, 1616, 1644, 1671, 1697,
+ 783, 814, 845, 876, 907, 938, 969, 1000, 1031, 1062, 1093, 1124, 1155,
+ 1186, 1217, 1248, 1279, 1310, 1341, 1372, 1403, 1434, 1465, 1496, 1527, 1558,
+ 1588, 1617, 1645, 1672, 1698, 1723, 815, 846, 877, 908, 939, 970, 1001,
+ 1032, 1063, 1094, 1125, 1156, 1187, 1218, 1249, 1280, 1311, 1342, 1373, 1404,
+ 1435, 1466, 1497, 1528, 1559, 1589, 1618, 1646, 1673, 1699, 1724, 1748, 847,
+ 878, 909, 940, 971, 1002, 1033, 1064, 1095, 1126, 1157, 1188, 1219, 1250,
+ 1281, 1312, 1343, 1374, 1405, 1436, 1467, 1498, 1529, 1560, 1590, 1619, 1647,
+ 1674, 1700, 1725, 1749, 1772, 879, 910, 941, 972, 1003, 1034, 1065, 1096,
+ 1127, 1158, 1189, 1220, 1251, 1282, 1313, 1344, 1375, 1406, 1437, 1468, 1499,
+ 1530, 1561, 1591, 1620, 1648, 1675, 1701, 1726, 1750, 1773, 1795, 911, 942,
+ 973, 1004, 1035, 1066, 1097, 1128, 1159, 1190, 1221, 1252, 1283, 1314, 1345,
+ 1376, 1407, 1438, 1469, 1500, 1531, 1562, 1592, 1621, 1649, 1676, 1702, 1727,
+ 1751, 1774, 1796, 1817, 943, 974, 1005, 1036, 1067, 1098, 1129, 1160, 1191,
+ 1222, 1253, 1284, 1315, 1346, 1377, 1408, 1439, 1470, 1501, 1532, 1563, 1593,
+ 1622, 1650, 1677, 1703, 1728, 1752, 1775, 1797, 1818, 1838, 975, 1006, 1037,
+ 1068, 1099, 1130, 1161, 1192, 1223, 1254, 1285, 1316, 1347, 1378, 1409, 1440,
+ 1471, 1502, 1533, 1564, 1594, 1623, 1651, 1678, 1704, 1729, 1753, 1776, 1798,
+ 1819, 1839, 1858, 1007, 1038, 1069, 1100, 1131, 1162, 1193, 1224, 1255, 1286,
+ 1317, 1348, 1379, 1410, 1441, 1472, 1503, 1534, 1565, 1595, 1624, 1652, 1679,
+ 1705, 1730, 1754, 1777, 1799, 1820, 1840, 1859, 1877, 1039, 1070, 1101, 1132,
+ 1163, 1194, 1225, 1256, 1287, 1318, 1349, 1380, 1411, 1442, 1473, 1504, 1535,
+ 1566, 1596, 1625, 1653, 1680, 1706, 1731, 1755, 1778, 1800, 1821, 1841, 1860,
+ 1878, 1895, 1071, 1102, 1133, 1164, 1195, 1226, 1257, 1288, 1319, 1350, 1381,
+ 1412, 1443, 1474, 1505, 1536, 1567, 1597, 1626, 1654, 1681, 1707, 1732, 1756,
+ 1779, 1801, 1822, 1842, 1861, 1879, 1896, 1912, 1103, 1134, 1165, 1196, 1227,
+ 1258, 1289, 1320, 1351, 1382, 1413, 1444, 1475, 1506, 1537, 1568, 1598, 1627,
+ 1655, 1682, 1708, 1733, 1757, 1780, 1802, 1823, 1843, 1862, 1880, 1897, 1913,
+ 1928, 1135, 1166, 1197, 1228, 1259, 1290, 1321, 1352, 1383, 1414, 1445, 1476,
+ 1507, 1538, 1569, 1599, 1628, 1656, 1683, 1709, 1734, 1758, 1781, 1803, 1824,
+ 1844, 1863, 1881, 1898, 1914, 1929, 1943, 1167, 1198, 1229, 1260, 1291, 1322,
+ 1353, 1384, 1415, 1446, 1477, 1508, 1539, 1570, 1600, 1629, 1657, 1684, 1710,
+ 1735, 1759, 1782, 1804, 1825, 1845, 1864, 1882, 1899, 1915, 1930, 1944, 1957,
+ 1199, 1230, 1261, 1292, 1323, 1354, 1385, 1416, 1447, 1478, 1509, 1540, 1571,
+ 1601, 1630, 1658, 1685, 1711, 1736, 1760, 1783, 1805, 1826, 1846, 1865, 1883,
+ 1900, 1916, 1931, 1945, 1958, 1970, 1231, 1262, 1293, 1324, 1355, 1386, 1417,
+ 1448, 1479, 1510, 1541, 1572, 1602, 1631, 1659, 1686, 1712, 1737, 1761, 1784,
+ 1806, 1827, 1847, 1866, 1884, 1901, 1917, 1932, 1946, 1959, 1971, 1982, 1263,
+ 1294, 1325, 1356, 1387, 1418, 1449, 1480, 1511, 1542, 1573, 1603, 1632, 1660,
+ 1687, 1713, 1738, 1762, 1785, 1807, 1828, 1848, 1867, 1885, 1902, 1918, 1933,
+ 1947, 1960, 1972, 1983, 1993, 1295, 1326, 1357, 1388, 1419, 1450, 1481, 1512,
+ 1543, 1574, 1604, 1633, 1661, 1688, 1714, 1739, 1763, 1786, 1808, 1829, 1849,
+ 1868, 1886, 1903, 1919, 1934, 1948, 1961, 1973, 1984, 1994, 2003, 1327, 1358,
+ 1389, 1420, 1451, 1482, 1513, 1544, 1575, 1605, 1634, 1662, 1689, 1715, 1740,
+ 1764, 1787, 1809, 1830, 1850, 1869, 1887, 1904, 1920, 1935, 1949, 1962, 1974,
+ 1985, 1995, 2004, 2012, 1359, 1390, 1421, 1452, 1483, 1514, 1545, 1576, 1606,
+ 1635, 1663, 1690, 1716, 1741, 1765, 1788, 1810, 1831, 1851, 1870, 1888, 1905,
+ 1921, 1936, 1950, 1963, 1975, 1986, 1996, 2005, 2013, 2020, 1391, 1422, 1453,
+ 1484, 1515, 1546, 1577, 1607, 1636, 1664, 1691, 1717, 1742, 1766, 1789, 1811,
+ 1832, 1852, 1871, 1889, 1906, 1922, 1937, 1951, 1964, 1976, 1987, 1997, 2006,
+ 2014, 2021, 2027, 1423, 1454, 1485, 1516, 1547, 1578, 1608, 1637, 1665, 1692,
+ 1718, 1743, 1767, 1790, 1812, 1833, 1853, 1872, 1890, 1907, 1923, 1938, 1952,
+ 1965, 1977, 1988, 1998, 2007, 2015, 2022, 2028, 2033, 1455, 1486, 1517, 1548,
+ 1579, 1609, 1638, 1666, 1693, 1719, 1744, 1768, 1791, 1813, 1834, 1854, 1873,
+ 1891, 1908, 1924, 1939, 1953, 1966, 1978, 1989, 1999, 2008, 2016, 2023, 2029,
+ 2034, 2038, 1487, 1518, 1549, 1580, 1610, 1639, 1667, 1694, 1720, 1745, 1769,
+ 1792, 1814, 1835, 1855, 1874, 1892, 1909, 1925, 1940, 1954, 1967, 1979, 1990,
+ 2000, 2009, 2017, 2024, 2030, 2035, 2039, 2042, 1519, 1550, 1581, 1611, 1640,
+ 1668, 1695, 1721, 1746, 1770, 1793, 1815, 1836, 1856, 1875, 1893, 1910, 1926,
+ 1941, 1955, 1968, 1980, 1991, 2001, 2010, 2018, 2025, 2031, 2036, 2040, 2043,
+ 2045, 1551, 1582, 1612, 1641, 1669, 1696, 1722, 1747, 1771, 1794, 1816, 1837,
+ 1857, 1876, 1894, 1911, 1927, 1942, 1956, 1969, 1981, 1992, 2002, 2011, 2019,
+ 2026, 2032, 2037, 2041, 2044, 2046, 2047,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_64x32[2048]) = {
+ 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78,
+ 91, 105, 120, 136, 153, 171, 190, 210, 231, 253, 276, 300, 325,
+ 351, 378, 406, 435, 465, 496, 528, 560, 592, 624, 656, 688, 720,
+ 752, 784, 816, 848, 880, 912, 944, 976, 1008, 1040, 1072, 1104, 1136,
+ 1168, 1200, 1232, 1264, 1296, 1328, 1360, 1392, 1424, 1456, 1488, 1520, 2,
+ 4, 7, 11, 16, 22, 29, 37, 46, 56, 67, 79, 92, 106,
+ 121, 137, 154, 172, 191, 211, 232, 254, 277, 301, 326, 352, 379,
+ 407, 436, 466, 497, 529, 561, 593, 625, 657, 689, 721, 753, 785,
+ 817, 849, 881, 913, 945, 977, 1009, 1041, 1073, 1105, 1137, 1169, 1201,
+ 1233, 1265, 1297, 1329, 1361, 1393, 1425, 1457, 1489, 1521, 1552, 5, 8,
+ 12, 17, 23, 30, 38, 47, 57, 68, 80, 93, 107, 122, 138,
+ 155, 173, 192, 212, 233, 255, 278, 302, 327, 353, 380, 408, 437,
+ 467, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818, 850,
+ 882, 914, 946, 978, 1010, 1042, 1074, 1106, 1138, 1170, 1202, 1234, 1266,
+ 1298, 1330, 1362, 1394, 1426, 1458, 1490, 1522, 1553, 1583, 9, 13, 18,
+ 24, 31, 39, 48, 58, 69, 81, 94, 108, 123, 139, 156, 174,
+ 193, 213, 234, 256, 279, 303, 328, 354, 381, 409, 438, 468, 499,
+ 531, 563, 595, 627, 659, 691, 723, 755, 787, 819, 851, 883, 915,
+ 947, 979, 1011, 1043, 1075, 1107, 1139, 1171, 1203, 1235, 1267, 1299, 1331,
+ 1363, 1395, 1427, 1459, 1491, 1523, 1554, 1584, 1613, 14, 19, 25, 32,
+ 40, 49, 59, 70, 82, 95, 109, 124, 140, 157, 175, 194, 214,
+ 235, 257, 280, 304, 329, 355, 382, 410, 439, 469, 500, 532, 564,
+ 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, 948, 980,
+ 1012, 1044, 1076, 1108, 1140, 1172, 1204, 1236, 1268, 1300, 1332, 1364, 1396,
+ 1428, 1460, 1492, 1524, 1555, 1585, 1614, 1642, 20, 26, 33, 41, 50,
+ 60, 71, 83, 96, 110, 125, 141, 158, 176, 195, 215, 236, 258,
+ 281, 305, 330, 356, 383, 411, 440, 470, 501, 533, 565, 597, 629,
+ 661, 693, 725, 757, 789, 821, 853, 885, 917, 949, 981, 1013, 1045,
+ 1077, 1109, 1141, 1173, 1205, 1237, 1269, 1301, 1333, 1365, 1397, 1429, 1461,
+ 1493, 1525, 1556, 1586, 1615, 1643, 1670, 27, 34, 42, 51, 61, 72,
+ 84, 97, 111, 126, 142, 159, 177, 196, 216, 237, 259, 282, 306,
+ 331, 357, 384, 412, 441, 471, 502, 534, 566, 598, 630, 662, 694,
+ 726, 758, 790, 822, 854, 886, 918, 950, 982, 1014, 1046, 1078, 1110,
+ 1142, 1174, 1206, 1238, 1270, 1302, 1334, 1366, 1398, 1430, 1462, 1494, 1526,
+ 1557, 1587, 1616, 1644, 1671, 1697, 35, 43, 52, 62, 73, 85, 98,
+ 112, 127, 143, 160, 178, 197, 217, 238, 260, 283, 307, 332, 358,
+ 385, 413, 442, 472, 503, 535, 567, 599, 631, 663, 695, 727, 759,
+ 791, 823, 855, 887, 919, 951, 983, 1015, 1047, 1079, 1111, 1143, 1175,
+ 1207, 1239, 1271, 1303, 1335, 1367, 1399, 1431, 1463, 1495, 1527, 1558, 1588,
+ 1617, 1645, 1672, 1698, 1723, 44, 53, 63, 74, 86, 99, 113, 128,
+ 144, 161, 179, 198, 218, 239, 261, 284, 308, 333, 359, 386, 414,
+ 443, 473, 504, 536, 568, 600, 632, 664, 696, 728, 760, 792, 824,
+ 856, 888, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176, 1208, 1240,
+ 1272, 1304, 1336, 1368, 1400, 1432, 1464, 1496, 1528, 1559, 1589, 1618, 1646,
+ 1673, 1699, 1724, 1748, 54, 64, 75, 87, 100, 114, 129, 145, 162,
+ 180, 199, 219, 240, 262, 285, 309, 334, 360, 387, 415, 444, 474,
+ 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825, 857, 889,
+ 921, 953, 985, 1017, 1049, 1081, 1113, 1145, 1177, 1209, 1241, 1273, 1305,
+ 1337, 1369, 1401, 1433, 1465, 1497, 1529, 1560, 1590, 1619, 1647, 1674, 1700,
+ 1725, 1749, 1772, 65, 76, 88, 101, 115, 130, 146, 163, 181, 200,
+ 220, 241, 263, 286, 310, 335, 361, 388, 416, 445, 475, 506, 538,
+ 570, 602, 634, 666, 698, 730, 762, 794, 826, 858, 890, 922, 954,
+ 986, 1018, 1050, 1082, 1114, 1146, 1178, 1210, 1242, 1274, 1306, 1338, 1370,
+ 1402, 1434, 1466, 1498, 1530, 1561, 1591, 1620, 1648, 1675, 1701, 1726, 1750,
+ 1773, 1795, 77, 89, 102, 116, 131, 147, 164, 182, 201, 221, 242,
+ 264, 287, 311, 336, 362, 389, 417, 446, 476, 507, 539, 571, 603,
+ 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, 955, 987, 1019,
+ 1051, 1083, 1115, 1147, 1179, 1211, 1243, 1275, 1307, 1339, 1371, 1403, 1435,
+ 1467, 1499, 1531, 1562, 1592, 1621, 1649, 1676, 1702, 1727, 1751, 1774, 1796,
+ 1817, 90, 103, 117, 132, 148, 165, 183, 202, 222, 243, 265, 288,
+ 312, 337, 363, 390, 418, 447, 477, 508, 540, 572, 604, 636, 668,
+ 700, 732, 764, 796, 828, 860, 892, 924, 956, 988, 1020, 1052, 1084,
+ 1116, 1148, 1180, 1212, 1244, 1276, 1308, 1340, 1372, 1404, 1436, 1468, 1500,
+ 1532, 1563, 1593, 1622, 1650, 1677, 1703, 1728, 1752, 1775, 1797, 1818, 1838,
+ 104, 118, 133, 149, 166, 184, 203, 223, 244, 266, 289, 313, 338,
+ 364, 391, 419, 448, 478, 509, 541, 573, 605, 637, 669, 701, 733,
+ 765, 797, 829, 861, 893, 925, 957, 989, 1021, 1053, 1085, 1117, 1149,
+ 1181, 1213, 1245, 1277, 1309, 1341, 1373, 1405, 1437, 1469, 1501, 1533, 1564,
+ 1594, 1623, 1651, 1678, 1704, 1729, 1753, 1776, 1798, 1819, 1839, 1858, 119,
+ 134, 150, 167, 185, 204, 224, 245, 267, 290, 314, 339, 365, 392,
+ 420, 449, 479, 510, 542, 574, 606, 638, 670, 702, 734, 766, 798,
+ 830, 862, 894, 926, 958, 990, 1022, 1054, 1086, 1118, 1150, 1182, 1214,
+ 1246, 1278, 1310, 1342, 1374, 1406, 1438, 1470, 1502, 1534, 1565, 1595, 1624,
+ 1652, 1679, 1705, 1730, 1754, 1777, 1799, 1820, 1840, 1859, 1877, 135, 151,
+ 168, 186, 205, 225, 246, 268, 291, 315, 340, 366, 393, 421, 450,
+ 480, 511, 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863,
+ 895, 927, 959, 991, 1023, 1055, 1087, 1119, 1151, 1183, 1215, 1247, 1279,
+ 1311, 1343, 1375, 1407, 1439, 1471, 1503, 1535, 1566, 1596, 1625, 1653, 1680,
+ 1706, 1731, 1755, 1778, 1800, 1821, 1841, 1860, 1878, 1895, 152, 169, 187,
+ 206, 226, 247, 269, 292, 316, 341, 367, 394, 422, 451, 481, 512,
+ 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928,
+ 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280, 1312, 1344,
+ 1376, 1408, 1440, 1472, 1504, 1536, 1567, 1597, 1626, 1654, 1681, 1707, 1732,
+ 1756, 1779, 1801, 1822, 1842, 1861, 1879, 1896, 1912, 170, 188, 207, 227,
+ 248, 270, 293, 317, 342, 368, 395, 423, 452, 482, 513, 545, 577,
+ 609, 641, 673, 705, 737, 769, 801, 833, 865, 897, 929, 961, 993,
+ 1025, 1057, 1089, 1121, 1153, 1185, 1217, 1249, 1281, 1313, 1345, 1377, 1409,
+ 1441, 1473, 1505, 1537, 1568, 1598, 1627, 1655, 1682, 1708, 1733, 1757, 1780,
+ 1802, 1823, 1843, 1862, 1880, 1897, 1913, 1928, 189, 208, 228, 249, 271,
+ 294, 318, 343, 369, 396, 424, 453, 483, 514, 546, 578, 610, 642,
+ 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, 994, 1026, 1058,
+ 1090, 1122, 1154, 1186, 1218, 1250, 1282, 1314, 1346, 1378, 1410, 1442, 1474,
+ 1506, 1538, 1569, 1599, 1628, 1656, 1683, 1709, 1734, 1758, 1781, 1803, 1824,
+ 1844, 1863, 1881, 1898, 1914, 1929, 1943, 209, 229, 250, 272, 295, 319,
+ 344, 370, 397, 425, 454, 484, 515, 547, 579, 611, 643, 675, 707,
+ 739, 771, 803, 835, 867, 899, 931, 963, 995, 1027, 1059, 1091, 1123,
+ 1155, 1187, 1219, 1251, 1283, 1315, 1347, 1379, 1411, 1443, 1475, 1507, 1539,
+ 1570, 1600, 1629, 1657, 1684, 1710, 1735, 1759, 1782, 1804, 1825, 1845, 1864,
+ 1882, 1899, 1915, 1930, 1944, 1957, 230, 251, 273, 296, 320, 345, 371,
+ 398, 426, 455, 485, 516, 548, 580, 612, 644, 676, 708, 740, 772,
+ 804, 836, 868, 900, 932, 964, 996, 1028, 1060, 1092, 1124, 1156, 1188,
+ 1220, 1252, 1284, 1316, 1348, 1380, 1412, 1444, 1476, 1508, 1540, 1571, 1601,
+ 1630, 1658, 1685, 1711, 1736, 1760, 1783, 1805, 1826, 1846, 1865, 1883, 1900,
+ 1916, 1931, 1945, 1958, 1970, 252, 274, 297, 321, 346, 372, 399, 427,
+ 456, 486, 517, 549, 581, 613, 645, 677, 709, 741, 773, 805, 837,
+ 869, 901, 933, 965, 997, 1029, 1061, 1093, 1125, 1157, 1189, 1221, 1253,
+ 1285, 1317, 1349, 1381, 1413, 1445, 1477, 1509, 1541, 1572, 1602, 1631, 1659,
+ 1686, 1712, 1737, 1761, 1784, 1806, 1827, 1847, 1866, 1884, 1901, 1917, 1932,
+ 1946, 1959, 1971, 1982, 275, 298, 322, 347, 373, 400, 428, 457, 487,
+ 518, 550, 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902,
+ 934, 966, 998, 1030, 1062, 1094, 1126, 1158, 1190, 1222, 1254, 1286, 1318,
+ 1350, 1382, 1414, 1446, 1478, 1510, 1542, 1573, 1603, 1632, 1660, 1687, 1713,
+ 1738, 1762, 1785, 1807, 1828, 1848, 1867, 1885, 1902, 1918, 1933, 1947, 1960,
+ 1972, 1983, 1993, 299, 323, 348, 374, 401, 429, 458, 488, 519, 551,
+ 583, 615, 647, 679, 711, 743, 775, 807, 839, 871, 903, 935, 967,
+ 999, 1031, 1063, 1095, 1127, 1159, 1191, 1223, 1255, 1287, 1319, 1351, 1383,
+ 1415, 1447, 1479, 1511, 1543, 1574, 1604, 1633, 1661, 1688, 1714, 1739, 1763,
+ 1786, 1808, 1829, 1849, 1868, 1886, 1903, 1919, 1934, 1948, 1961, 1973, 1984,
+ 1994, 2003, 324, 349, 375, 402, 430, 459, 489, 520, 552, 584, 616,
+ 648, 680, 712, 744, 776, 808, 840, 872, 904, 936, 968, 1000, 1032,
+ 1064, 1096, 1128, 1160, 1192, 1224, 1256, 1288, 1320, 1352, 1384, 1416, 1448,
+ 1480, 1512, 1544, 1575, 1605, 1634, 1662, 1689, 1715, 1740, 1764, 1787, 1809,
+ 1830, 1850, 1869, 1887, 1904, 1920, 1935, 1949, 1962, 1974, 1985, 1995, 2004,
+ 2012, 350, 376, 403, 431, 460, 490, 521, 553, 585, 617, 649, 681,
+ 713, 745, 777, 809, 841, 873, 905, 937, 969, 1001, 1033, 1065, 1097,
+ 1129, 1161, 1193, 1225, 1257, 1289, 1321, 1353, 1385, 1417, 1449, 1481, 1513,
+ 1545, 1576, 1606, 1635, 1663, 1690, 1716, 1741, 1765, 1788, 1810, 1831, 1851,
+ 1870, 1888, 1905, 1921, 1936, 1950, 1963, 1975, 1986, 1996, 2005, 2013, 2020,
+ 377, 404, 432, 461, 491, 522, 554, 586, 618, 650, 682, 714, 746,
+ 778, 810, 842, 874, 906, 938, 970, 1002, 1034, 1066, 1098, 1130, 1162,
+ 1194, 1226, 1258, 1290, 1322, 1354, 1386, 1418, 1450, 1482, 1514, 1546, 1577,
+ 1607, 1636, 1664, 1691, 1717, 1742, 1766, 1789, 1811, 1832, 1852, 1871, 1889,
+ 1906, 1922, 1937, 1951, 1964, 1976, 1987, 1997, 2006, 2014, 2021, 2027, 405,
+ 433, 462, 492, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811,
+ 843, 875, 907, 939, 971, 1003, 1035, 1067, 1099, 1131, 1163, 1195, 1227,
+ 1259, 1291, 1323, 1355, 1387, 1419, 1451, 1483, 1515, 1547, 1578, 1608, 1637,
+ 1665, 1692, 1718, 1743, 1767, 1790, 1812, 1833, 1853, 1872, 1890, 1907, 1923,
+ 1938, 1952, 1965, 1977, 1988, 1998, 2007, 2015, 2022, 2028, 2033, 434, 463,
+ 493, 524, 556, 588, 620, 652, 684, 716, 748, 780, 812, 844, 876,
+ 908, 940, 972, 1004, 1036, 1068, 1100, 1132, 1164, 1196, 1228, 1260, 1292,
+ 1324, 1356, 1388, 1420, 1452, 1484, 1516, 1548, 1579, 1609, 1638, 1666, 1693,
+ 1719, 1744, 1768, 1791, 1813, 1834, 1854, 1873, 1891, 1908, 1924, 1939, 1953,
+ 1966, 1978, 1989, 1999, 2008, 2016, 2023, 2029, 2034, 2038, 464, 494, 525,
+ 557, 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, 941,
+ 973, 1005, 1037, 1069, 1101, 1133, 1165, 1197, 1229, 1261, 1293, 1325, 1357,
+ 1389, 1421, 1453, 1485, 1517, 1549, 1580, 1610, 1639, 1667, 1694, 1720, 1745,
+ 1769, 1792, 1814, 1835, 1855, 1874, 1892, 1909, 1925, 1940, 1954, 1967, 1979,
+ 1990, 2000, 2009, 2017, 2024, 2030, 2035, 2039, 2042, 495, 526, 558, 590,
+ 622, 654, 686, 718, 750, 782, 814, 846, 878, 910, 942, 974, 1006,
+ 1038, 1070, 1102, 1134, 1166, 1198, 1230, 1262, 1294, 1326, 1358, 1390, 1422,
+ 1454, 1486, 1518, 1550, 1581, 1611, 1640, 1668, 1695, 1721, 1746, 1770, 1793,
+ 1815, 1836, 1856, 1875, 1893, 1910, 1926, 1941, 1955, 1968, 1980, 1991, 2001,
+ 2010, 2018, 2025, 2031, 2036, 2040, 2043, 2045, 527, 559, 591, 623, 655,
+ 687, 719, 751, 783, 815, 847, 879, 911, 943, 975, 1007, 1039, 1071,
+ 1103, 1135, 1167, 1199, 1231, 1263, 1295, 1327, 1359, 1391, 1423, 1455, 1487,
+ 1519, 1551, 1582, 1612, 1641, 1669, 1696, 1722, 1747, 1771, 1794, 1816, 1837,
+ 1857, 1876, 1894, 1911, 1927, 1942, 1956, 1969, 1981, 1992, 2002, 2011, 2019,
+ 2026, 2032, 2037, 2041, 2044, 2046, 2047,
+};
+
DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_64x64[4096]) = {
0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121,
142, 166, 189, 214, 239, 269, 300, 331, 363, 400, 435, 471, 510,
@@ -6040,6 +7324,82 @@ const SCAN_ORDER av1_intra_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
#endif // CONFIG_EXT_TX
},
+#if CONFIG_TX64X64
+ {
+ // TX_32X64
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+#if CONFIG_EXT_TX
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+#endif // CONFIG_EXT_TX
+ },
+ {
+ // TX_64X32
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+#if CONFIG_EXT_TX
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+#endif // CONFIG_EXT_TX
+ }
+#endif // CONFIG_TX64X64
};
const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
@@ -6361,6 +7721,82 @@ const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
{ mcol_scan_32x16, av1_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
#endif // CONFIG_EXT_TX
},
+#if CONFIG_TX64X64
+ {
+ // TX_32X64
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+#if CONFIG_EXT_TX
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+ { default_scan_32x64, av1_default_iscan_32x64,
+ default_scan_32x64_neighbors },
+#endif // CONFIG_EXT_TX
+ },
+ {
+ // TX_64X32
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+#if CONFIG_EXT_TX
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+ { default_scan_64x32, av1_default_iscan_64x32,
+ default_scan_64x32_neighbors },
+#endif // CONFIG_EXT_TX
+ },
+#endif // CONFIG_TX64X64
{
// TX_4X16
{ default_scan_4x16, av1_default_iscan_4x16,
@@ -6604,6 +8040,151 @@ static INLINE int clamp_64(int64_t value, int low, int high) {
return value < low ? low : (value > high ? high : (int)value);
}
+#if USE_2X2_PROB
+static int do_down_sample(TX_SIZE tx_size) {
+ const int tx_w = tx_size_wide[tx_size];
+ const int tx_h = tx_size_high[tx_size];
+ return tx_w > 8 || tx_h > 8;
+}
+
+void av1_down_sample_scan_count(uint32_t *non_zero_count_ds,
+ const uint32_t *non_zero_count,
+ TX_SIZE tx_size) {
+ const int tx_w = tx_size_wide[tx_size];
+ const int tx_h = tx_size_high[tx_size];
+ if (tx_w > 8 && tx_h > 8) {
+ const int tx_w_ds = tx_w >> 1;
+ const int tx_h_ds = tx_h >> 1;
+ for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
+ for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
+ const int ci_ds = r_ds * tx_w_ds + c_ds;
+ const int r = r_ds << 1;
+ const int c = c_ds << 1;
+ const int ci = r * tx_w + c;
+ non_zero_count_ds[ci_ds] = non_zero_count[ci];
+ }
+ }
+ } else if (tx_w > 8 && tx_h <= 8) {
+ const int tx_w_ds = tx_w >> 1;
+ const int tx_h_ds = tx_h;
+ for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
+ for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
+ const int ci_ds = r_ds * tx_w_ds + c_ds;
+ const int r = r_ds;
+ const int c = c_ds << 1;
+ const int ci = r * tx_w + c;
+ non_zero_count_ds[ci_ds] = non_zero_count[ci];
+ }
+ }
+ } else if (tx_w <= 8 && tx_h > 8) {
+ const int tx_w_ds = tx_w;
+ const int tx_h_ds = tx_h >> 1;
+ for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
+ for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
+ const int ci_ds = r_ds * tx_w_ds + c_ds;
+ const int r = r_ds << 1;
+ const int c = c_ds;
+ const int ci = r * tx_w + c;
+ non_zero_count_ds[ci_ds] = non_zero_count[ci];
+ }
+ }
+ } else {
+ assert(0);
+ }
+}
+
+void av1_up_sample_scan_count(uint32_t *non_zero_count,
+ const uint32_t *non_zero_count_ds,
+ TX_SIZE tx_size, unsigned int block_num) {
+ const int tx_w = tx_size_wide[tx_size];
+ const int tx_h = tx_size_high[tx_size];
+ if (tx_w > 8 && tx_h > 8) {
+ const int tx_w_ds = tx_w >> 1;
+ const int tx_h_ds = tx_h >> 1;
+ for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
+ for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
+ const int ci_ds = r_ds * tx_w_ds + c_ds;
+ const int r = r_ds << 1;
+ const int c = c_ds << 1;
+ const int ci = r * tx_w + c;
+ non_zero_count[ci] = non_zero_count_ds[ci_ds];
+ if (c_ds + 1 < tx_w_ds) {
+ uint32_t count =
+ non_zero_count_ds[ci_ds] + non_zero_count_ds[ci_ds + 1];
+ count = ROUND_POWER_OF_TWO(count, 1);
+ count = clamp32u(count, 0, block_num);
+ non_zero_count[ci + 1] = count;
+ } else {
+ non_zero_count[ci + 1] = non_zero_count_ds[ci_ds];
+ }
+ }
+ }
+ for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
+ for (int c = 0; c < tx_w; ++c) {
+ const int r = r_ds << 1;
+ const int ci = r * tx_w + c;
+ if (r + 2 < tx_h) {
+ uint32_t count = non_zero_count[ci] + non_zero_count[ci + 2 * tx_w];
+ count = ROUND_POWER_OF_TWO(count, 1);
+ count = clamp32u(count, 0, block_num);
+ non_zero_count[ci + tx_w] = count;
+ } else {
+ non_zero_count[ci + tx_w] = non_zero_count[ci];
+ }
+ }
+ }
+ } else if (tx_w > 8 && tx_h <= 8) {
+ const int tx_w_ds = tx_w >> 1;
+ const int tx_h_ds = tx_h;
+ for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
+ for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
+ const int ci_ds = r_ds * tx_w_ds + c_ds;
+ const int r = r_ds;
+ const int c = c_ds << 1;
+ const int ci = r * tx_w + c;
+ non_zero_count[ci] = non_zero_count_ds[ci_ds];
+ if (c_ds + 1 < tx_w_ds) {
+ uint32_t count =
+ non_zero_count_ds[ci_ds] + non_zero_count_ds[ci_ds + 1];
+ count = ROUND_POWER_OF_TWO(count, 1);
+ count = clamp32u(count, 0, block_num);
+ non_zero_count[ci + 1] = count;
+ } else {
+ non_zero_count[ci + 1] = non_zero_count_ds[ci_ds];
+ }
+ }
+ }
+ } else if (tx_w <= 8 && tx_h > 8) {
+ const int tx_w_ds = tx_w;
+ const int tx_h_ds = tx_h >> 1;
+ for (int r_ds = 0; r_ds < tx_h_ds; ++r_ds) {
+ for (int c_ds = 0; c_ds < tx_w_ds; ++c_ds) {
+ const int ci_ds = r_ds * tx_w_ds + c_ds;
+ const int r = r_ds << 1;
+ const int c = c_ds;
+ const int ci = r * tx_w + c;
+ non_zero_count[ci] = non_zero_count_ds[ci_ds];
+ if (r_ds + 1 < tx_h_ds) {
+ uint32_t count =
+ non_zero_count_ds[ci_ds] + non_zero_count_ds[ci_ds + tx_w_ds];
+ count = ROUND_POWER_OF_TWO(count, 1);
+ count = clamp32u(count, 0, block_num);
+ non_zero_count[ci + tx_w] = count;
+ } else {
+ non_zero_count[ci + tx_w] = non_zero_count_ds[ci_ds];
+ }
+ }
+ }
+ } else {
+ assert(0);
+ }
+}
+#endif
+
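+/* Illustrative sketch (hypothetical helper, not in the tree): for
+ * tx_w > 8 && tx_h > 8, av1_down_sample_scan_count() above keeps one count
+ * per 2x2 group. Specialized to a 16x16 count buffer it reduces to:
+ *
+ *   static void down_sample_16x16(uint32_t ds[64], const uint32_t full[256]) {
+ *     for (int r_ds = 0; r_ds < 8; ++r_ds)
+ *       for (int c_ds = 0; c_ds < 8; ++c_ds)
+ *         ds[r_ds * 8 + c_ds] = full[(r_ds << 1) * 16 + (c_ds << 1)];
+ *   }
+ *
+ * av1_up_sample_scan_count() inverts this, filling each skipped position with
+ * the rounded average of its two nearest kept counts (copying at the edges),
+ * clamped to [0, block_num]. */
+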
static void update_scan_prob(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type,
int rate) {
FRAME_CONTEXT *pre_fc = cm->pre_fc;
@@ -6612,12 +8193,27 @@ static void update_scan_prob(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type,
uint32_t *non_zero_count = get_non_zero_counts(&cm->counts, tx_size, tx_type);
const int tx2d_size = tx_size_2d[tx_size];
unsigned int block_num = cm->counts.txb_count[tx_size][tx_type];
+#if USE_2X2_PROB
+#if CONFIG_TX64X64
+ DECLARE_ALIGNED(16, uint32_t, non_zero_count_ds[1024]);
+ assert((tx2d_size >> 2) <= 1024);
+#else // CONFIG_TX64X64
+ DECLARE_ALIGNED(16, uint32_t, non_zero_count_ds[256]);
+ assert((tx2d_size >> 2) <= 256);
+#endif // CONFIG_TX64X64
+ if (do_down_sample(tx_size)) {
+ av1_down_sample_scan_count(non_zero_count_ds, non_zero_count, tx_size);
+ av1_up_sample_scan_count(non_zero_count, non_zero_count_ds, tx_size,
+ block_num);
+ }
+#endif
int i;
+ const int inv_precision = 30;
+ int32_t inv_block_num = block_num == 0 ? 0 : (1 << inv_precision) / block_num;
for (i = 0; i < tx2d_size; i++) {
int64_t curr_prob =
- block_num == 0
- ? 0
- : (non_zero_count[i] << ADAPT_SCAN_PROB_PRECISION) / block_num;
+ block_num == 0 ? 0 : ((non_zero_count[i] * inv_block_num) >>
+ (inv_precision - ADAPT_SCAN_PROB_PRECISION));
int64_t prev_prob = prev_non_zero_prob[i];
int64_t pred_prob =
(curr_prob * rate +
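  /* Worked example (illustrative, taking ADAPT_SCAN_PROB_PRECISION = 16):
     with block_num = 100, inv_block_num = (1 << 30) / 100 = 10737418, so for
     non_zero_count[i] = 37 the new expression gives
     (37 * 10737418) >> (30 - 16) = 24248, matching the old
     (37 << 16) / 100 = 24248 while performing one division per transform
     size instead of one per coefficient. */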
@@ -6643,10 +8239,12 @@ static void update_scan_count(int16_t *scan, int max_scan,
void av1_update_scan_count_facade(AV1_COMMON *cm, FRAME_COUNTS *counts,
TX_SIZE tx_size, TX_TYPE tx_type,
const tran_low_t *dqcoeffs, int max_scan) {
- int16_t *scan = get_adapt_scan(cm->fc, tx_size, tx_type);
- uint32_t *non_zero_count = get_non_zero_counts(counts, tx_size, tx_type);
- update_scan_count(scan, max_scan, dqcoeffs, non_zero_count);
- ++counts->txb_count[tx_size][tx_type];
+ if (cm->use_adapt_scan && do_adapt_scan(tx_size, tx_type)) {
+ int16_t *scan = get_adapt_scan(cm->fc, tx_size, tx_type);
+ uint32_t *non_zero_count = get_non_zero_counts(counts, tx_size, tx_type);
+ update_scan_count(scan, max_scan, dqcoeffs, non_zero_count);
+ ++counts->txb_count[tx_size][tx_type];
+ }
}
static int cmp_prob(const void *a, const void *b) {
@@ -6670,24 +8268,7 @@ void av1_augment_prob(TX_SIZE tx_size, TX_TYPE tx_type, uint32_t *prob) {
}
}
-// topological sort
-static void dfs_scan(int tx1d_size, int *scan_idx, int coeff_idx, int16_t *scan,
- int16_t *iscan) {
- const int r = coeff_idx / tx1d_size;
- const int c = coeff_idx % tx1d_size;
-
- if (iscan[coeff_idx] != -1) return;
-
- if (r > 0) dfs_scan(tx1d_size, scan_idx, coeff_idx - tx1d_size, scan, iscan);
-
- if (c > 0) dfs_scan(tx1d_size, scan_idx, coeff_idx - 1, scan, iscan);
-
- scan[*scan_idx] = coeff_idx;
- iscan[coeff_idx] = *scan_idx;
- ++(*scan_idx);
-}
-
-void av1_update_neighbors(int tx_size, const int16_t *scan,
+void av1_update_neighbors(TX_SIZE tx_size, const int16_t *scan,
const int16_t *iscan, int16_t *neighbors) {
const int tx1d_wide = tx_size_wide[tx_size];
const int tx1d_high = tx_size_high[tx_size];
@@ -6731,6 +8312,74 @@ void av1_update_neighbors(int tx_size, const int16_t *scan,
neighbors[tx2d_size * MAX_NEIGHBORS + 1] = scan[0];
}
+#if USE_LIMIT_SCAN_DISTANCE
+typedef struct SCAN_NB_QUEUE {
+ int nb_ci_queue[COEFF_IDX_SIZE + 1];
+ int pr_si_queue[COEFF_IDX_SIZE + 1];
+ int size;
+ int start;
+ int end;
+} SCAN_NB_QUEUE;
+
+static void assign_scan_idx(int16_t coeff_idx, int16_t *scan_idx, int tx_width,
+ int tx_height, int16_t *scan, int16_t *iscan,
+ int16_t *visit, SCAN_NB_QUEUE *queue) {
+ if (visit[coeff_idx] != 2) {
+ assert(*scan_idx < tx_width * tx_height);
+ scan[*scan_idx] = coeff_idx;
+ iscan[coeff_idx] = *scan_idx;
+ visit[coeff_idx] = 2;
+ int row = coeff_idx / tx_width;
+ int col = coeff_idx % tx_width;
+ int right_ci = coeff_idx + 1;
+ if (col + 1 < tx_width && visit[right_ci] == 0) {
+ visit[right_ci] = 1;
+ queue->pr_si_queue[queue->end] = *scan_idx;
+ queue->nb_ci_queue[queue->end] = right_ci;
+ queue->end = (queue->end + 1) % queue->size;
+ }
+ int down_ci = coeff_idx + tx_width;
+ if (row + 1 < tx_height && visit[down_ci] == 0) {
+ visit[down_ci] = 1;
+ queue->pr_si_queue[queue->end] = *scan_idx;
+ queue->nb_ci_queue[queue->end] = down_ci;
+ queue->end = (queue->end + 1) % queue->size;
+ }
+ ++(*scan_idx);
+ }
+}
+static void limit_nb_scan_distance(TX_SIZE tx_size, int16_t *scan,
+ int16_t *iscan) {
+ const int tx2d_size = tx_size_2d[tx_size];
+ int16_t visit[COEFF_IDX_SIZE] = { 0 };
+ int16_t org_scan[COEFF_IDX_SIZE];
+ memcpy(org_scan, scan, tx2d_size * sizeof(*scan));
+ const int tx_width = tx_size_wide[tx_size];
+ const int tx_height = tx_size_high[tx_size];
+ const int limit = 2 * AOMMAX(tx_width, tx_height);
+ SCAN_NB_QUEUE queue;
+ queue.size = tx2d_size;
+ queue.start = 0;
+ queue.end = 0;
+ int16_t new_si = 0;
+ for (int16_t si = 0; si < tx2d_size; ++si) {
+ while (queue.start != queue.end &&
+ queue.pr_si_queue[queue.start] + limit <= new_si) {
+ int nb_ci = queue.nb_ci_queue[queue.start];
+ assign_scan_idx(nb_ci, &new_si, tx_width, tx_height, scan, iscan, visit,
+ &queue);
+ queue.start = (queue.start + 1) % queue.size;
+ }
+
+ int16_t ci = org_scan[si];
+ assign_scan_idx(ci, &new_si, tx_width, tx_height, scan, iscan, visit,
+ &queue);
+ }
+ assert(new_si == tx2d_size);
+}
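+
+/* In short (illustrative summary): the queue above pairs each pending
+ * right/down neighbor with the scan index of the coefficient that exposed
+ * it; once that index falls limit = 2 * AOMMAX(tx_width, tx_height)
+ * positions behind, the neighbor is force-assigned, bounding how far any
+ * coefficient may be scanned after its top/left context becomes available. */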
+#endif // USE_LIMIT_SCAN_DISTANCE
+
+#if USE_TOPOLOGICAL_SORT
void av1_update_sort_order(TX_SIZE tx_size, TX_TYPE tx_type,
const uint32_t *non_zero_prob, int16_t *sort_order) {
const SCAN_ORDER *sc = get_default_scan(tx_size, tx_type, 0);
@@ -6749,6 +8398,23 @@ void av1_update_sort_order(TX_SIZE tx_size, TX_TYPE tx_type,
}
}
+// topological sort
+static void dfs_scan(int tx1d_size, int *scan_idx, int coeff_idx, int16_t *scan,
+ int16_t *iscan) {
+ const int r = coeff_idx / tx1d_size;
+ const int c = coeff_idx % tx1d_size;
+
+ if (iscan[coeff_idx] != -1) return;
+
+ if (r > 0) dfs_scan(tx1d_size, scan_idx, coeff_idx - tx1d_size, scan, iscan);
+
+ if (c > 0) dfs_scan(tx1d_size, scan_idx, coeff_idx - 1, scan, iscan);
+
+ scan[*scan_idx] = coeff_idx;
+ iscan[coeff_idx] = *scan_idx;
+ ++(*scan_idx);
+}
+
void av1_update_scan_order(TX_SIZE tx_size, int16_t *sort_order, int16_t *scan,
int16_t *iscan) {
int coeff_idx;
@@ -6767,17 +8433,66 @@ void av1_update_scan_order(TX_SIZE tx_size, int16_t *sort_order, int16_t *scan,
dfs_scan(tx1d_size, &scan_idx, coeff_idx, scan, iscan);
}
}
+#else
+
+static void filter_prob(TX_SIZE tx_size, uint32_t *prob) {
+ const int tx1d_wide = tx_size_wide[tx_size];
+ const int tx1d_high = tx_size_high[tx_size];
+ for (int r = tx1d_high - 1; r >= 0; --r) {
+ for (int c = tx1d_wide - 1; c >= 0; --c) {
+ int idx = r * tx1d_wide + c;
+ uint32_t v = prob[idx];
+ if (r > 0 && prob[idx - tx1d_wide] < v) prob[idx - tx1d_wide] = v;
+ if (c > 0 && prob[idx - 1] < v) prob[idx - 1] = v;
+ }
+ }
+}
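+
+/* Worked example (illustrative): on the row-major 2x2 buffer { 1, 5, 3, 2 },
+ * the bottom-up, right-to-left pass above yields { 5, 5, 3, 2 }: every
+ * position ends up at least as large as everything below and to the right of
+ * it, so the descending probability sort below can only place a coefficient
+ * after its top and left neighbors. */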
+
+void av1_update_scan_order(TX_SIZE tx_size, TX_TYPE tx_type,
+ uint32_t *non_zero_prob, int16_t *scan,
+ int16_t *iscan) {
+ const SCAN_ORDER *sc = get_default_scan(tx_size, tx_type, 0);
+ uint32_t temp[COEFF_IDX_SIZE];
+ const int tx2d_size = tx_size_2d[tx_size];
+ int scan_idx;
+ assert(tx2d_size <= COEFF_IDX_SIZE);
+ memcpy(temp, non_zero_prob, tx2d_size * sizeof(*non_zero_prob));
+ filter_prob(tx_size, temp);
+ av1_augment_prob(tx_size, tx_type, temp);
+ qsort(temp, tx2d_size, sizeof(*temp), cmp_prob);
+ for (scan_idx = 0; scan_idx < tx2d_size; ++scan_idx) {
+ const int default_scan_idx =
+ (temp[scan_idx] & COEFF_IDX_MASK) ^ COEFF_IDX_MASK;
+ const int coeff_idx = sc->scan[default_scan_idx];
+ scan[scan_idx] = coeff_idx;
+ iscan[coeff_idx] = scan_idx;
+ }
+}
+#endif
static void update_scan_order_facade(AV1_COMMON *cm, TX_SIZE tx_size,
- TX_TYPE tx_type) {
+ TX_TYPE tx_type, int use_curr_frame) {
+#if USE_TOPOLOGICAL_SORT
int16_t sort_order[COEFF_IDX_SIZE];
- uint32_t *non_zero_prob = get_non_zero_prob(cm->fc, tx_size, tx_type);
+#endif
+ uint32_t *non_zero_prob;
+ if (use_curr_frame)
+ non_zero_prob = get_non_zero_prob(cm->fc, tx_size, tx_type);
+ else
+ non_zero_prob = get_non_zero_prob(cm->pre_fc, tx_size, tx_type);
int16_t *scan = get_adapt_scan(cm->fc, tx_size, tx_type);
int16_t *iscan = get_adapt_iscan(cm->fc, tx_size, tx_type);
int16_t *nb = get_adapt_nb(cm->fc, tx_size, tx_type);
assert(tx_size_2d[tx_size] <= COEFF_IDX_SIZE);
+#if USE_TOPOLOGICAL_SORT
av1_update_sort_order(tx_size, tx_type, non_zero_prob, sort_order);
av1_update_scan_order(tx_size, sort_order, scan, iscan);
+#else
+ av1_update_scan_order(tx_size, tx_type, non_zero_prob, scan, iscan);
+#endif
+#if USE_LIMIT_SCAN_DISTANCE
+ limit_nb_scan_distance(tx_size, scan, iscan);
+#endif // USE_LIMIT_SCAN_DISTANCE
av1_update_neighbors(tx_size, scan, iscan, nb);
}
@@ -6819,36 +8534,48 @@ void av1_init_scan_order(AV1_COMMON *cm) {
if (tx_size >= TX_SIZES) continue;
#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
- uint32_t *non_zero_prob = get_non_zero_prob(cm->fc, tx_size, tx_type);
- const int tx2d_size = tx_size_2d[tx_size];
- int i;
- SCAN_ORDER *sc = &cm->fc->sc[tx_size][tx_type];
- for (i = 0; i < tx2d_size; ++i) {
- non_zero_prob[i] =
- (1 << ADAPT_SCAN_PROB_PRECISION) / 2; // init non_zero_prob to 0.5
+ if (do_adapt_scan(tx_size, tx_type)) {
+ uint32_t *non_zero_prob = get_non_zero_prob(cm->fc, tx_size, tx_type);
+ const int tx2d_size = tx_size_2d[tx_size];
+ int i;
+ SCAN_ORDER *sc = &cm->fc->sc[tx_size][tx_type];
+ for (i = 0; i < tx2d_size; ++i) {
+ non_zero_prob[i] = (1 << ADAPT_SCAN_PROB_PRECISION) /
+ 2; // init non_zero_prob to 0.5
+ }
+ update_scan_order_facade(cm, tx_size, tx_type, 1);
+ sc->scan = get_adapt_scan(cm->fc, tx_size, tx_type);
+ sc->iscan = get_adapt_iscan(cm->fc, tx_size, tx_type);
+ sc->neighbors = get_adapt_nb(cm->fc, tx_size, tx_type);
+ update_eob_threshold(cm, tx_size, tx_type);
}
- update_scan_order_facade(cm, tx_size, tx_type);
- sc->scan = get_adapt_scan(cm->fc, tx_size, tx_type);
- sc->iscan = get_adapt_iscan(cm->fc, tx_size, tx_type);
- sc->neighbors = get_adapt_nb(cm->fc, tx_size, tx_type);
- update_eob_threshold(cm, tx_size, tx_type);
}
}
}
void av1_adapt_scan_order(AV1_COMMON *cm) {
- TX_SIZE tx_size;
- for (tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
+ if (cm->use_adapt_scan) {
+ TX_SIZE tx_size;
+#if CACHE_SCAN_PROB
+ int use_curr_frame = 0;
+#else // CACHE_SCAN_PROB
+ int use_curr_frame = 1;
+#endif // CACHE_SCAN_PROB
+
+ for (tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- if (tx_size > TX_32X16) continue;
+ if (tx_size > TX_32X16) continue;
#else
- if (tx_size >= TX_SIZES) continue;
+ if (tx_size >= TX_SIZES) continue;
#endif // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- TX_TYPE tx_type;
- for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
- update_scan_prob(cm, tx_size, tx_type, ADAPT_SCAN_UPDATE_RATE);
- update_scan_order_facade(cm, tx_size, tx_type);
- update_eob_threshold(cm, tx_size, tx_type);
+ TX_TYPE tx_type;
+ for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+ if (do_adapt_scan(tx_size, tx_type)) {
+ update_scan_prob(cm, tx_size, tx_type, ADAPT_SCAN_UPDATE_RATE);
+ update_scan_order_facade(cm, tx_size, tx_type, use_curr_frame);
+ update_eob_threshold(cm, tx_size, tx_type);
+ }
+ }
}
}
}
diff --git a/third_party/aom/av1/common/scan.h b/third_party/aom/av1/common/scan.h
index c9911de4e..82d2e917f 100644
--- a/third_party/aom/av1/common/scan.h
+++ b/third_party/aom/av1/common/scan.h
@@ -30,6 +30,9 @@ extern const SCAN_ORDER av1_intra_scan_orders[TX_SIZES_ALL][TX_TYPES];
extern const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
#if CONFIG_ADAPT_SCAN
+#define USE_2X2_PROB 1
+#define USE_TOPOLOGICAL_SORT 0
+#define USE_LIMIT_SCAN_DISTANCE 0
void av1_update_scan_count_facade(AV1_COMMON *cm, FRAME_COUNTS *counts,
TX_SIZE tx_size, TX_TYPE tx_type,
const tran_low_t *dqcoeffs, int max_scan);
@@ -39,6 +42,7 @@ void av1_update_scan_count_facade(AV1_COMMON *cm, FRAME_COUNTS *counts,
// will be scanned first
void av1_augment_prob(TX_SIZE tx_size, TX_TYPE tx_type, uint32_t *prob);
+#if USE_TOPOLOGICAL_SORT
// apply quick sort on nonzero probabilities to obtain a sort order
void av1_update_sort_order(TX_SIZE tx_size, TX_TYPE tx_type,
const uint32_t *non_zero_prob, int16_t *sort_order);
@@ -48,14 +52,24 @@ void av1_update_sort_order(TX_SIZE tx_size, TX_TYPE tx_type,
// scanned before the to-be-scanned coefficient.
void av1_update_scan_order(TX_SIZE tx_size, int16_t *sort_order, int16_t *scan,
int16_t *iscan);
+#else // USE_TOPOLOGICAL_SORT
+void av1_update_scan_order(TX_SIZE tx_size, TX_TYPE tx_type,
+ uint32_t *non_zero_prob, int16_t *scan,
+ int16_t *iscan);
+#endif // USE_TOPOLOGICAL_SORT
// For each coeff_idx in scan[], update its above and left neighbors in
// neighbors[] accordingly.
-void av1_update_neighbors(int tx_size, const int16_t *scan,
+void av1_update_neighbors(TX_SIZE tx_size, const int16_t *scan,
const int16_t *iscan, int16_t *neighbors);
void av1_init_scan_order(AV1_COMMON *cm);
void av1_adapt_scan_order(AV1_COMMON *cm);
-#endif
+#if USE_2X2_PROB
+void av1_down_sample_scan_count(uint32_t *non_zero_count_ds,
+ const uint32_t *non_zero_count,
+ TX_SIZE tx_size);
+#endif // USE_2X2_PROB
+#endif // CONFIG_ADAPT_SCAN
void av1_deliver_eob_threshold(const AV1_COMMON *cm, MACROBLOCKD *xd);
static INLINE int get_coef_context(const int16_t *neighbors,
@@ -77,6 +91,17 @@ static INLINE const SCAN_ORDER *get_default_scan(TX_SIZE tx_size,
#endif // CONFIG_EXT_TX
}
+static INLINE int do_adapt_scan(TX_SIZE tx_size, TX_TYPE tx_type) {
+ (void)tx_size;
+#if CONFIG_EXT_TX
+ if (tx_size_2d[tx_size] >= 1024 && tx_type != DCT_DCT) return 0;
+ return tx_type < IDTX;
+#else
+ (void)tx_type;
+ return 1;
+#endif
+}
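+
+/* For instance, with CONFIG_EXT_TX (illustrative; tx_size_2d[TX_32X32] is
+ * 1024):
+ *   do_adapt_scan(TX_32X32, DCT_DCT)   -> 1 (large, but DCT_DCT)
+ *   do_adapt_scan(TX_32X32, ADST_ADST) -> 0 (1024 coeffs, non-DCT)
+ *   do_adapt_scan(TX_8X8, IDTX)        -> 0 (identity class keeps the
+ *                                            default scans)
+ */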
+
static INLINE const SCAN_ORDER *get_scan(const AV1_COMMON *cm, TX_SIZE tx_size,
TX_TYPE tx_type,
const MB_MODE_INFO *mbmi) {
@@ -84,12 +109,15 @@ static INLINE const SCAN_ORDER *get_scan(const AV1_COMMON *cm, TX_SIZE tx_size,
// use the DCT_DCT scan order for MRC_DCT for now
if (tx_type == MRC_DCT) tx_type = DCT_DCT;
#endif // CONFIG_MRC_TX
+#if CONFIG_LGT_FROM_PRED
+ if (mbmi->use_lgt) tx_type = DCT_DCT;
+#endif
const int is_inter = is_inter_block(mbmi);
#if CONFIG_ADAPT_SCAN
(void)mbmi;
(void)is_inter;
#if CONFIG_EXT_TX
- if (tx_type >= IDTX)
+ if (!do_adapt_scan(tx_size, tx_type))
return get_default_scan(tx_size, tx_type, is_inter);
else
#endif // CONFIG_EXT_TX
diff --git a/third_party/aom/av1/common/seg_common.c b/third_party/aom/av1/common/seg_common.c
index 21a853629..4603026bd 100644
--- a/third_party/aom/av1/common/seg_common.c
+++ b/third_party/aom/av1/common/seg_common.c
@@ -16,10 +16,18 @@
#include "av1/common/seg_common.h"
#include "av1/common/quant_common.h"
+#if CONFIG_LOOPFILTER_LEVEL
+static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 1, 1, 1, 0, 0 };
+
+static const int seg_feature_data_max[SEG_LVL_MAX] = {
+ MAXQ, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, MAX_LOOP_FILTER, 0
+};
+#else
static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 };
static const int seg_feature_data_max[SEG_LVL_MAX] = { MAXQ, MAX_LOOP_FILTER, 3,
0 };
+#endif // CONFIG_LOOPFILTER_LEVEL
// These functions provide access to new segment level features.
// Eventually these function may be "optimized out" but for the moment,
@@ -46,10 +54,11 @@ int av1_is_segfeature_signed(SEG_LVL_FEATURES feature_id) {
void av1_set_segdata(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id, int seg_data) {
- assert(seg_data <= seg_feature_data_max[feature_id]);
if (seg_data < 0) {
assert(seg_feature_data_signed[feature_id]);
assert(-seg_data <= seg_feature_data_max[feature_id]);
+ } else {
+ assert(seg_data <= seg_feature_data_max[feature_id]);
}
seg->feature_data[segment_id][feature_id] = seg_data;
diff --git a/third_party/aom/av1/common/seg_common.h b/third_party/aom/av1/common/seg_common.h
index 8b199e64e..6d16aedb6 100644
--- a/third_party/aom/av1/common/seg_common.h
+++ b/third_party/aom/av1/common/seg_common.h
@@ -26,14 +26,37 @@ extern "C" {
#define PREDICTION_PROBS 3
+#if CONFIG_LOOPFILTER_LEVEL
+typedef enum {
+ SEG_LVL_ALT_Q, // Use alternate Quantizer ....
+ SEG_LVL_ALT_LF_Y_V, // Use alternate loop filter value on y plane vertical
+ SEG_LVL_ALT_LF_Y_H, // Use alternate loop filter value on y plane horizontal
+ SEG_LVL_ALT_LF_U, // Use alternate loop filter value on u plane
+ SEG_LVL_ALT_LF_V, // Use alternate loop filter value on v plane
+ SEG_LVL_REF_FRAME, // Optional Segment reference frame
+ SEG_LVL_SKIP, // Optional Segment (0,0) + skip mode
+#if CONFIG_SEGMENT_ZEROMV
+ SEG_LVL_ZEROMV,
+ SEG_LVL_MAX
+#else
+ SEG_LVL_MAX
+#endif
+} SEG_LVL_FEATURES;
+#else // CONFIG_LOOPFILTER_LEVEL
// Segment level features.
typedef enum {
SEG_LVL_ALT_Q = 0, // Use alternate Quantizer ....
SEG_LVL_ALT_LF = 1, // Use alternate loop filter value...
SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame
- SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode
- SEG_LVL_MAX = 4 // Number of features supported
+ SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode
+#if CONFIG_SEGMENT_ZEROMV
+ SEG_LVL_ZEROMV = 4,
+ SEG_LVL_MAX = 5
+#else
+ SEG_LVL_MAX = 4
+#endif
} SEG_LVL_FEATURES;
+#endif // CONFIG_LOOPFILTER_LEVEL
struct segmentation {
uint8_t enabled;
diff --git a/third_party/aom/av1/common/thread_common.c b/third_party/aom/av1/common/thread_common.c
index d96a71aef..eec8629ff 100644
--- a/third_party/aom/av1/common/thread_common.c
+++ b/third_party/aom/av1/common/thread_common.c
@@ -290,6 +290,13 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int start, int stop, int y_only,
AVxWorker *workers, int nworkers,
AV1LfSync *lf_sync) {
+#if CONFIG_EXT_PARTITION
+ printf(
+ "STOPPING: This code has not been modified to work with the "
+ "extended coding unit size experiment");
+ exit(EXIT_FAILURE);
+#endif // CONFIG_EXT_PARTITION
+
const AVxWorkerInterface *const winterface = aom_get_worker_interface();
// Number of superblock rows and cols
const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2;
@@ -299,13 +306,6 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
const int num_workers = AOMMIN(nworkers, tile_cols);
int i;
-#if CONFIG_EXT_PARTITION
- printf(
- "STOPPING: This code has not been modified to work with the "
- "extended coding unit size experiment");
- exit(EXIT_FAILURE);
-#endif // CONFIG_EXT_PARTITION
-
if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
num_workers > lf_sync->num_workers) {
av1_loop_filter_dealloc(lf_sync);
@@ -416,8 +416,11 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
struct macroblockd_plane planes[MAX_MB_PLANE],
- int frame_filter_level, int y_only,
- int partial_frame, AVxWorker *workers,
+ int frame_filter_level,
+#if CONFIG_LOOPFILTER_LEVEL
+ int frame_filter_level_r,
+#endif
+ int y_only, int partial_frame, AVxWorker *workers,
int num_workers, AV1LfSync *lf_sync) {
int start_mi_row, end_mi_row, mi_rows_to_filter;
@@ -431,8 +434,12 @@ void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
}
end_mi_row = start_mi_row + mi_rows_to_filter;
- av1_loop_filter_frame_init(cm, frame_filter_level);
-
+#if CONFIG_LOOPFILTER_LEVEL
+ av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level_r,
+ y_only);
+#else
+ av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level);
+#endif // CONFIG_LOOPFILTER_LEVEL
loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, y_only,
workers, num_workers, lf_sync);
}
diff --git a/third_party/aom/av1/common/thread_common.h b/third_party/aom/av1/common/thread_common.h
index 7b57ae8f3..6d118e60b 100644
--- a/third_party/aom/av1/common/thread_common.h
+++ b/third_party/aom/av1/common/thread_common.h
@@ -50,8 +50,11 @@ void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
// Multi-threaded loopfilter that uses the tile threads.
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
struct macroblockd_plane planes[MAX_MB_PLANE],
- int frame_filter_level, int y_only,
- int partial_frame, AVxWorker *workers,
+ int frame_filter_level,
+#if CONFIG_LOOPFILTER_LEVEL
+ int frame_filter_level_r,
+#endif
+ int y_only, int partial_frame, AVxWorker *workers,
int num_workers, AV1LfSync *lf_sync);
void av1_accumulate_frame_counts(struct FRAME_COUNTS *acc_counts,
diff --git a/third_party/aom/av1/common/tile_common.c b/third_party/aom/av1/common/tile_common.c
index 3bff53c22..507a01265 100644
--- a/third_party/aom/av1/common/tile_common.c
+++ b/third_party/aom/av1/common/tile_common.c
@@ -13,29 +13,18 @@
#include "av1/common/onyxc_int.h"
#include "aom_dsp/aom_dsp_common.h"
-void av1_tile_set_row(TileInfo *tile, const AV1_COMMON *cm, int row) {
- tile->mi_row_start = row * cm->tile_height;
- tile->mi_row_end = AOMMIN(tile->mi_row_start + cm->tile_height, cm->mi_rows);
-}
-
-void av1_tile_set_col(TileInfo *tile, const AV1_COMMON *cm, int col) {
- tile->mi_col_start = col * cm->tile_width;
- tile->mi_col_end = AOMMIN(tile->mi_col_start + cm->tile_width, cm->mi_cols);
-}
-
#if CONFIG_DEPENDENT_HORZTILES
void av1_tile_set_tg_boundary(TileInfo *tile, const AV1_COMMON *const cm,
int row, int col) {
- if (row < cm->tile_rows - 1) {
- tile->tg_horz_boundary =
- col >= cm->tile_group_start_col[row][col]
- ? (row == cm->tile_group_start_row[row][col] ? 1 : 0)
- : (row == cm->tile_group_start_row[row + 1][col] ? 1 : 0);
- } else {
- assert(col >= cm->tile_group_start_col[row][col]);
- tile->tg_horz_boundary =
- (row == cm->tile_group_start_row[row][col] ? 1 : 0);
+ const int tg_start_row = cm->tile_group_start_row[row][col];
+ const int tg_start_col = cm->tile_group_start_col[row][col];
+ tile->tg_horz_boundary = ((row == tg_start_row && col >= tg_start_col) ||
+ (row == tg_start_row + 1 && col < tg_start_col));
+#if CONFIG_MAX_TILE
+ if (cm->tile_row_independent[row]) {
+ tile->tg_horz_boundary = 1; // this tile row is independent
}
+#endif
}
#endif
void av1_tile_init(TileInfo *tile, const AV1_COMMON *cm, int row, int col) {
@@ -46,6 +35,125 @@ void av1_tile_init(TileInfo *tile, const AV1_COMMON *cm, int row, int col) {
#endif
}
+#if CONFIG_MAX_TILE
+
+// Find smallest k>=0 such that (blk_size << k) >= target
+static int tile_log2(int blk_size, int target) {
+ int k;
+ for (k = 0; (blk_size << k) < target; k++) {
+ }
+ return k;
+}
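+
+/* e.g. tile_log2(1, 64) == 6 and tile_log2(1, 65) == 7, while
+ * tile_log2(4, 64) == 4 since (4 << 4) == 64. */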
+
+void av1_get_tile_limits(AV1_COMMON *const cm) {
+ int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+ int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+ int sb_cols = mi_cols >> MAX_MIB_SIZE_LOG2;
+ int sb_rows = mi_rows >> MAX_MIB_SIZE_LOG2;
+
+ cm->min_log2_tile_cols = tile_log2(MAX_TILE_WIDTH_SB, sb_cols);
+ cm->max_log2_tile_cols = tile_log2(1, AOMMIN(sb_cols, MAX_TILE_COLS));
+ cm->max_log2_tile_rows = tile_log2(1, AOMMIN(sb_rows, MAX_TILE_ROWS));
+ cm->min_log2_tiles = tile_log2(MAX_TILE_AREA_SB, sb_cols * sb_rows);
+ cm->min_log2_tiles = AOMMAX(cm->min_log2_tiles, cm->min_log2_tile_cols);
+ // TODO(dominic.symes@arm.com):
+ // Add in levelMinLog2Tiles as a lower limit when levels are defined
+}
+
+void av1_calculate_tile_cols(AV1_COMMON *const cm) {
+ int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+ int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+ int sb_cols = mi_cols >> MAX_MIB_SIZE_LOG2;
+ int sb_rows = mi_rows >> MAX_MIB_SIZE_LOG2;
+ int i;
+
+ if (cm->uniform_tile_spacing_flag) {
+ int start_sb;
+ int size_sb = ALIGN_POWER_OF_TWO(sb_cols, cm->log2_tile_cols);
+ size_sb >>= cm->log2_tile_cols;
+ assert(size_sb > 0);
+ for (i = 0, start_sb = 0; start_sb < sb_cols; i++) {
+ cm->tile_col_start_sb[i] = start_sb;
+ start_sb += size_sb;
+ }
+ cm->tile_cols = i;
+ cm->tile_col_start_sb[i] = sb_cols;
+ cm->min_log2_tile_rows = AOMMAX(cm->min_log2_tiles - cm->log2_tile_cols, 0);
+ cm->max_tile_height_sb = sb_rows >> cm->min_log2_tile_rows;
+ } else {
+ int max_tile_area_sb = (sb_rows * sb_cols);
+ int max_tile_width_sb = 0;
+ cm->log2_tile_cols = tile_log2(1, cm->tile_cols);
+ for (i = 0; i < cm->tile_cols; i++) {
+ int size_sb = cm->tile_col_start_sb[i + 1] - cm->tile_col_start_sb[i];
+ max_tile_width_sb = AOMMAX(max_tile_width_sb, size_sb);
+ }
+ if (cm->min_log2_tiles) {
+ max_tile_area_sb >>= (cm->min_log2_tiles + 1);
+ }
+ cm->max_tile_height_sb = AOMMAX(max_tile_area_sb / max_tile_width_sb, 1);
+ }
+}
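
As a worked example of the uniform-spacing branch above, under hypothetical numbers (ALIGN_POWER_OF_TWO is redefined locally to match aom_dsp_common.h): with 19 superblock columns split into 1 << 2 tile columns, each tile spans ceil(19 / 4) = 5 superblocks, and the start positions come out as 0, 5, 10, 15 with a closing sentinel of 19:

#include <assert.h>

#define ALIGN_POWER_OF_TWO(value, n) \
  (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))

// Start-position arithmetic of the uniform-spacing branch, reduced to a
// self-contained check (hypothetical inputs).
int main(void) {
  const int sb_cols = 19, log2_tile_cols = 2;
  const int size_sb =
      ALIGN_POWER_OF_TWO(sb_cols, log2_tile_cols) >> log2_tile_cols;
  assert(size_sb == 5);  // ceil(19 / 4)
  int starts[8], i, start_sb;
  for (i = 0, start_sb = 0; start_sb < sb_cols; i++) {
    starts[i] = start_sb;
    start_sb += size_sb;
  }
  starts[i] = sb_cols;  // closing sentinel
  assert(i == 4 && starts[1] == 5 && starts[4] == 19);
  return 0;
}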
+
+void av1_calculate_tile_rows(AV1_COMMON *const cm) {
+ int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+ int sb_rows = mi_rows >> MAX_MIB_SIZE_LOG2;
+ int start_sb, size_sb, i;
+
+ if (cm->uniform_tile_spacing_flag) {
+ size_sb = ALIGN_POWER_OF_TWO(sb_rows, cm->log2_tile_rows);
+ size_sb >>= cm->log2_tile_rows;
+ assert(size_sb > 0);
+ for (i = 0, start_sb = 0; start_sb < sb_rows; i++) {
+ cm->tile_row_start_sb[i] = start_sb;
+ start_sb += size_sb;
+ }
+ cm->tile_rows = i;
+ cm->tile_row_start_sb[i] = sb_rows;
+ } else {
+ cm->log2_tile_rows = tile_log2(1, cm->tile_rows);
+ }
+
+#if CONFIG_DEPENDENT_HORZTILES
+  // Record which tile rows must be independent for parallelism
+ for (i = 0, start_sb = 0; i < cm->tile_rows; i++) {
+ cm->tile_row_independent[i] = 0;
+ if (cm->tile_row_start_sb[i + 1] - start_sb > cm->max_tile_height_sb) {
+ cm->tile_row_independent[i] = 1;
+ start_sb = cm->tile_row_start_sb[i];
+ }
+ }
+#endif
+}
+
+void av1_tile_set_row(TileInfo *tile, const AV1_COMMON *cm, int row) {
+ assert(row < cm->tile_rows);
+ int mi_row_start = cm->tile_row_start_sb[row] << MAX_MIB_SIZE_LOG2;
+ int mi_row_end = cm->tile_row_start_sb[row + 1] << MAX_MIB_SIZE_LOG2;
+ tile->mi_row_start = mi_row_start;
+ tile->mi_row_end = AOMMIN(mi_row_end, cm->mi_rows);
+}
+
+void av1_tile_set_col(TileInfo *tile, const AV1_COMMON *cm, int col) {
+ assert(col < cm->tile_cols);
+ int mi_col_start = cm->tile_col_start_sb[col] << MAX_MIB_SIZE_LOG2;
+ int mi_col_end = cm->tile_col_start_sb[col + 1] << MAX_MIB_SIZE_LOG2;
+ tile->mi_col_start = mi_col_start;
+ tile->mi_col_end = AOMMIN(mi_col_end, cm->mi_cols);
+}
+
+#else
+
+void av1_tile_set_row(TileInfo *tile, const AV1_COMMON *cm, int row) {
+ tile->mi_row_start = row * cm->tile_height;
+ tile->mi_row_end = AOMMIN(tile->mi_row_start + cm->tile_height, cm->mi_rows);
+}
+
+void av1_tile_set_col(TileInfo *tile, const AV1_COMMON *cm, int col) {
+ tile->mi_col_start = col * cm->tile_width;
+ tile->mi_col_end = AOMMIN(tile->mi_col_start + cm->tile_width, cm->mi_cols);
+}
+
#if CONFIG_EXT_PARTITION
#define MIN_TILE_WIDTH_MAX_SB 2
#define MAX_TILE_WIDTH_MAX_SB 32
@@ -74,6 +182,7 @@ void av1_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols,
*max_log2_tile_cols = get_max_log2_tile_cols(max_sb_cols);
assert(*min_log2_tile_cols <= *max_log2_tile_cols);
}
+#endif // CONFIG_MAX_TILE
void av1_setup_frame_boundary_info(const AV1_COMMON *const cm) {
MODE_INFO *mi = cm->mi;
@@ -103,16 +212,38 @@ void av1_setup_frame_boundary_info(const AV1_COMMON *const cm) {
}
}
+int get_tile_size(int mi_frame_size, int log2_tile_num, int *ntiles) {
+ // Round the frame up to a whole number of max superblocks
+ mi_frame_size = ALIGN_POWER_OF_TWO(mi_frame_size, MAX_MIB_SIZE_LOG2);
+
+  // Divide by the signalled number of tiles, rounding up to a multiple of
+  // the max superblock size. To do this, shift right (and round up) to get
+  // the tile size in max superblocks, then shift left again to convert it
+  // to mi units.
+ const int shift = log2_tile_num + MAX_MIB_SIZE_LOG2;
+ const int max_sb_tile_size =
+ ALIGN_POWER_OF_TWO(mi_frame_size, shift) >> shift;
+ const int mi_tile_size = max_sb_tile_size << MAX_MIB_SIZE_LOG2;
+
+ // The actual number of tiles is the ceiling of the frame size in mi units
+  // divided by mi_tile_size. This is at most 1 << log2_tile_num but might be
+ // strictly less if max_sb_tile_size got rounded up significantly.
+ if (ntiles) {
+ *ntiles = (mi_frame_size + mi_tile_size - 1) / mi_tile_size;
+ assert(*ntiles <= (1 << log2_tile_num));
+ }
+
+ return mi_tile_size;
+}
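
A self-contained sketch of the same arithmetic, with MAX_MIB_SIZE_LOG2 assumed to be 5 purely for illustration (the real value comes from the av1 headers) and ALIGN_POWER_OF_TWO copied from aom_dsp_common.h:

#include <assert.h>

#define MAX_MIB_SIZE_LOG2 5  // assumed for this sketch
#define ALIGN_POWER_OF_TWO(value, n) \
  (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))

// Mirror of get_tile_size above (hypothetical name).
static int get_tile_size_demo(int mi_frame_size, int log2_tile_num,
                              int *ntiles) {
  mi_frame_size = ALIGN_POWER_OF_TWO(mi_frame_size, MAX_MIB_SIZE_LOG2);
  const int shift = log2_tile_num + MAX_MIB_SIZE_LOG2;
  const int max_sb_tile_size =
      ALIGN_POWER_OF_TWO(mi_frame_size, shift) >> shift;
  const int mi_tile_size = max_sb_tile_size << MAX_MIB_SIZE_LOG2;
  if (ntiles) *ntiles = (mi_frame_size + mi_tile_size - 1) / mi_tile_size;
  return mi_tile_size;
}

int main(void) {
  int ntiles;
  // 100 mi rounds up to 128 mi; four tiles of 32 mi each.
  assert(get_tile_size_demo(100, 2, &ntiles) == 32 && ntiles == 4);
  // Requesting 1 << 3 tiles still yields 32-mi tiles, but only 4 fit,
  // so ntiles comes back strictly less than 8.
  assert(get_tile_size_demo(100, 3, &ntiles) == 32 && ntiles == 4);
  return 0;
}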
+
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
void av1_setup_across_tile_boundary_info(const AV1_COMMON *const cm,
const TileInfo *const tile_info) {
- int lpf_across_tiles_enabled = 1;
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
- lpf_across_tiles_enabled = cm->loop_filter_across_tiles_enabled;
-#endif
- if ((cm->tile_cols * cm->tile_rows > 1) && (!lpf_across_tiles_enabled)) {
+ if (cm->tile_cols * cm->tile_rows > 1) {
const int mi_row = tile_info->mi_row_start;
const int mi_col = tile_info->mi_col_start;
MODE_INFO *const mi_start = cm->mi + mi_row * cm->mi_stride + mi_col;
+ assert(mi_start < cm->mip + cm->mi_alloc_size);
MODE_INFO *mi = 0;
const int row_diff = tile_info->mi_row_end - tile_info->mi_row_start;
const int col_diff = tile_info->mi_col_end - tile_info->mi_col_start;
@@ -136,6 +267,10 @@ void av1_setup_across_tile_boundary_info(const AV1_COMMON *const cm,
}
mi = mi_start + (row_diff - 1) * cm->mi_stride;
+
+  // Explicit bounds check: the bottom-row writes must stay inside the mi
+  // array.
+ assert(mi + col_diff <= cm->mip + cm->mi_alloc_size);
+
for (col = 0; col < col_diff; ++col) {
mi->mbmi.boundary_info |= TILE_BOTTOM_BOUNDARY;
mi += 1;
@@ -149,7 +284,6 @@ void av1_setup_across_tile_boundary_info(const AV1_COMMON *const cm,
}
}
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
int av1_disable_loopfilter_on_tile_boundary(const struct AV1Common *cm) {
return (!cm->loop_filter_across_tiles_enabled &&
(cm->tile_cols * cm->tile_rows > 1));
diff --git a/third_party/aom/av1/common/tile_common.h b/third_party/aom/av1/common/tile_common.h
index e5b8a0b82..be21e1482 100644
--- a/third_party/aom/av1/common/tile_common.h
+++ b/third_party/aom/av1/common/tile_common.h
@@ -43,13 +43,32 @@ void av1_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols,
int *max_log2_tile_cols);
void av1_setup_frame_boundary_info(const struct AV1Common *const cm);
-void av1_setup_across_tile_boundary_info(const struct AV1Common *const cm,
- const TileInfo *const tile_info);
+
+// Calculate the correct tile size (width or height) for (1 << log2_tile_num)
+// tiles horizontally or vertically in the frame.
+int get_tile_size(int mi_frame_size, int log2_tile_num, int *ntiles);
#if CONFIG_LOOPFILTERING_ACROSS_TILES
+void av1_setup_across_tile_boundary_info(const struct AV1Common *const cm,
+ const TileInfo *const tile_info);
int av1_disable_loopfilter_on_tile_boundary(const struct AV1Common *cm);
#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_MAX_TILE
+
+// Define the maximum tile width and area.
+// There is no explicit maximum tile height, since it is already constrained
+// by the area and width limits.
+// The minimum tile width or height is fixed at one superblock.
+#define MAX_TILE_WIDTH (4096)  // Maximum tile width in pixels
+#define MAX_TILE_WIDTH_SB (MAX_TILE_WIDTH >> MAX_SB_SIZE_LOG2)
+#define MAX_TILE_AREA (4096 * 2304) // Maximum tile area in pixels
+#define MAX_TILE_AREA_SB (MAX_TILE_AREA >> (2 * MAX_SB_SIZE_LOG2))
+
+void av1_get_tile_limits(struct AV1Common *const cm);
+void av1_calculate_tile_cols(struct AV1Common *const cm);
+void av1_calculate_tile_rows(struct AV1Common *const cm);
+#endif
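
To make the derived SB-unit constants concrete: assuming MAX_SB_SIZE_LOG2 == 7 (128x128 superblocks; the actual value depends on the build configuration), MAX_TILE_WIDTH_SB works out to 32 and MAX_TILE_AREA_SB to 576, i.e. a tile can be at most 32 superblocks wide and 576 superblocks in area. A hypothetical spot check:

#include <assert.h>

#define MAX_SB_SIZE_LOG2 7  // assumed 128x128 superblocks
#define MAX_TILE_WIDTH (4096)
#define MAX_TILE_WIDTH_SB (MAX_TILE_WIDTH >> MAX_SB_SIZE_LOG2)
#define MAX_TILE_AREA (4096 * 2304)
#define MAX_TILE_AREA_SB (MAX_TILE_AREA >> (2 * MAX_SB_SIZE_LOG2))

int main(void) {
  assert(MAX_TILE_WIDTH_SB == 32);  // 4096 / 128
  assert(MAX_TILE_AREA_SB == 576);  // (4096 * 2304) / (128 * 128)
  return 0;
}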
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/common/token_cdfs.h b/third_party/aom/av1/common/token_cdfs.h
new file mode 100644
index 000000000..c4f0f94c0
--- /dev/null
+++ b/third_party/aom/av1/common/token_cdfs.h
@@ -0,0 +1,5253 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "./aom_config.h"
+#include "av1/common/entropy.h"
+
+/* clang-format off */
+static const coeff_cdf_model
+av1_default_coef_head_cdfs_q0[TX_SIZES][PLANE_TYPES] = {
+ { // TX 4X4
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(21029), AOM_ICDF(21848), AOM_ICDF(26326), AOM_ICDF(29423),
+ AOM_ICDF(30610), AOM_ICDF(32768), },
+ {AOM_ICDF(10066), AOM_ICDF(12716), AOM_ICDF(18523), AOM_ICDF(23277),
+ AOM_ICDF(24780), AOM_ICDF(32768), },
+ {AOM_ICDF(1655), AOM_ICDF(4793), AOM_ICDF(6429), AOM_ICDF(11430),
+ AOM_ICDF(12206), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(10364), AOM_ICDF(14773), AOM_ICDF(25084), AOM_ICDF(25599),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10060), AOM_ICDF(14834), AOM_ICDF(24695), AOM_ICDF(25188),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8279), AOM_ICDF(11106), AOM_ICDF(21159), AOM_ICDF(21671),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5914), AOM_ICDF(6961), AOM_ICDF(15824), AOM_ICDF(16314),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3542), AOM_ICDF(3935), AOM_ICDF(10073), AOM_ICDF(10456),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1492), AOM_ICDF(1808), AOM_ICDF(4428), AOM_ICDF(4747),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(15783), AOM_ICDF(19657), AOM_ICDF(28753), AOM_ICDF(29248),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12047), AOM_ICDF(15766), AOM_ICDF(26989), AOM_ICDF(27464),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8412), AOM_ICDF(9971), AOM_ICDF(21538), AOM_ICDF(22026),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5438), AOM_ICDF(6039), AOM_ICDF(15108), AOM_ICDF(15570),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3247), AOM_ICDF(3593), AOM_ICDF(9495), AOM_ICDF(9859),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1428), AOM_ICDF(1742), AOM_ICDF(4322), AOM_ICDF(4638),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(18469), AOM_ICDF(21675), AOM_ICDF(30172), AOM_ICDF(30563),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12582), AOM_ICDF(16559), AOM_ICDF(27995), AOM_ICDF(28423),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8183), AOM_ICDF(9915), AOM_ICDF(21836), AOM_ICDF(22336),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5255), AOM_ICDF(5845), AOM_ICDF(15137), AOM_ICDF(15593),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3140), AOM_ICDF(3478), AOM_ICDF(9376), AOM_ICDF(9739),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1549), AOM_ICDF(1864), AOM_ICDF(4660), AOM_ICDF(4984),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(18319), AOM_ICDF(23757), AOM_ICDF(30989), AOM_ICDF(31399),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12864), AOM_ICDF(18051), AOM_ICDF(28729), AOM_ICDF(29218),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8090), AOM_ICDF(10047), AOM_ICDF(22011), AOM_ICDF(22680),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5061), AOM_ICDF(5688), AOM_ICDF(14783), AOM_ICDF(15379),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3425), AOM_ICDF(3784), AOM_ICDF(9565), AOM_ICDF(9998),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1564), AOM_ICDF(1884), AOM_ICDF(4703), AOM_ICDF(5054),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(8329), AOM_ICDF(23625), AOM_ICDF(30376), AOM_ICDF(31182),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7265), AOM_ICDF(19981), AOM_ICDF(27965), AOM_ICDF(29333),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5797), AOM_ICDF(12014), AOM_ICDF(21143), AOM_ICDF(23728),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4525), AOM_ICDF(7029), AOM_ICDF(14661), AOM_ICDF(17493),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3200), AOM_ICDF(4082), AOM_ICDF(9679), AOM_ICDF(11816),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1930), AOM_ICDF(2344), AOM_ICDF(5504), AOM_ICDF(6684),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(12366), AOM_ICDF(20513), AOM_ICDF(22133), AOM_ICDF(29810),
+ AOM_ICDF(30422), AOM_ICDF(32768), },
+ {AOM_ICDF(7182), AOM_ICDF(16662), AOM_ICDF(18633), AOM_ICDF(27443),
+ AOM_ICDF(28120), AOM_ICDF(32768), },
+ {AOM_ICDF(1791), AOM_ICDF(10613), AOM_ICDF(11616), AOM_ICDF(21520),
+ AOM_ICDF(22191), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(18943), AOM_ICDF(19755), AOM_ICDF(30340), AOM_ICDF(30674),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15702), AOM_ICDF(17160), AOM_ICDF(28778), AOM_ICDF(29115),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9337), AOM_ICDF(10054), AOM_ICDF(22492), AOM_ICDF(22845),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6550), AOM_ICDF(7019), AOM_ICDF(17068), AOM_ICDF(17420),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4169), AOM_ICDF(4566), AOM_ICDF(11849), AOM_ICDF(12185),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2495), AOM_ICDF(2839), AOM_ICDF(6895), AOM_ICDF(7221),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(20241), AOM_ICDF(21593), AOM_ICDF(31083), AOM_ICDF(31425),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15276), AOM_ICDF(16626), AOM_ICDF(28787), AOM_ICDF(29136),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7656), AOM_ICDF(8102), AOM_ICDF(20347), AOM_ICDF(20700),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4527), AOM_ICDF(4880), AOM_ICDF(13482), AOM_ICDF(13819),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2538), AOM_ICDF(2860), AOM_ICDF(7975), AOM_ICDF(8298),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1394), AOM_ICDF(1707), AOM_ICDF(3770), AOM_ICDF(4086),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(19968), AOM_ICDF(21872), AOM_ICDF(30812), AOM_ICDF(31172),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15081), AOM_ICDF(16805), AOM_ICDF(28957), AOM_ICDF(29326),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8196), AOM_ICDF(8748), AOM_ICDF(21434), AOM_ICDF(21825),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5297), AOM_ICDF(5675), AOM_ICDF(15007), AOM_ICDF(15385),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3102), AOM_ICDF(3429), AOM_ICDF(9255), AOM_ICDF(9607),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1502), AOM_ICDF(1815), AOM_ICDF(4662), AOM_ICDF(4983),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(19362), AOM_ICDF(22537), AOM_ICDF(31260), AOM_ICDF(31624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14450), AOM_ICDF(17789), AOM_ICDF(29362), AOM_ICDF(29788),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7957), AOM_ICDF(8982), AOM_ICDF(21542), AOM_ICDF(22120),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4819), AOM_ICDF(5280), AOM_ICDF(14199), AOM_ICDF(14724),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2831), AOM_ICDF(3180), AOM_ICDF(8511), AOM_ICDF(8950),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1385), AOM_ICDF(1700), AOM_ICDF(4300), AOM_ICDF(4633),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(14134), AOM_ICDF(22252), AOM_ICDF(31119), AOM_ICDF(31577),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11383), AOM_ICDF(19847), AOM_ICDF(29451), AOM_ICDF(30205),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7338), AOM_ICDF(11314), AOM_ICDF(22338), AOM_ICDF(24028),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5071), AOM_ICDF(6634), AOM_ICDF(15379), AOM_ICDF(17178),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2969), AOM_ICDF(3703), AOM_ICDF(9896), AOM_ICDF(11246),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1809), AOM_ICDF(2173), AOM_ICDF(5573), AOM_ICDF(6229),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(25213), AOM_ICDF(26007), AOM_ICDF(29751), AOM_ICDF(31199),
+ AOM_ICDF(31688), AOM_ICDF(32768), },
+ {AOM_ICDF(13781), AOM_ICDF(16489), AOM_ICDF(23298), AOM_ICDF(27505),
+ AOM_ICDF(28405), AOM_ICDF(32768), },
+ {AOM_ICDF(4621), AOM_ICDF(9194), AOM_ICDF(12764), AOM_ICDF(19842),
+ AOM_ICDF(20708), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(12686), AOM_ICDF(19031), AOM_ICDF(28910), AOM_ICDF(29358),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12732), AOM_ICDF(18729), AOM_ICDF(28346), AOM_ICDF(28824),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9753), AOM_ICDF(12954), AOM_ICDF(24344), AOM_ICDF(24920),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6853), AOM_ICDF(7851), AOM_ICDF(18601), AOM_ICDF(19110),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3697), AOM_ICDF(4071), AOM_ICDF(11373), AOM_ICDF(11743),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1738), AOM_ICDF(2057), AOM_ICDF(5307), AOM_ICDF(5627),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(18299), AOM_ICDF(24455), AOM_ICDF(30930), AOM_ICDF(31398),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14316), AOM_ICDF(19083), AOM_ICDF(29266), AOM_ICDF(29766),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9584), AOM_ICDF(11344), AOM_ICDF(23898), AOM_ICDF(24407),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6076), AOM_ICDF(6645), AOM_ICDF(16805), AOM_ICDF(17237),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3535), AOM_ICDF(3885), AOM_ICDF(10393), AOM_ICDF(10746),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1909), AOM_ICDF(2222), AOM_ICDF(5010), AOM_ICDF(5328),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(21106), AOM_ICDF(25258), AOM_ICDF(31172), AOM_ICDF(31576),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14944), AOM_ICDF(20229), AOM_ICDF(29903), AOM_ICDF(30361),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10454), AOM_ICDF(13063), AOM_ICDF(25548), AOM_ICDF(26138),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7667), AOM_ICDF(8529), AOM_ICDF(20025), AOM_ICDF(20588),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4813), AOM_ICDF(5176), AOM_ICDF(13672), AOM_ICDF(14085),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2450), AOM_ICDF(2763), AOM_ICDF(7515), AOM_ICDF(7873),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(18297), AOM_ICDF(25980), AOM_ICDF(31547), AOM_ICDF(31946),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13370), AOM_ICDF(21048), AOM_ICDF(30193), AOM_ICDF(30703),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9326), AOM_ICDF(13020), AOM_ICDF(25206), AOM_ICDF(26074),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6117), AOM_ICDF(7480), AOM_ICDF(18243), AOM_ICDF(19130),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6408), AOM_ICDF(6819), AOM_ICDF(13596), AOM_ICDF(14098),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2179), AOM_ICDF(2485), AOM_ICDF(7393), AOM_ICDF(7768),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(11255), AOM_ICDF(26931), AOM_ICDF(31505), AOM_ICDF(32033),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9120), AOM_ICDF(23148), AOM_ICDF(30070), AOM_ICDF(31091),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7927), AOM_ICDF(15909), AOM_ICDF(25162), AOM_ICDF(27329),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6416), AOM_ICDF(10706), AOM_ICDF(19959), AOM_ICDF(22732),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4232), AOM_ICDF(5545), AOM_ICDF(13107), AOM_ICDF(15118),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2626), AOM_ICDF(2941), AOM_ICDF(8665), AOM_ICDF(9872),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(27618), AOM_ICDF(28976), AOM_ICDF(30940), AOM_ICDF(31993),
+ AOM_ICDF(32336), AOM_ICDF(32768), },
+ {AOM_ICDF(16119), AOM_ICDF(21691), AOM_ICDF(26334), AOM_ICDF(30705),
+ AOM_ICDF(31244), AOM_ICDF(32768), },
+ {AOM_ICDF(5114), AOM_ICDF(14755), AOM_ICDF(17865), AOM_ICDF(27048),
+ AOM_ICDF(27895), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(19468), AOM_ICDF(23767), AOM_ICDF(31339), AOM_ICDF(31674),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16878), AOM_ICDF(20966), AOM_ICDF(30654), AOM_ICDF(31007),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12213), AOM_ICDF(14415), AOM_ICDF(26909), AOM_ICDF(27338),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9404), AOM_ICDF(10670), AOM_ICDF(22239), AOM_ICDF(22719),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6856), AOM_ICDF(7784), AOM_ICDF(17127), AOM_ICDF(17609),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5034), AOM_ICDF(5529), AOM_ICDF(13229), AOM_ICDF(13634),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(21214), AOM_ICDF(25570), AOM_ICDF(31656), AOM_ICDF(31994),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17012), AOM_ICDF(20535), AOM_ICDF(30403), AOM_ICDF(30787),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10855), AOM_ICDF(12147), AOM_ICDF(25451), AOM_ICDF(25874),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7055), AOM_ICDF(7837), AOM_ICDF(19116), AOM_ICDF(19553),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4141), AOM_ICDF(4531), AOM_ICDF(11911), AOM_ICDF(12296),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1706), AOM_ICDF(2041), AOM_ICDF(5622), AOM_ICDF(5957),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(22092), AOM_ICDF(26330), AOM_ICDF(31642), AOM_ICDF(32015),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16433), AOM_ICDF(20889), AOM_ICDF(30263), AOM_ICDF(30704),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11015), AOM_ICDF(13045), AOM_ICDF(26253), AOM_ICDF(26743),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9188), AOM_ICDF(9924), AOM_ICDF(21991), AOM_ICDF(22551),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5259), AOM_ICDF(5634), AOM_ICDF(14131), AOM_ICDF(14627),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1916), AOM_ICDF(2218), AOM_ICDF(6453), AOM_ICDF(6780),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(20331), AOM_ICDF(26854), AOM_ICDF(31896), AOM_ICDF(32255),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15738), AOM_ICDF(22741), AOM_ICDF(31108), AOM_ICDF(31557),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11693), AOM_ICDF(15508), AOM_ICDF(27435), AOM_ICDF(28103),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8066), AOM_ICDF(9281), AOM_ICDF(20855), AOM_ICDF(21631),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4427), AOM_ICDF(4860), AOM_ICDF(12951), AOM_ICDF(13543),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1928), AOM_ICDF(2372), AOM_ICDF(5634), AOM_ICDF(6672),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(15605), AOM_ICDF(27749), AOM_ICDF(31907), AOM_ICDF(32303),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11920), AOM_ICDF(24653), AOM_ICDF(31013), AOM_ICDF(31675),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8007), AOM_ICDF(14898), AOM_ICDF(25377), AOM_ICDF(27353),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6010), AOM_ICDF(8920), AOM_ICDF(18956), AOM_ICDF(21554),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4573), AOM_ICDF(5611), AOM_ICDF(13522), AOM_ICDF(15795),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4274), AOM_ICDF(6411), AOM_ICDF(11398), AOM_ICDF(14247),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 8X8
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(22195), AOM_ICDF(22830), AOM_ICDF(25684), AOM_ICDF(28569),
+ AOM_ICDF(30557), AOM_ICDF(32768), },
+ {AOM_ICDF(9973), AOM_ICDF(12001), AOM_ICDF(15354), AOM_ICDF(20353),
+ AOM_ICDF(23020), AOM_ICDF(32768), },
+ {AOM_ICDF(1514), AOM_ICDF(3998), AOM_ICDF(4873), AOM_ICDF(9182),
+ AOM_ICDF(9967), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(11609), AOM_ICDF(14013), AOM_ICDF(24609), AOM_ICDF(25092),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10302), AOM_ICDF(15208), AOM_ICDF(24145), AOM_ICDF(24658),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7991), AOM_ICDF(10895), AOM_ICDF(20438), AOM_ICDF(21146),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5831), AOM_ICDF(7006), AOM_ICDF(15716), AOM_ICDF(16394),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3536), AOM_ICDF(3969), AOM_ICDF(10117), AOM_ICDF(10569),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1369), AOM_ICDF(1686), AOM_ICDF(4062), AOM_ICDF(4385),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(17334), AOM_ICDF(19416), AOM_ICDF(28420), AOM_ICDF(28798),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13512), AOM_ICDF(15917), AOM_ICDF(26736), AOM_ICDF(27122),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9322), AOM_ICDF(10491), AOM_ICDF(21892), AOM_ICDF(22281),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6187), AOM_ICDF(6682), AOM_ICDF(15992), AOM_ICDF(16351),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3733), AOM_ICDF(4073), AOM_ICDF(10406), AOM_ICDF(10735),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1606), AOM_ICDF(1920), AOM_ICDF(4715), AOM_ICDF(5028),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(20589), AOM_ICDF(22106), AOM_ICDF(30065), AOM_ICDF(30422),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14731), AOM_ICDF(16342), AOM_ICDF(27701), AOM_ICDF(28059),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8554), AOM_ICDF(9080), AOM_ICDF(20831), AOM_ICDF(21182),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5011), AOM_ICDF(5354), AOM_ICDF(13968), AOM_ICDF(14296),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2867), AOM_ICDF(3184), AOM_ICDF(8524), AOM_ICDF(8840),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1174), AOM_ICDF(1486), AOM_ICDF(3643), AOM_ICDF(3955),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(23439), AOM_ICDF(24729), AOM_ICDF(31199), AOM_ICDF(31537),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15716), AOM_ICDF(17015), AOM_ICDF(28650), AOM_ICDF(28989),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8381), AOM_ICDF(8812), AOM_ICDF(21032), AOM_ICDF(21369),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4868), AOM_ICDF(5197), AOM_ICDF(13740), AOM_ICDF(14065),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2744), AOM_ICDF(3058), AOM_ICDF(8333), AOM_ICDF(8648),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1185), AOM_ICDF(1497), AOM_ICDF(3656), AOM_ICDF(3968),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(23980), AOM_ICDF(26041), AOM_ICDF(31566), AOM_ICDF(31904),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16060), AOM_ICDF(18243), AOM_ICDF(29508), AOM_ICDF(29868),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8844), AOM_ICDF(9414), AOM_ICDF(22119), AOM_ICDF(22496),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5265), AOM_ICDF(5612), AOM_ICDF(14599), AOM_ICDF(14944),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3058), AOM_ICDF(3375), AOM_ICDF(9028), AOM_ICDF(9351),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1414), AOM_ICDF(1726), AOM_ICDF(4249), AOM_ICDF(4563),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(9994), AOM_ICDF(19506), AOM_ICDF(21744), AOM_ICDF(29408),
+ AOM_ICDF(30809), AOM_ICDF(32768), },
+ {AOM_ICDF(3771), AOM_ICDF(14862), AOM_ICDF(16756), AOM_ICDF(26385),
+ AOM_ICDF(27927), AOM_ICDF(32768), },
+ {AOM_ICDF(964), AOM_ICDF(10643), AOM_ICDF(11416), AOM_ICDF(21060),
+ AOM_ICDF(22316), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(23263), AOM_ICDF(23761), AOM_ICDF(31250), AOM_ICDF(31580),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19631), AOM_ICDF(21067), AOM_ICDF(30262), AOM_ICDF(30596),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12419), AOM_ICDF(13646), AOM_ICDF(25959), AOM_ICDF(26329),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9274), AOM_ICDF(10229), AOM_ICDF(21588), AOM_ICDF(21981),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6778), AOM_ICDF(7496), AOM_ICDF(17069), AOM_ICDF(17469),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4655), AOM_ICDF(5089), AOM_ICDF(12206), AOM_ICDF(12574),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(24055), AOM_ICDF(24771), AOM_ICDF(31529), AOM_ICDF(31851),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18300), AOM_ICDF(19177), AOM_ICDF(29983), AOM_ICDF(30310),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9684), AOM_ICDF(10239), AOM_ICDF(23130), AOM_ICDF(23465),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6112), AOM_ICDF(6511), AOM_ICDF(16539), AOM_ICDF(16874),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3508), AOM_ICDF(3841), AOM_ICDF(10475), AOM_ICDF(10798),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1647), AOM_ICDF(1963), AOM_ICDF(5379), AOM_ICDF(5693),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(24875), AOM_ICDF(25551), AOM_ICDF(31757), AOM_ICDF(32078),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18585), AOM_ICDF(19328), AOM_ICDF(30217), AOM_ICDF(30542),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8948), AOM_ICDF(9350), AOM_ICDF(22251), AOM_ICDF(22577),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5148), AOM_ICDF(5481), AOM_ICDF(14806), AOM_ICDF(15127),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2852), AOM_ICDF(3169), AOM_ICDF(8930), AOM_ICDF(9249),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1298), AOM_ICDF(1609), AOM_ICDF(4289), AOM_ICDF(4600),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(25149), AOM_ICDF(25840), AOM_ICDF(31833), AOM_ICDF(32153),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19051), AOM_ICDF(19689), AOM_ICDF(30461), AOM_ICDF(30785),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8956), AOM_ICDF(9308), AOM_ICDF(22406), AOM_ICDF(22729),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5001), AOM_ICDF(5325), AOM_ICDF(14586), AOM_ICDF(14906),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2875), AOM_ICDF(3189), AOM_ICDF(8639), AOM_ICDF(8955),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1311), AOM_ICDF(1623), AOM_ICDF(4261), AOM_ICDF(4572),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(25212), AOM_ICDF(26544), AOM_ICDF(31879), AOM_ICDF(32209),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18967), AOM_ICDF(20523), AOM_ICDF(30778), AOM_ICDF(31126),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9672), AOM_ICDF(10140), AOM_ICDF(23740), AOM_ICDF(24117),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5732), AOM_ICDF(6079), AOM_ICDF(16067), AOM_ICDF(16423),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3370), AOM_ICDF(3687), AOM_ICDF(10101), AOM_ICDF(10429),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1696), AOM_ICDF(2007), AOM_ICDF(5320), AOM_ICDF(5648),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(26296), AOM_ICDF(26903), AOM_ICDF(30027), AOM_ICDF(31098),
+ AOM_ICDF(31851), AOM_ICDF(32768), },
+ {AOM_ICDF(13982), AOM_ICDF(16223), AOM_ICDF(22840), AOM_ICDF(26540),
+ AOM_ICDF(28301), AOM_ICDF(32768), },
+ {AOM_ICDF(5643), AOM_ICDF(9834), AOM_ICDF(13670), AOM_ICDF(20220),
+ AOM_ICDF(21734), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(14291), AOM_ICDF(20303), AOM_ICDF(29319), AOM_ICDF(29879),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13407), AOM_ICDF(20905), AOM_ICDF(29052), AOM_ICDF(29644),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10860), AOM_ICDF(15525), AOM_ICDF(25872), AOM_ICDF(26766),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7801), AOM_ICDF(9554), AOM_ICDF(20530), AOM_ICDF(21309),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4523), AOM_ICDF(4994), AOM_ICDF(12583), AOM_ICDF(13069),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1784), AOM_ICDF(2110), AOM_ICDF(5198), AOM_ICDF(5511),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(20153), AOM_ICDF(24114), AOM_ICDF(30802), AOM_ICDF(31195),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16079), AOM_ICDF(19936), AOM_ICDF(29580), AOM_ICDF(29992),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10977), AOM_ICDF(12993), AOM_ICDF(25245), AOM_ICDF(25687),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7386), AOM_ICDF(8212), AOM_ICDF(19223), AOM_ICDF(19683),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4797), AOM_ICDF(5164), AOM_ICDF(12928), AOM_ICDF(13288),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2188), AOM_ICDF(2498), AOM_ICDF(6396), AOM_ICDF(6706),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(24221), AOM_ICDF(26746), AOM_ICDF(31634), AOM_ICDF(31980),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17492), AOM_ICDF(20348), AOM_ICDF(30067), AOM_ICDF(30432),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10522), AOM_ICDF(11531), AOM_ICDF(24642), AOM_ICDF(25031),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6567), AOM_ICDF(7006), AOM_ICDF(17688), AOM_ICDF(18036),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4123), AOM_ICDF(4447), AOM_ICDF(11775), AOM_ICDF(12095),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1770), AOM_ICDF(2065), AOM_ICDF(6491), AOM_ICDF(6786),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(25862), AOM_ICDF(27744), AOM_ICDF(31611), AOM_ICDF(31969),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17752), AOM_ICDF(20079), AOM_ICDF(30169), AOM_ICDF(30530),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10588), AOM_ICDF(11308), AOM_ICDF(24834), AOM_ICDF(25180),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7459), AOM_ICDF(7820), AOM_ICDF(17949), AOM_ICDF(18281),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3984), AOM_ICDF(4294), AOM_ICDF(11863), AOM_ICDF(12173),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2689), AOM_ICDF(2969), AOM_ICDF(11371), AOM_ICDF(11651),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(27703), AOM_ICDF(29662), AOM_ICDF(31910), AOM_ICDF(32262),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17904), AOM_ICDF(21878), AOM_ICDF(30510), AOM_ICDF(30969),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10660), AOM_ICDF(12299), AOM_ICDF(24907), AOM_ICDF(25524),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6972), AOM_ICDF(7545), AOM_ICDF(18660), AOM_ICDF(19251),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5359), AOM_ICDF(5768), AOM_ICDF(14022), AOM_ICDF(14397),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5030), AOM_ICDF(5487), AOM_ICDF(10364), AOM_ICDF(10973),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(27980), AOM_ICDF(28880), AOM_ICDF(31045), AOM_ICDF(31931),
+ AOM_ICDF(32370), AOM_ICDF(32768), },
+ {AOM_ICDF(15958), AOM_ICDF(19891), AOM_ICDF(25963), AOM_ICDF(29601),
+ AOM_ICDF(30931), AOM_ICDF(32768), },
+ {AOM_ICDF(3897), AOM_ICDF(12331), AOM_ICDF(15935), AOM_ICDF(24489),
+ AOM_ICDF(26773), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(21443), AOM_ICDF(24237), AOM_ICDF(31473), AOM_ICDF(31808),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18617), AOM_ICDF(22378), AOM_ICDF(30958), AOM_ICDF(31301),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14626), AOM_ICDF(17725), AOM_ICDF(28852), AOM_ICDF(29246),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12155), AOM_ICDF(14598), AOM_ICDF(26000), AOM_ICDF(26506),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10111), AOM_ICDF(12149), AOM_ICDF(23415), AOM_ICDF(24002),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11352), AOM_ICDF(12864), AOM_ICDF(22589), AOM_ICDF(23010),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(22855), AOM_ICDF(25401), AOM_ICDF(31675), AOM_ICDF(31999),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19086), AOM_ICDF(21008), AOM_ICDF(30886), AOM_ICDF(31214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13477), AOM_ICDF(14473), AOM_ICDF(28104), AOM_ICDF(28450),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9553), AOM_ICDF(10401), AOM_ICDF(23815), AOM_ICDF(24225),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5795), AOM_ICDF(6172), AOM_ICDF(18068), AOM_ICDF(18445),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4297), AOM_ICDF(5909), AOM_ICDF(10206), AOM_ICDF(11818),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(24424), AOM_ICDF(26344), AOM_ICDF(31912), AOM_ICDF(32231),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20229), AOM_ICDF(21775), AOM_ICDF(31283), AOM_ICDF(31610),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14224), AOM_ICDF(14882), AOM_ICDF(28673), AOM_ICDF(29012),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10881), AOM_ICDF(11494), AOM_ICDF(23829), AOM_ICDF(24238),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6367), AOM_ICDF(6988), AOM_ICDF(15685), AOM_ICDF(16306),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7447), AOM_ICDF(11916), AOM_ICDF(17873), AOM_ICDF(22342),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(25536), AOM_ICDF(27216), AOM_ICDF(31570), AOM_ICDF(31916),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19600), AOM_ICDF(21062), AOM_ICDF(30095), AOM_ICDF(30444),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11491), AOM_ICDF(12044), AOM_ICDF(26170), AOM_ICDF(26497),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9629), AOM_ICDF(9963), AOM_ICDF(23790), AOM_ICDF(24112),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8073), AOM_ICDF(8359), AOM_ICDF(22212), AOM_ICDF(22498),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(27425), AOM_ICDF(29611), AOM_ICDF(32005), AOM_ICDF(32347),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20590), AOM_ICDF(24265), AOM_ICDF(31252), AOM_ICDF(31658),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14072), AOM_ICDF(15705), AOM_ICDF(28945), AOM_ICDF(29389),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11295), AOM_ICDF(11926), AOM_ICDF(26485), AOM_ICDF(26872),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10627), AOM_ICDF(11292), AOM_ICDF(22141), AOM_ICDF(22805),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 16X16
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(9850), AOM_ICDF(11321), AOM_ICDF(13211), AOM_ICDF(18246),
+ AOM_ICDF(21613), AOM_ICDF(32768), },
+ {AOM_ICDF(4128), AOM_ICDF(6155), AOM_ICDF(7367), AOM_ICDF(11928),
+ AOM_ICDF(14060), AOM_ICDF(32768), },
+ {AOM_ICDF(932), AOM_ICDF(2794), AOM_ICDF(3234), AOM_ICDF(6647),
+ AOM_ICDF(7340), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(9101), AOM_ICDF(10823), AOM_ICDF(21291), AOM_ICDF(22109),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8086), AOM_ICDF(13032), AOM_ICDF(21855), AOM_ICDF(22748),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6563), AOM_ICDF(10137), AOM_ICDF(18484), AOM_ICDF(20069),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4987), AOM_ICDF(6567), AOM_ICDF(14425), AOM_ICDF(15700),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3399), AOM_ICDF(3947), AOM_ICDF(9950), AOM_ICDF(10738),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1474), AOM_ICDF(1793), AOM_ICDF(4347), AOM_ICDF(4690),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(17035), AOM_ICDF(18650), AOM_ICDF(27401), AOM_ICDF(27793),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13213), AOM_ICDF(16039), AOM_ICDF(26044), AOM_ICDF(26448),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9916), AOM_ICDF(11812), AOM_ICDF(22497), AOM_ICDF(22945),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7227), AOM_ICDF(8059), AOM_ICDF(17399), AOM_ICDF(17817),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5144), AOM_ICDF(5572), AOM_ICDF(12546), AOM_ICDF(12892),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2364), AOM_ICDF(2678), AOM_ICDF(6057), AOM_ICDF(6372),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(19805), AOM_ICDF(21667), AOM_ICDF(29302), AOM_ICDF(29680),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14232), AOM_ICDF(16314), AOM_ICDF(27120), AOM_ICDF(27515),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8796), AOM_ICDF(9578), AOM_ICDF(21112), AOM_ICDF(21479),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5203), AOM_ICDF(5552), AOM_ICDF(14231), AOM_ICDF(14563),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2943), AOM_ICDF(3257), AOM_ICDF(8676), AOM_ICDF(8994),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1363), AOM_ICDF(1675), AOM_ICDF(4064), AOM_ICDF(4376),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(24214), AOM_ICDF(25083), AOM_ICDF(30916), AOM_ICDF(31249),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15904), AOM_ICDF(17001), AOM_ICDF(28199), AOM_ICDF(28532),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8324), AOM_ICDF(8717), AOM_ICDF(20480), AOM_ICDF(20808),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4752), AOM_ICDF(5070), AOM_ICDF(13245), AOM_ICDF(13565),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2729), AOM_ICDF(3042), AOM_ICDF(8218), AOM_ICDF(8530),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1385), AOM_ICDF(1697), AOM_ICDF(4196), AOM_ICDF(4508),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(26956), AOM_ICDF(27719), AOM_ICDF(31679), AOM_ICDF(32005),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16913), AOM_ICDF(17759), AOM_ICDF(29092), AOM_ICDF(29422),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8166), AOM_ICDF(8510), AOM_ICDF(20577), AOM_ICDF(20901),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4804), AOM_ICDF(5119), AOM_ICDF(13537), AOM_ICDF(13853),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2951), AOM_ICDF(3263), AOM_ICDF(8766), AOM_ICDF(9079),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1498), AOM_ICDF(1810), AOM_ICDF(4515), AOM_ICDF(4827),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(7335), AOM_ICDF(13463), AOM_ICDF(14286), AOM_ICDF(24588),
+ AOM_ICDF(29117), AOM_ICDF(32768), },
+ {AOM_ICDF(3212), AOM_ICDF(9869), AOM_ICDF(10336), AOM_ICDF(20172),
+ AOM_ICDF(25029), AOM_ICDF(32768), },
+ {AOM_ICDF(917), AOM_ICDF(6904), AOM_ICDF(7251), AOM_ICDF(15225),
+ AOM_ICDF(18595), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(23988), AOM_ICDF(24467), AOM_ICDF(31033), AOM_ICDF(31407),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20390), AOM_ICDF(23805), AOM_ICDF(30556), AOM_ICDF(30920),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13566), AOM_ICDF(16666), AOM_ICDF(27478), AOM_ICDF(27995),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10353), AOM_ICDF(12637), AOM_ICDF(23789), AOM_ICDF(24437),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7956), AOM_ICDF(9364), AOM_ICDF(19994), AOM_ICDF(20621),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6036), AOM_ICDF(6495), AOM_ICDF(15543), AOM_ICDF(16033),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(25643), AOM_ICDF(26692), AOM_ICDF(31634), AOM_ICDF(31957),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18721), AOM_ICDF(20381), AOM_ICDF(30130), AOM_ICDF(30466),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10914), AOM_ICDF(12337), AOM_ICDF(24817), AOM_ICDF(25177),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7843), AOM_ICDF(8667), AOM_ICDF(19826), AOM_ICDF(20212),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5080), AOM_ICDF(5484), AOM_ICDF(14225), AOM_ICDF(14587),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2880), AOM_ICDF(3192), AOM_ICDF(7916), AOM_ICDF(8236),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26447), AOM_ICDF(27233), AOM_ICDF(31779), AOM_ICDF(32097),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19042), AOM_ICDF(20153), AOM_ICDF(30217), AOM_ICDF(30540),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9858), AOM_ICDF(10440), AOM_ICDF(23424), AOM_ICDF(23753),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6276), AOM_ICDF(6657), AOM_ICDF(17158), AOM_ICDF(17489),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3725), AOM_ICDF(4039), AOM_ICDF(10981), AOM_ICDF(11303),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2041), AOM_ICDF(2345), AOM_ICDF(6069), AOM_ICDF(6373),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(27189), AOM_ICDF(27737), AOM_ICDF(31897), AOM_ICDF(32213),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19763), AOM_ICDF(20443), AOM_ICDF(30288), AOM_ICDF(30607),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9033), AOM_ICDF(9393), AOM_ICDF(22097), AOM_ICDF(22415),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5417), AOM_ICDF(5747), AOM_ICDF(15230), AOM_ICDF(15545),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3397), AOM_ICDF(3709), AOM_ICDF(10342), AOM_ICDF(10655),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2805), AOM_ICDF(3108), AOM_ICDF(6119), AOM_ICDF(6422),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(27874), AOM_ICDF(28490), AOM_ICDF(31981), AOM_ICDF(32301),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20112), AOM_ICDF(20724), AOM_ICDF(30607), AOM_ICDF(30935),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9188), AOM_ICDF(9549), AOM_ICDF(22544), AOM_ICDF(22875),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5590), AOM_ICDF(5918), AOM_ICDF(15550), AOM_ICDF(15878),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3567), AOM_ICDF(4015), AOM_ICDF(10658), AOM_ICDF(10988),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1950), AOM_ICDF(2388), AOM_ICDF(6246), AOM_ICDF(6681),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(25724), AOM_ICDF(26337), AOM_ICDF(28579), AOM_ICDF(29957),
+ AOM_ICDF(30927), AOM_ICDF(32768), },
+ {AOM_ICDF(9657), AOM_ICDF(12074), AOM_ICDF(16790), AOM_ICDF(21738),
+ AOM_ICDF(23899), AOM_ICDF(32768), },
+ {AOM_ICDF(4158), AOM_ICDF(7646), AOM_ICDF(10690), AOM_ICDF(16969),
+ AOM_ICDF(18800), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(14330), AOM_ICDF(19826), AOM_ICDF(28364), AOM_ICDF(29154),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13503), AOM_ICDF(21352), AOM_ICDF(28714), AOM_ICDF(29534),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11754), AOM_ICDF(16853), AOM_ICDF(25931), AOM_ICDF(27325),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8311), AOM_ICDF(10581), AOM_ICDF(21258), AOM_ICDF(22633),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5297), AOM_ICDF(5819), AOM_ICDF(14162), AOM_ICDF(14892),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2887), AOM_ICDF(3208), AOM_ICDF(7455), AOM_ICDF(7768),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(22005), AOM_ICDF(24480), AOM_ICDF(30925), AOM_ICDF(31309),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17332), AOM_ICDF(20557), AOM_ICDF(29696), AOM_ICDF(30096),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11930), AOM_ICDF(14337), AOM_ICDF(25931), AOM_ICDF(26358),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8888), AOM_ICDF(10020), AOM_ICDF(20964), AOM_ICDF(21352),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5694), AOM_ICDF(6135), AOM_ICDF(14997), AOM_ICDF(15376),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2521), AOM_ICDF(2842), AOM_ICDF(7765), AOM_ICDF(8069),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(23993), AOM_ICDF(25546), AOM_ICDF(31427), AOM_ICDF(31762),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18134), AOM_ICDF(20327), AOM_ICDF(29992), AOM_ICDF(30386),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10997), AOM_ICDF(12057), AOM_ICDF(24719), AOM_ICDF(25141),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5719), AOM_ICDF(6153), AOM_ICDF(16654), AOM_ICDF(17032),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3637), AOM_ICDF(3953), AOM_ICDF(11392), AOM_ICDF(11696),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1837), AOM_ICDF(2127), AOM_ICDF(5703), AOM_ICDF(5993),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(26095), AOM_ICDF(26989), AOM_ICDF(31766), AOM_ICDF(32091),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19524), AOM_ICDF(20820), AOM_ICDF(30413), AOM_ICDF(30738),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9962), AOM_ICDF(10551), AOM_ICDF(22667), AOM_ICDF(23010),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5773), AOM_ICDF(6093), AOM_ICDF(15402), AOM_ICDF(15748),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3546), AOM_ICDF(3850), AOM_ICDF(9983), AOM_ICDF(10287),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2387), AOM_ICDF(2668), AOM_ICDF(5711), AOM_ICDF(5992),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29071), AOM_ICDF(29675), AOM_ICDF(31761), AOM_ICDF(32087),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18709), AOM_ICDF(19761), AOM_ICDF(29374), AOM_ICDF(29730),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9336), AOM_ICDF(10048), AOM_ICDF(22625), AOM_ICDF(22988),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6446), AOM_ICDF(6793), AOM_ICDF(16834), AOM_ICDF(17172),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4227), AOM_ICDF(4539), AOM_ICDF(11587), AOM_ICDF(11909),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2624), AOM_ICDF(2929), AOM_ICDF(7139), AOM_ICDF(7444),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(25114), AOM_ICDF(25872), AOM_ICDF(29577), AOM_ICDF(31173),
+ AOM_ICDF(32008), AOM_ICDF(32768), },
+ {AOM_ICDF(11286), AOM_ICDF(14376), AOM_ICDF(22156), AOM_ICDF(26266),
+ AOM_ICDF(29278), AOM_ICDF(32768), },
+ {AOM_ICDF(2680), AOM_ICDF(11055), AOM_ICDF(14683), AOM_ICDF(23068),
+ AOM_ICDF(26651), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(22838), AOM_ICDF(24926), AOM_ICDF(31689), AOM_ICDF(32019),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19245), AOM_ICDF(24299), AOM_ICDF(31481), AOM_ICDF(31852),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15429), AOM_ICDF(21159), AOM_ICDF(30176), AOM_ICDF(30732),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12373), AOM_ICDF(17092), AOM_ICDF(26912), AOM_ICDF(27758),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10899), AOM_ICDF(13395), AOM_ICDF(23604), AOM_ICDF(24329),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12767), AOM_ICDF(13096), AOM_ICDF(21644), AOM_ICDF(22083),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(24527), AOM_ICDF(26101), AOM_ICDF(31912), AOM_ICDF(32226),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20576), AOM_ICDF(22265), AOM_ICDF(31439), AOM_ICDF(31762),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13792), AOM_ICDF(15369), AOM_ICDF(28531), AOM_ICDF(28942),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9392), AOM_ICDF(11153), AOM_ICDF(23790), AOM_ICDF(24274),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5587), AOM_ICDF(6191), AOM_ICDF(19027), AOM_ICDF(19480),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(17246), AOM_ICDF(22420),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(24737), AOM_ICDF(25605), AOM_ICDF(31953), AOM_ICDF(32268),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20933), AOM_ICDF(21817), AOM_ICDF(31546), AOM_ICDF(31861),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13887), AOM_ICDF(14656), AOM_ICDF(28490), AOM_ICDF(28817),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10018), AOM_ICDF(11047), AOM_ICDF(23593), AOM_ICDF(23967),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3855), AOM_ICDF(6746), AOM_ICDF(15420), AOM_ICDF(18312),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(25861), AOM_ICDF(26475), AOM_ICDF(32028), AOM_ICDF(32343),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22221), AOM_ICDF(22755), AOM_ICDF(31735), AOM_ICDF(32050),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15517), AOM_ICDF(15928), AOM_ICDF(29558), AOM_ICDF(29870),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7719), AOM_ICDF(8507), AOM_ICDF(20165), AOM_ICDF(20638),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(19275), AOM_ICDF(25058),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(28675), AOM_ICDF(29326), AOM_ICDF(31767), AOM_ICDF(32092),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21491), AOM_ICDF(22422), AOM_ICDF(29827), AOM_ICDF(30197),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10080), AOM_ICDF(11350), AOM_ICDF(23883), AOM_ICDF(24321),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8383), AOM_ICDF(8793), AOM_ICDF(21382), AOM_ICDF(21739),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6835), AOM_ICDF(7137), AOM_ICDF(20646), AOM_ICDF(20947),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 32X32
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(15501), AOM_ICDF(16574), AOM_ICDF(17941), AOM_ICDF(20080),
+ AOM_ICDF(21984), AOM_ICDF(32768), },
+ {AOM_ICDF(1676), AOM_ICDF(3221), AOM_ICDF(3952), AOM_ICDF(6916),
+ AOM_ICDF(7628), AOM_ICDF(32768), },
+ {AOM_ICDF(468), AOM_ICDF(1825), AOM_ICDF(2211), AOM_ICDF(4504),
+ AOM_ICDF(4877), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(5597), AOM_ICDF(9461), AOM_ICDF(16777), AOM_ICDF(17896),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5231), AOM_ICDF(9185), AOM_ICDF(16569), AOM_ICDF(17688),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4128), AOM_ICDF(6983), AOM_ICDF(13860), AOM_ICDF(15674),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2908), AOM_ICDF(4209), AOM_ICDF(9762), AOM_ICDF(11321),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2269), AOM_ICDF(2797), AOM_ICDF(7063), AOM_ICDF(7999),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1270), AOM_ICDF(1588), AOM_ICDF(3710), AOM_ICDF(4051),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(14862), AOM_ICDF(16903), AOM_ICDF(25712), AOM_ICDF(26189),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12778), AOM_ICDF(15420), AOM_ICDF(25395), AOM_ICDF(25836),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10402), AOM_ICDF(12279), AOM_ICDF(22858), AOM_ICDF(23302),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8026), AOM_ICDF(8897), AOM_ICDF(18866), AOM_ICDF(19290),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6610), AOM_ICDF(7121), AOM_ICDF(15967), AOM_ICDF(16322),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3980), AOM_ICDF(4296), AOM_ICDF(10443), AOM_ICDF(10757),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(19177), AOM_ICDF(21516), AOM_ICDF(28474), AOM_ICDF(28892),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14901), AOM_ICDF(17006), AOM_ICDF(27100), AOM_ICDF(27500),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10655), AOM_ICDF(11487), AOM_ICDF(23288), AOM_ICDF(23664),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6980), AOM_ICDF(7408), AOM_ICDF(17955), AOM_ICDF(18288),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3891), AOM_ICDF(4206), AOM_ICDF(11255), AOM_ICDF(11570),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1532), AOM_ICDF(1844), AOM_ICDF(4593), AOM_ICDF(4905),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(24338), AOM_ICDF(25864), AOM_ICDF(30962), AOM_ICDF(31346),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16430), AOM_ICDF(18166), AOM_ICDF(28700), AOM_ICDF(29068),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9726), AOM_ICDF(10244), AOM_ICDF(22575), AOM_ICDF(22934),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5539), AOM_ICDF(5868), AOM_ICDF(15030), AOM_ICDF(15363),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3305), AOM_ICDF(3620), AOM_ICDF(9405), AOM_ICDF(9720),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1482), AOM_ICDF(1794), AOM_ICDF(4429), AOM_ICDF(4741),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29843), AOM_ICDF(30312), AOM_ICDF(31922), AOM_ICDF(32242),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17390), AOM_ICDF(18061), AOM_ICDF(28932), AOM_ICDF(29258),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7968), AOM_ICDF(8308), AOM_ICDF(20128), AOM_ICDF(20447),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4523), AOM_ICDF(4838), AOM_ICDF(12959), AOM_ICDF(13274),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2765), AOM_ICDF(3077), AOM_ICDF(8284), AOM_ICDF(8596),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1422), AOM_ICDF(1733), AOM_ICDF(4244), AOM_ICDF(4556),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(19066), AOM_ICDF(20217), AOM_ICDF(21504), AOM_ICDF(24559),
+ AOM_ICDF(26831), AOM_ICDF(32768), },
+ {AOM_ICDF(5708), AOM_ICDF(7393), AOM_ICDF(8108), AOM_ICDF(11986),
+ AOM_ICDF(17424), AOM_ICDF(32768), },
+ {AOM_ICDF(1144), AOM_ICDF(2709), AOM_ICDF(3111), AOM_ICDF(6009),
+ AOM_ICDF(10882), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(17586), AOM_ICDF(17895), AOM_ICDF(27561), AOM_ICDF(28179),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16442), AOM_ICDF(19113), AOM_ICDF(27944), AOM_ICDF(28764),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12438), AOM_ICDF(17724), AOM_ICDF(26435), AOM_ICDF(27714),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9439), AOM_ICDF(12708), AOM_ICDF(22594), AOM_ICDF(24060),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7762), AOM_ICDF(9639), AOM_ICDF(19669), AOM_ICDF(20614),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5324), AOM_ICDF(5894), AOM_ICDF(14504), AOM_ICDF(15100),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(23414), AOM_ICDF(25239), AOM_ICDF(31300), AOM_ICDF(31670),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18089), AOM_ICDF(22136), AOM_ICDF(30318), AOM_ICDF(30720),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12081), AOM_ICDF(15216), AOM_ICDF(27074), AOM_ICDF(27531),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9327), AOM_ICDF(10783), AOM_ICDF(22927), AOM_ICDF(23384),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6381), AOM_ICDF(6914), AOM_ICDF(17070), AOM_ICDF(17506),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3854), AOM_ICDF(4164), AOM_ICDF(10355), AOM_ICDF(10665),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(24366), AOM_ICDF(25993), AOM_ICDF(31678), AOM_ICDF(32001),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18041), AOM_ICDF(21047), AOM_ICDF(30693), AOM_ICDF(31031),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11271), AOM_ICDF(12970), AOM_ICDF(26794), AOM_ICDF(27180),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8173), AOM_ICDF(8758), AOM_ICDF(21941), AOM_ICDF(22340),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5248), AOM_ICDF(5568), AOM_ICDF(15646), AOM_ICDF(15994),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2689), AOM_ICDF(3193), AOM_ICDF(6722), AOM_ICDF(7226),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(27565), AOM_ICDF(28694), AOM_ICDF(31993), AOM_ICDF(32314),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20591), AOM_ICDF(22532), AOM_ICDF(31143), AOM_ICDF(31473),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11268), AOM_ICDF(12113), AOM_ICDF(25966), AOM_ICDF(26331),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7268), AOM_ICDF(7674), AOM_ICDF(19409), AOM_ICDF(19747),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4404), AOM_ICDF(4686), AOM_ICDF(13213), AOM_ICDF(13495),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2637), AOM_ICDF(3766), AOM_ICDF(7533), AOM_ICDF(8663),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29847), AOM_ICDF(30306), AOM_ICDF(32081), AOM_ICDF(32397),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22752), AOM_ICDF(23329), AOM_ICDF(31334), AOM_ICDF(31658),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10305), AOM_ICDF(10672), AOM_ICDF(24328), AOM_ICDF(24657),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5712), AOM_ICDF(6031), AOM_ICDF(16694), AOM_ICDF(17018),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3979), AOM_ICDF(4278), AOM_ICDF(10985), AOM_ICDF(11284),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2465), AOM_ICDF(2900), AOM_ICDF(6815), AOM_ICDF(7250),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(21609), AOM_ICDF(22111), AOM_ICDF(24624), AOM_ICDF(26045),
+ AOM_ICDF(27916), AOM_ICDF(32768), },
+ {AOM_ICDF(5498), AOM_ICDF(7300), AOM_ICDF(12100), AOM_ICDF(15851),
+ AOM_ICDF(18370), AOM_ICDF(32768), },
+ {AOM_ICDF(1268), AOM_ICDF(3284), AOM_ICDF(6295), AOM_ICDF(10711),
+ AOM_ICDF(12999), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(9621), AOM_ICDF(16733), AOM_ICDF(26354), AOM_ICDF(27609),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9619), AOM_ICDF(18339), AOM_ICDF(27578), AOM_ICDF(28547),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9575), AOM_ICDF(18177), AOM_ICDF(24044), AOM_ICDF(25625),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5999), AOM_ICDF(11578), AOM_ICDF(20125), AOM_ICDF(22544),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4842), AOM_ICDF(6220), AOM_ICDF(12898), AOM_ICDF(14944),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(948), AOM_ICDF(1247), AOM_ICDF(3292), AOM_ICDF(3791),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(21002), AOM_ICDF(25135), AOM_ICDF(31208), AOM_ICDF(31629),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18106), AOM_ICDF(22116), AOM_ICDF(29422), AOM_ICDF(30013),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14580), AOM_ICDF(15855), AOM_ICDF(26171), AOM_ICDF(26535),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9965), AOM_ICDF(10971), AOM_ICDF(23043), AOM_ICDF(23378),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7123), AOM_ICDF(7395), AOM_ICDF(16893), AOM_ICDF(17232),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3187), AOM_ICDF(3432), AOM_ICDF(7600), AOM_ICDF(7845),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26393), AOM_ICDF(27823), AOM_ICDF(31691), AOM_ICDF(32028),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18449), AOM_ICDF(20915), AOM_ICDF(30092), AOM_ICDF(30531),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11710), AOM_ICDF(12263), AOM_ICDF(26838), AOM_ICDF(27139),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7737), AOM_ICDF(8192), AOM_ICDF(21299), AOM_ICDF(21572),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3572), AOM_ICDF(4038), AOM_ICDF(13822), AOM_ICDF(14287),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1689), AOM_ICDF(2703), AOM_ICDF(3716), AOM_ICDF(4729),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28371), AOM_ICDF(29507), AOM_ICDF(31986), AOM_ICDF(32314),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19411), AOM_ICDF(21758), AOM_ICDF(30225), AOM_ICDF(30579),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11995), AOM_ICDF(12434), AOM_ICDF(26661), AOM_ICDF(27026),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9175), AOM_ICDF(9721), AOM_ICDF(22173), AOM_ICDF(22501),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9078), AOM_ICDF(9742), AOM_ICDF(13063), AOM_ICDF(13727),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3192), AOM_ICDF(3830), AOM_ICDF(6809), AOM_ICDF(7447),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(31351), AOM_ICDF(31682), AOM_ICDF(32124), AOM_ICDF(32438),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20883), AOM_ICDF(22618), AOM_ICDF(30828), AOM_ICDF(31173),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11388), AOM_ICDF(12381), AOM_ICDF(24266), AOM_ICDF(24700),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6987), AOM_ICDF(7380), AOM_ICDF(18401), AOM_ICDF(18795),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2016), AOM_ICDF(2773), AOM_ICDF(7814), AOM_ICDF(8570),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2849), AOM_ICDF(4986), AOM_ICDF(8548), AOM_ICDF(10685),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(19461), AOM_ICDF(21728), AOM_ICDF(26601), AOM_ICDF(29082),
+ AOM_ICDF(30105), AOM_ICDF(32768), },
+ {AOM_ICDF(2845), AOM_ICDF(10798), AOM_ICDF(14316), AOM_ICDF(23118),
+ AOM_ICDF(24609), AOM_ICDF(32768), },
+ {AOM_ICDF(705), AOM_ICDF(10138), AOM_ICDF(12123), AOM_ICDF(21473),
+ AOM_ICDF(23327), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(24780), AOM_ICDF(25836), AOM_ICDF(31623), AOM_ICDF(31938),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22703), AOM_ICDF(24390), AOM_ICDF(31353), AOM_ICDF(31797),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18218), AOM_ICDF(20834), AOM_ICDF(29429), AOM_ICDF(30327),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12517), AOM_ICDF(15626), AOM_ICDF(26000), AOM_ICDF(27281),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9988), AOM_ICDF(12791), AOM_ICDF(24073), AOM_ICDF(25295),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8529), AOM_ICDF(9202), AOM_ICDF(18853), AOM_ICDF(19751),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(26497), AOM_ICDF(27282), AOM_ICDF(32016), AOM_ICDF(32333),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22767), AOM_ICDF(24548), AOM_ICDF(31680), AOM_ICDF(32007),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10455), AOM_ICDF(13458), AOM_ICDF(26448), AOM_ICDF(26995),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3684), AOM_ICDF(4847), AOM_ICDF(20940), AOM_ICDF(21522),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9063), AOM_ICDF(11155), AOM_ICDF(17430), AOM_ICDF(19521),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6554), AOM_ICDF(11469), AOM_ICDF(16384), AOM_ICDF(21299),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26212), AOM_ICDF(26755), AOM_ICDF(32090), AOM_ICDF(32400),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22239), AOM_ICDF(23123), AOM_ICDF(31406), AOM_ICDF(31725),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7220), AOM_ICDF(7609), AOM_ICDF(22715), AOM_ICDF(22993),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5554), AOM_ICDF(6387), AOM_ICDF(11941), AOM_ICDF(12774),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4915), AOM_ICDF(9830), AOM_ICDF(19661), AOM_ICDF(24576),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28796), AOM_ICDF(29237), AOM_ICDF(32134), AOM_ICDF(32446),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(25912), AOM_ICDF(26456), AOM_ICDF(32010), AOM_ICDF(32321),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14399), AOM_ICDF(14668), AOM_ICDF(26039), AOM_ICDF(26309),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2341), AOM_ICDF(4096), AOM_ICDF(11703), AOM_ICDF(13458),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(30253), AOM_ICDF(30635), AOM_ICDF(32016), AOM_ICDF(32330),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(23066), AOM_ICDF(23485), AOM_ICDF(30571), AOM_ICDF(30897),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11664), AOM_ICDF(12092), AOM_ICDF(22146), AOM_ICDF(22496),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5932), AOM_ICDF(6387), AOM_ICDF(17131), AOM_ICDF(17470),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5501), AOM_ICDF(5846), AOM_ICDF(15763), AOM_ICDF(16097),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4946), AOM_ICDF(6801), AOM_ICDF(14838), AOM_ICDF(16693),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+};
+
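+/* Editorial note on the layout of the tables in this file (a hedged
+ * reading of the data, not libaom documentation): each table is indexed
+ * as [TX size][plane][intra/inter][coef band][context row], and every
+ * innermost row is a cumulative distribution terminating at
+ * AOM_ICDF(32768). Band 0 rows carry one extra symbol, presumably the
+ * block-level head token. In builds of this era where CONFIG_EC_SMALLMUL
+ * is enabled, AOM_ICDF(x) is believed to expand to (32768 - x), i.e. the
+ * CDFs are stored inverted. The guarded sketch below is a self-contained
+ * illustration under that assumption; icdf_row_to_probs is a hypothetical
+ * helper, not part of libaom.
+ */
+#if 0 /* illustrative only, excluded from compilation */
+#include <stdio.h>
+
+#define ICDF(x) (32768U - (x)) /* assumed AOM_ICDF expansion */
+
+/* Hypothetical helper: recover per-symbol probabilities from one
+ * inverted-CDF row (probability = cdf[i] - cdf[i-1], where
+ * cdf[i] = 32768 - icdf[i]). */
+static void icdf_row_to_probs(const unsigned *icdf, int nsyms,
+                              unsigned *probs) {
+  unsigned prev = 0;
+  for (int i = 0; i < nsyms; ++i) {
+    unsigned cdf = 32768U - icdf[i]; /* undo the inversion */
+    probs[i] = cdf - prev;
+    prev = cdf;
+  }
+}
+
+int main(void) {
+  /* First Band 1 row of the q1 TX 4X4 / Y plane / Intra table below. */
+  const unsigned row[5] = { ICDF(8632), ICDF(15472), ICDF(26027),
+                            ICDF(26596), ICDF(32768) };
+  unsigned probs[5];
+  icdf_row_to_probs(row, 5, probs);
+  /* Prints 8632, 6840, 10555, 569, 6172 -- summing to 32768. */
+  for (int i = 0; i < 5; ++i) printf("p[%d] = %u/32768\n", i, probs[i]);
+  return 0;
+}
+#endif
+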
+static const coeff_cdf_model
+av1_default_coef_head_cdfs_q1[TX_SIZES][PLANE_TYPES] = {
+ { // TX 4X4
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(21480), AOM_ICDF(22344), AOM_ICDF(27339), AOM_ICDF(29181),
+ AOM_ICDF(29765), AOM_ICDF(32768), },
+ {AOM_ICDF(9705), AOM_ICDF(12374), AOM_ICDF(20269), AOM_ICDF(24109),
+ AOM_ICDF(25071), AOM_ICDF(32768), },
+ {AOM_ICDF(2883), AOM_ICDF(6716), AOM_ICDF(10461), AOM_ICDF(16169),
+ AOM_ICDF(17355), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(8632), AOM_ICDF(15472), AOM_ICDF(26027), AOM_ICDF(26596),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8543), AOM_ICDF(14383), AOM_ICDF(25665), AOM_ICDF(26207),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8561), AOM_ICDF(12583), AOM_ICDF(22962), AOM_ICDF(23529),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6538), AOM_ICDF(8023), AOM_ICDF(18106), AOM_ICDF(18672),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4363), AOM_ICDF(4797), AOM_ICDF(12512), AOM_ICDF(12937),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2471), AOM_ICDF(2791), AOM_ICDF(7274), AOM_ICDF(7605),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(14783), AOM_ICDF(18891), AOM_ICDF(29122), AOM_ICDF(29700),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11829), AOM_ICDF(16696), AOM_ICDF(28114), AOM_ICDF(28591),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8965), AOM_ICDF(11076), AOM_ICDF(23514), AOM_ICDF(24031),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6257), AOM_ICDF(7011), AOM_ICDF(17779), AOM_ICDF(18315),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4329), AOM_ICDF(4704), AOM_ICDF(12448), AOM_ICDF(12839),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2542), AOM_ICDF(2860), AOM_ICDF(7886), AOM_ICDF(8207),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(19181), AOM_ICDF(22038), AOM_ICDF(30697), AOM_ICDF(31106),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12174), AOM_ICDF(17208), AOM_ICDF(28897), AOM_ICDF(29328),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8420), AOM_ICDF(10706), AOM_ICDF(23788), AOM_ICDF(24321),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6153), AOM_ICDF(6850), AOM_ICDF(17983), AOM_ICDF(18530),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4168), AOM_ICDF(4524), AOM_ICDF(12547), AOM_ICDF(12983),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3136), AOM_ICDF(3480), AOM_ICDF(9221), AOM_ICDF(9659),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(18701), AOM_ICDF(23907), AOM_ICDF(31282), AOM_ICDF(31695),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12655), AOM_ICDF(19258), AOM_ICDF(29824), AOM_ICDF(30279),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8699), AOM_ICDF(11467), AOM_ICDF(24763), AOM_ICDF(25450),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6268), AOM_ICDF(7027), AOM_ICDF(18397), AOM_ICDF(19102),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5613), AOM_ICDF(6020), AOM_ICDF(14084), AOM_ICDF(14637),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2443), AOM_ICDF(2919), AOM_ICDF(8222), AOM_ICDF(8639),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(6156), AOM_ICDF(23586), AOM_ICDF(30739), AOM_ICDF(31476),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6056), AOM_ICDF(21852), AOM_ICDF(29323), AOM_ICDF(30442),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6113), AOM_ICDF(14408), AOM_ICDF(24331), AOM_ICDF(26899),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5825), AOM_ICDF(9328), AOM_ICDF(18946), AOM_ICDF(22143),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5023), AOM_ICDF(6340), AOM_ICDF(14812), AOM_ICDF(17429),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5140), AOM_ICDF(6104), AOM_ICDF(11565), AOM_ICDF(14135),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(12606), AOM_ICDF(20577), AOM_ICDF(21354), AOM_ICDF(29249),
+ AOM_ICDF(29714), AOM_ICDF(32768), },
+ {AOM_ICDF(8630), AOM_ICDF(17728), AOM_ICDF(19353), AOM_ICDF(27722),
+ AOM_ICDF(28219), AOM_ICDF(32768), },
+ {AOM_ICDF(3040), AOM_ICDF(12616), AOM_ICDF(14286), AOM_ICDF(23918),
+ AOM_ICDF(24539), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(20824), AOM_ICDF(21610), AOM_ICDF(31110), AOM_ICDF(31445),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15597), AOM_ICDF(17692), AOM_ICDF(29670), AOM_ICDF(30015),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8954), AOM_ICDF(10007), AOM_ICDF(23515), AOM_ICDF(23902),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6693), AOM_ICDF(7282), AOM_ICDF(18144), AOM_ICDF(18537),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4048), AOM_ICDF(4451), AOM_ICDF(12255), AOM_ICDF(12626),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2619), AOM_ICDF(2960), AOM_ICDF(7084), AOM_ICDF(7429),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(21628), AOM_ICDF(22786), AOM_ICDF(31520), AOM_ICDF(31865),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15854), AOM_ICDF(17925), AOM_ICDF(29872), AOM_ICDF(30228),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8120), AOM_ICDF(8815), AOM_ICDF(22575), AOM_ICDF(22964),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5006), AOM_ICDF(5427), AOM_ICDF(15724), AOM_ICDF(16101),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2967), AOM_ICDF(3311), AOM_ICDF(9553), AOM_ICDF(9913),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2878), AOM_ICDF(3188), AOM_ICDF(5418), AOM_ICDF(5825),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(21594), AOM_ICDF(23721), AOM_ICDF(31496), AOM_ICDF(31872),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15704), AOM_ICDF(18452), AOM_ICDF(30207), AOM_ICDF(30585),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8637), AOM_ICDF(9546), AOM_ICDF(23803), AOM_ICDF(24254),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5991), AOM_ICDF(6479), AOM_ICDF(17619), AOM_ICDF(18099),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3856), AOM_ICDF(4220), AOM_ICDF(11623), AOM_ICDF(12111),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3501), AOM_ICDF(3825), AOM_ICDF(6760), AOM_ICDF(7246),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(19929), AOM_ICDF(23849), AOM_ICDF(31581), AOM_ICDF(31956),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14239), AOM_ICDF(19461), AOM_ICDF(30323), AOM_ICDF(30761),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8094), AOM_ICDF(9844), AOM_ICDF(23595), AOM_ICDF(24338),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5204), AOM_ICDF(5848), AOM_ICDF(16396), AOM_ICDF(17121),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3568), AOM_ICDF(3961), AOM_ICDF(10658), AOM_ICDF(11301),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1594), AOM_ICDF(1913), AOM_ICDF(5552), AOM_ICDF(6040),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(13512), AOM_ICDF(24112), AOM_ICDF(31648), AOM_ICDF(32057),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10595), AOM_ICDF(22378), AOM_ICDF(30592), AOM_ICDF(31236),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7571), AOM_ICDF(13305), AOM_ICDF(24936), AOM_ICDF(26656),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6163), AOM_ICDF(8207), AOM_ICDF(18688), AOM_ICDF(20500),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3185), AOM_ICDF(4449), AOM_ICDF(13298), AOM_ICDF(14707),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1890), AOM_ICDF(2731), AOM_ICDF(7562), AOM_ICDF(8192),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(26689), AOM_ICDF(27259), AOM_ICDF(30590), AOM_ICDF(31538),
+ AOM_ICDF(31930), AOM_ICDF(32768), },
+ {AOM_ICDF(17843), AOM_ICDF(19709), AOM_ICDF(27299), AOM_ICDF(29813),
+ AOM_ICDF(30435), AOM_ICDF(32768), },
+ {AOM_ICDF(9138), AOM_ICDF(13232), AOM_ICDF(20487), AOM_ICDF(25798),
+ AOM_ICDF(26794), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(13264), AOM_ICDF(22970), AOM_ICDF(30914), AOM_ICDF(31354),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11647), AOM_ICDF(20651), AOM_ICDF(30191), AOM_ICDF(30692),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10449), AOM_ICDF(15871), AOM_ICDF(27240), AOM_ICDF(27909),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7759), AOM_ICDF(9400), AOM_ICDF(22161), AOM_ICDF(22812),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4095), AOM_ICDF(4544), AOM_ICDF(13856), AOM_ICDF(14309),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3199), AOM_ICDF(3509), AOM_ICDF(8639), AOM_ICDF(8964),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(18180), AOM_ICDF(25717), AOM_ICDF(31446), AOM_ICDF(31899),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14593), AOM_ICDF(22211), AOM_ICDF(30845), AOM_ICDF(31282),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10443), AOM_ICDF(13816), AOM_ICDF(27239), AOM_ICDF(27789),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6760), AOM_ICDF(7698), AOM_ICDF(19648), AOM_ICDF(20234),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3896), AOM_ICDF(4253), AOM_ICDF(12678), AOM_ICDF(13056),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5461), AOM_ICDF(6722), AOM_ICDF(13443), AOM_ICDF(14704),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(22145), AOM_ICDF(27566), AOM_ICDF(31813), AOM_ICDF(32212),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15241), AOM_ICDF(23215), AOM_ICDF(31215), AOM_ICDF(31658),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11148), AOM_ICDF(15527), AOM_ICDF(28336), AOM_ICDF(28891),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8864), AOM_ICDF(10402), AOM_ICDF(24069), AOM_ICDF(24811),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6919), AOM_ICDF(7527), AOM_ICDF(19607), AOM_ICDF(20260),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(18971), AOM_ICDF(25869),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(18795), AOM_ICDF(27901), AOM_ICDF(31907), AOM_ICDF(32272),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13177), AOM_ICDF(24166), AOM_ICDF(31395), AOM_ICDF(31820),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9217), AOM_ICDF(15410), AOM_ICDF(28101), AOM_ICDF(28868),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6328), AOM_ICDF(8749), AOM_ICDF(21695), AOM_ICDF(22954),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15672), AOM_ICDF(17809), AOM_ICDF(22795), AOM_ICDF(24932),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(9431), AOM_ICDF(28094), AOM_ICDF(31965), AOM_ICDF(32338),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8107), AOM_ICDF(26038), AOM_ICDF(31393), AOM_ICDF(32024),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9347), AOM_ICDF(19880), AOM_ICDF(28342), AOM_ICDF(29759),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7092), AOM_ICDF(13694), AOM_ICDF(25432), AOM_ICDF(28366),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7802), AOM_ICDF(12483), AOM_ICDF(21845), AOM_ICDF(26526),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(29212), AOM_ICDF(29998), AOM_ICDF(31256), AOM_ICDF(32035),
+ AOM_ICDF(32360), AOM_ICDF(32768), },
+ {AOM_ICDF(19150), AOM_ICDF(23189), AOM_ICDF(28117), AOM_ICDF(31168),
+ AOM_ICDF(31611), AOM_ICDF(32768), },
+ {AOM_ICDF(9324), AOM_ICDF(18178), AOM_ICDF(23556), AOM_ICDF(29422),
+ AOM_ICDF(30204), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(20406), AOM_ICDF(26462), AOM_ICDF(31971), AOM_ICDF(32298),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15834), AOM_ICDF(22647), AOM_ICDF(31547), AOM_ICDF(31902),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11047), AOM_ICDF(15431), AOM_ICDF(27825), AOM_ICDF(28393),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8665), AOM_ICDF(11083), AOM_ICDF(22493), AOM_ICDF(23423),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6191), AOM_ICDF(7733), AOM_ICDF(16624), AOM_ICDF(17708),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3210), AOM_ICDF(3875), AOM_ICDF(10937), AOM_ICDF(11867),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(21520), AOM_ICDF(27152), AOM_ICDF(31994), AOM_ICDF(32324),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17519), AOM_ICDF(23609), AOM_ICDF(31670), AOM_ICDF(32022),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10647), AOM_ICDF(14610), AOM_ICDF(28389), AOM_ICDF(28873),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7660), AOM_ICDF(10704), AOM_ICDF(22849), AOM_ICDF(23680),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5535), AOM_ICDF(6454), AOM_ICDF(17275), AOM_ICDF(17753),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4096), AOM_ICDF(6144), AOM_ICDF(13653), AOM_ICDF(15701),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(22487), AOM_ICDF(27996), AOM_ICDF(32020), AOM_ICDF(32381),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17371), AOM_ICDF(24453), AOM_ICDF(31777), AOM_ICDF(32152),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11366), AOM_ICDF(16072), AOM_ICDF(29193), AOM_ICDF(29761),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12545), AOM_ICDF(13869), AOM_ICDF(24642), AOM_ICDF(25603),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4119), AOM_ICDF(5056), AOM_ICDF(16103), AOM_ICDF(17601),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(19350), AOM_ICDF(28517), AOM_ICDF(32050), AOM_ICDF(32401),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14752), AOM_ICDF(25831), AOM_ICDF(31897), AOM_ICDF(32261),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11157), AOM_ICDF(20816), AOM_ICDF(29821), AOM_ICDF(30635),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8157), AOM_ICDF(9691), AOM_ICDF(22868), AOM_ICDF(23705),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(10650), AOM_ICDF(17203), AOM_ICDF(19661),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(15557), AOM_ICDF(29043), AOM_ICDF(32047), AOM_ICDF(32424),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10253), AOM_ICDF(27948), AOM_ICDF(31922), AOM_ICDF(32329),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7797), AOM_ICDF(18860), AOM_ICDF(28870), AOM_ICDF(30661),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5617), AOM_ICDF(11235), AOM_ICDF(27151), AOM_ICDF(29959),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 8X8
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(20585), AOM_ICDF(21554), AOM_ICDF(27179), AOM_ICDF(28995),
+ AOM_ICDF(30170), AOM_ICDF(32768), },
+ {AOM_ICDF(6316), AOM_ICDF(8987), AOM_ICDF(15571), AOM_ICDF(19766),
+ AOM_ICDF(21417), AOM_ICDF(32768), },
+ {AOM_ICDF(1426), AOM_ICDF(4693), AOM_ICDF(6721), AOM_ICDF(11940),
+ AOM_ICDF(12874), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(10177), AOM_ICDF(14297), AOM_ICDF(24926), AOM_ICDF(25396),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8812), AOM_ICDF(13381), AOM_ICDF(24128), AOM_ICDF(24649),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8090), AOM_ICDF(11314), AOM_ICDF(21329), AOM_ICDF(21906),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6324), AOM_ICDF(7511), AOM_ICDF(17212), AOM_ICDF(17717),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4272), AOM_ICDF(4718), AOM_ICDF(12016), AOM_ICDF(12415),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2129), AOM_ICDF(2445), AOM_ICDF(6433), AOM_ICDF(6755),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(15709), AOM_ICDF(18339), AOM_ICDF(28174), AOM_ICDF(28566),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12592), AOM_ICDF(15866), AOM_ICDF(27071), AOM_ICDF(27475),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9361), AOM_ICDF(10768), AOM_ICDF(22752), AOM_ICDF(23166),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6525), AOM_ICDF(7048), AOM_ICDF(17478), AOM_ICDF(17863),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4314), AOM_ICDF(4656), AOM_ICDF(12242), AOM_ICDF(12579),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2419), AOM_ICDF(2735), AOM_ICDF(7387), AOM_ICDF(7707),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(20453), AOM_ICDF(22253), AOM_ICDF(29963), AOM_ICDF(30329),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14090), AOM_ICDF(16483), AOM_ICDF(27992), AOM_ICDF(28355),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8737), AOM_ICDF(9396), AOM_ICDF(22134), AOM_ICDF(22499),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5543), AOM_ICDF(5904), AOM_ICDF(15783), AOM_ICDF(16122),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3358), AOM_ICDF(3677), AOM_ICDF(10362), AOM_ICDF(10680),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1875), AOM_ICDF(2187), AOM_ICDF(5982), AOM_ICDF(6294),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(23693), AOM_ICDF(25306), AOM_ICDF(31174), AOM_ICDF(31516),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14804), AOM_ICDF(16843), AOM_ICDF(28713), AOM_ICDF(29058),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8442), AOM_ICDF(8976), AOM_ICDF(22003), AOM_ICDF(22353),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5397), AOM_ICDF(5741), AOM_ICDF(15529), AOM_ICDF(15867),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3322), AOM_ICDF(3639), AOM_ICDF(10248), AOM_ICDF(10570),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1852), AOM_ICDF(2161), AOM_ICDF(5980), AOM_ICDF(6290),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(24219), AOM_ICDF(26214), AOM_ICDF(31501), AOM_ICDF(31844),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15202), AOM_ICDF(17709), AOM_ICDF(29450), AOM_ICDF(29807),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9044), AOM_ICDF(9603), AOM_ICDF(23134), AOM_ICDF(23506),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5849), AOM_ICDF(6187), AOM_ICDF(16695), AOM_ICDF(17032),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3734), AOM_ICDF(4050), AOM_ICDF(11408), AOM_ICDF(11727),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1898), AOM_ICDF(2201), AOM_ICDF(6126), AOM_ICDF(6430),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(10195), AOM_ICDF(21186), AOM_ICDF(23530), AOM_ICDF(29551),
+ AOM_ICDF(30281), AOM_ICDF(32768), },
+ {AOM_ICDF(3950), AOM_ICDF(15607), AOM_ICDF(18726), AOM_ICDF(26764),
+ AOM_ICDF(27758), AOM_ICDF(32768), },
+ {AOM_ICDF(942), AOM_ICDF(11209), AOM_ICDF(12954), AOM_ICDF(22126),
+ AOM_ICDF(23296), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(24110), AOM_ICDF(24717), AOM_ICDF(31199), AOM_ICDF(31532),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16869), AOM_ICDF(18762), AOM_ICDF(29600), AOM_ICDF(29951),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10702), AOM_ICDF(12122), AOM_ICDF(25122), AOM_ICDF(25503),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8221), AOM_ICDF(9053), AOM_ICDF(20816), AOM_ICDF(21206),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5635), AOM_ICDF(6244), AOM_ICDF(15801), AOM_ICDF(16186),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3776), AOM_ICDF(4210), AOM_ICDF(10380), AOM_ICDF(10766),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(24719), AOM_ICDF(25439), AOM_ICDF(31522), AOM_ICDF(31849),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16693), AOM_ICDF(18162), AOM_ICDF(29698), AOM_ICDF(30036),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9340), AOM_ICDF(10024), AOM_ICDF(23513), AOM_ICDF(23867),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6269), AOM_ICDF(6709), AOM_ICDF(17711), AOM_ICDF(18060),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3841), AOM_ICDF(4185), AOM_ICDF(11892), AOM_ICDF(12230),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1944), AOM_ICDF(2259), AOM_ICDF(6437), AOM_ICDF(6776),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(25795), AOM_ICDF(26524), AOM_ICDF(31784), AOM_ICDF(32108),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17514), AOM_ICDF(18812), AOM_ICDF(30221), AOM_ICDF(30557),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9099), AOM_ICDF(9576), AOM_ICDF(23502), AOM_ICDF(23843),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5738), AOM_ICDF(6097), AOM_ICDF(16847), AOM_ICDF(17182),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3411), AOM_ICDF(3730), AOM_ICDF(10729), AOM_ICDF(11057),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1282), AOM_ICDF(1591), AOM_ICDF(4705), AOM_ICDF(5013),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(26360), AOM_ICDF(27205), AOM_ICDF(31918), AOM_ICDF(32240),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18465), AOM_ICDF(19729), AOM_ICDF(30758), AOM_ICDF(31089),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9488), AOM_ICDF(9915), AOM_ICDF(24339), AOM_ICDF(24678),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5812), AOM_ICDF(6156), AOM_ICDF(17325), AOM_ICDF(17661),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3739), AOM_ICDF(4065), AOM_ICDF(10932), AOM_ICDF(11265),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1391), AOM_ICDF(1700), AOM_ICDF(4764), AOM_ICDF(5073),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(27036), AOM_ICDF(28212), AOM_ICDF(31970), AOM_ICDF(32305),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18634), AOM_ICDF(21073), AOM_ICDF(31116), AOM_ICDF(31477),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9822), AOM_ICDF(10441), AOM_ICDF(24990), AOM_ICDF(25437),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6130), AOM_ICDF(6530), AOM_ICDF(17790), AOM_ICDF(18269),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3725), AOM_ICDF(4044), AOM_ICDF(11127), AOM_ICDF(11602),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1298), AOM_ICDF(1573), AOM_ICDF(4642), AOM_ICDF(5075),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(23042), AOM_ICDF(23702), AOM_ICDF(30487), AOM_ICDF(31370),
+ AOM_ICDF(31898), AOM_ICDF(32768), },
+ {AOM_ICDF(15512), AOM_ICDF(17357), AOM_ICDF(27018), AOM_ICDF(29404),
+ AOM_ICDF(30377), AOM_ICDF(32768), },
+ {AOM_ICDF(8935), AOM_ICDF(12713), AOM_ICDF(20545), AOM_ICDF(25580),
+ AOM_ICDF(26931), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(15021), AOM_ICDF(24086), AOM_ICDF(30796), AOM_ICDF(31272),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13040), AOM_ICDF(21866), AOM_ICDF(30054), AOM_ICDF(30686),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10915), AOM_ICDF(16852), AOM_ICDF(27467), AOM_ICDF(28235),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8096), AOM_ICDF(10403), AOM_ICDF(22531), AOM_ICDF(23355),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4485), AOM_ICDF(5020), AOM_ICDF(13360), AOM_ICDF(13816),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1728), AOM_ICDF(2067), AOM_ICDF(5998), AOM_ICDF(6337),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(20845), AOM_ICDF(25929), AOM_ICDF(31278), AOM_ICDF(31670),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15553), AOM_ICDF(21602), AOM_ICDF(30338), AOM_ICDF(30745),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10953), AOM_ICDF(13829), AOM_ICDF(26398), AOM_ICDF(26854),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7900), AOM_ICDF(8858), AOM_ICDF(20869), AOM_ICDF(21378),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5225), AOM_ICDF(5579), AOM_ICDF(13764), AOM_ICDF(14087),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1881), AOM_ICDF(2352), AOM_ICDF(6742), AOM_ICDF(7212),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(25402), AOM_ICDF(28169), AOM_ICDF(31825), AOM_ICDF(32169),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17086), AOM_ICDF(21375), AOM_ICDF(30582), AOM_ICDF(30951),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11057), AOM_ICDF(12358), AOM_ICDF(25930), AOM_ICDF(26346),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6989), AOM_ICDF(7448), AOM_ICDF(18814), AOM_ICDF(19143),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4476), AOM_ICDF(4752), AOM_ICDF(16025), AOM_ICDF(16301),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2185), AOM_ICDF(4369), AOM_ICDF(12379), AOM_ICDF(14564),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(26444), AOM_ICDF(28656), AOM_ICDF(31864), AOM_ICDF(32231),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17642), AOM_ICDF(20848), AOM_ICDF(30615), AOM_ICDF(30967),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10973), AOM_ICDF(11732), AOM_ICDF(25256), AOM_ICDF(25612),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8325), AOM_ICDF(8726), AOM_ICDF(19826), AOM_ICDF(20146),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5294), AOM_ICDF(5568), AOM_ICDF(14056), AOM_ICDF(14330),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5461), AOM_ICDF(10923), AOM_ICDF(18204), AOM_ICDF(23666),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(27760), AOM_ICDF(29748), AOM_ICDF(31934), AOM_ICDF(32299),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17133), AOM_ICDF(21599), AOM_ICDF(30800), AOM_ICDF(31243),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12224), AOM_ICDF(13907), AOM_ICDF(26992), AOM_ICDF(27546),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9221), AOM_ICDF(9617), AOM_ICDF(21845), AOM_ICDF(22162),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5401), AOM_ICDF(6482), AOM_ICDF(18004), AOM_ICDF(19085),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(29286), AOM_ICDF(29932), AOM_ICDF(31576), AOM_ICDF(32075),
+ AOM_ICDF(32408), AOM_ICDF(32768), },
+ {AOM_ICDF(17969), AOM_ICDF(21693), AOM_ICDF(28937), AOM_ICDF(30945),
+ AOM_ICDF(31682), AOM_ICDF(32768), },
+ {AOM_ICDF(6607), AOM_ICDF(16160), AOM_ICDF(23280), AOM_ICDF(27595),
+ AOM_ICDF(30027), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(24724), AOM_ICDF(28333), AOM_ICDF(32022), AOM_ICDF(32346),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18803), AOM_ICDF(24728), AOM_ICDF(31661), AOM_ICDF(32022),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14179), AOM_ICDF(20757), AOM_ICDF(30098), AOM_ICDF(30633),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12564), AOM_ICDF(17179), AOM_ICDF(27133), AOM_ICDF(28080),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10543), AOM_ICDF(13479), AOM_ICDF(23725), AOM_ICDF(25031),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11377), AOM_ICDF(12741), AOM_ICDF(21923), AOM_ICDF(22888),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(26071), AOM_ICDF(28609), AOM_ICDF(32053), AOM_ICDF(32374),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20389), AOM_ICDF(24820), AOM_ICDF(31690), AOM_ICDF(32027),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12977), AOM_ICDF(16892), AOM_ICDF(29053), AOM_ICDF(29445),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8745), AOM_ICDF(12303), AOM_ICDF(24164), AOM_ICDF(25209),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4042), AOM_ICDF(5052), AOM_ICDF(18333), AOM_ICDF(18910),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5461), AOM_ICDF(9557), AOM_ICDF(13653), AOM_ICDF(17749),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(27936), AOM_ICDF(29582), AOM_ICDF(32107), AOM_ICDF(32422),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22472), AOM_ICDF(25761), AOM_ICDF(31858), AOM_ICDF(32177),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14107), AOM_ICDF(16587), AOM_ICDF(29250), AOM_ICDF(29692),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10726), AOM_ICDF(11739), AOM_ICDF(23985), AOM_ICDF(24576),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5825), AOM_ICDF(8010), AOM_ICDF(18204), AOM_ICDF(20389),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(27066), AOM_ICDF(29025), AOM_ICDF(31972), AOM_ICDF(32338),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20639), AOM_ICDF(23330), AOM_ICDF(31616), AOM_ICDF(31985),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13468), AOM_ICDF(15091), AOM_ICDF(29902), AOM_ICDF(30243),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14473), AOM_ICDF(15019), AOM_ICDF(24030), AOM_ICDF(24439),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7864), AOM_ICDF(11796), AOM_ICDF(19661), AOM_ICDF(23593),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(28741), AOM_ICDF(30503), AOM_ICDF(32039), AOM_ICDF(32388),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19712), AOM_ICDF(25328), AOM_ICDF(31621), AOM_ICDF(32049),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13461), AOM_ICDF(17167), AOM_ICDF(29712), AOM_ICDF(30308),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10285), AOM_ICDF(11242), AOM_ICDF(27267), AOM_ICDF(28224),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(17246), AOM_ICDF(22420),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 16X16
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(4353), AOM_ICDF(7056), AOM_ICDF(15884), AOM_ICDF(20594),
+ AOM_ICDF(24026), AOM_ICDF(32768), },
+ {AOM_ICDF(2397), AOM_ICDF(5417), AOM_ICDF(9610), AOM_ICDF(14451),
+ AOM_ICDF(16689), AOM_ICDF(32768), },
+ {AOM_ICDF(841), AOM_ICDF(3543), AOM_ICDF(4598), AOM_ICDF(9149),
+ AOM_ICDF(9950), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(8763), AOM_ICDF(11845), AOM_ICDF(22684), AOM_ICDF(23211),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8074), AOM_ICDF(12129), AOM_ICDF(22232), AOM_ICDF(22924),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7453), AOM_ICDF(10017), AOM_ICDF(19822), AOM_ICDF(20662),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5825), AOM_ICDF(6998), AOM_ICDF(16346), AOM_ICDF(16952),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4059), AOM_ICDF(4481), AOM_ICDF(11444), AOM_ICDF(11852),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1973), AOM_ICDF(2289), AOM_ICDF(5827), AOM_ICDF(6149),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(15272), AOM_ICDF(17017), AOM_ICDF(26959), AOM_ICDF(27346),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12476), AOM_ICDF(14916), AOM_ICDF(26163), AOM_ICDF(26575),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9485), AOM_ICDF(10720), AOM_ICDF(22557), AOM_ICDF(22973),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6821), AOM_ICDF(7342), AOM_ICDF(17484), AOM_ICDF(17858),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4370), AOM_ICDF(4714), AOM_ICDF(12030), AOM_ICDF(12366),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2375), AOM_ICDF(2688), AOM_ICDF(6850), AOM_ICDF(7162),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(19929), AOM_ICDF(21244), AOM_ICDF(29489), AOM_ICDF(29829),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14005), AOM_ICDF(16066), AOM_ICDF(27595), AOM_ICDF(27947),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8918), AOM_ICDF(9550), AOM_ICDF(22126), AOM_ICDF(22488),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5741), AOM_ICDF(6095), AOM_ICDF(16004), AOM_ICDF(16340),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3558), AOM_ICDF(3873), AOM_ICDF(10340), AOM_ICDF(10657),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1822), AOM_ICDF(2134), AOM_ICDF(5530), AOM_ICDF(5843),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(23568), AOM_ICDF(24663), AOM_ICDF(30915), AOM_ICDF(31245),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15139), AOM_ICDF(16577), AOM_ICDF(28661), AOM_ICDF(28997),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8850), AOM_ICDF(9259), AOM_ICDF(22366), AOM_ICDF(22700),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5454), AOM_ICDF(5781), AOM_ICDF(15617), AOM_ICDF(15937),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3315), AOM_ICDF(3629), AOM_ICDF(10044), AOM_ICDF(10359),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1736), AOM_ICDF(2047), AOM_ICDF(5698), AOM_ICDF(6009),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(27011), AOM_ICDF(27875), AOM_ICDF(31721), AOM_ICDF(32046),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16855), AOM_ICDF(18018), AOM_ICDF(29676), AOM_ICDF(30005),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8916), AOM_ICDF(9282), AOM_ICDF(22431), AOM_ICDF(22760),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5391), AOM_ICDF(5710), AOM_ICDF(15343), AOM_ICDF(15662),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3316), AOM_ICDF(3629), AOM_ICDF(10223), AOM_ICDF(10537),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1891), AOM_ICDF(2202), AOM_ICDF(6076), AOM_ICDF(6387),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(5744), AOM_ICDF(15508), AOM_ICDF(23294), AOM_ICDF(28653),
+ AOM_ICDF(30781), AOM_ICDF(32768), },
+ {AOM_ICDF(2130), AOM_ICDF(11786), AOM_ICDF(17337), AOM_ICDF(24444),
+ AOM_ICDF(27499), AOM_ICDF(32768), },
+ {AOM_ICDF(615), AOM_ICDF(8230), AOM_ICDF(10191), AOM_ICDF(18291),
+ AOM_ICDF(21029), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(25149), AOM_ICDF(25880), AOM_ICDF(31110), AOM_ICDF(31453),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17454), AOM_ICDF(20460), AOM_ICDF(29560), AOM_ICDF(29929),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11724), AOM_ICDF(14294), AOM_ICDF(25947), AOM_ICDF(26377),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9198), AOM_ICDF(10981), AOM_ICDF(22357), AOM_ICDF(22857),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7164), AOM_ICDF(8069), AOM_ICDF(18345), AOM_ICDF(18857),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5833), AOM_ICDF(6316), AOM_ICDF(14661), AOM_ICDF(15073),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(26117), AOM_ICDF(26928), AOM_ICDF(31526), AOM_ICDF(31850),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16540), AOM_ICDF(18394), AOM_ICDF(29402), AOM_ICDF(29740),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9908), AOM_ICDF(10886), AOM_ICDF(23865), AOM_ICDF(24223),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6805), AOM_ICDF(7383), AOM_ICDF(18402), AOM_ICDF(18777),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4259), AOM_ICDF(4638), AOM_ICDF(12791), AOM_ICDF(13136),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2274), AOM_ICDF(2584), AOM_ICDF(7391), AOM_ICDF(7713),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(27129), AOM_ICDF(27797), AOM_ICDF(31745), AOM_ICDF(32063),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17255), AOM_ICDF(18663), AOM_ICDF(29815), AOM_ICDF(30145),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9538), AOM_ICDF(10091), AOM_ICDF(23590), AOM_ICDF(23931),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6366), AOM_ICDF(6732), AOM_ICDF(17467), AOM_ICDF(17800),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3701), AOM_ICDF(4018), AOM_ICDF(11326), AOM_ICDF(11652),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1976), AOM_ICDF(2284), AOM_ICDF(6325), AOM_ICDF(6633),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(27944), AOM_ICDF(28479), AOM_ICDF(31894), AOM_ICDF(32211),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18032), AOM_ICDF(18997), AOM_ICDF(30130), AOM_ICDF(30452),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9467), AOM_ICDF(9842), AOM_ICDF(23729), AOM_ICDF(24051),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5900), AOM_ICDF(6226), AOM_ICDF(16797), AOM_ICDF(17116),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3282), AOM_ICDF(3595), AOM_ICDF(10418), AOM_ICDF(10730),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2289), AOM_ICDF(2601), AOM_ICDF(6048), AOM_ICDF(6360),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29278), AOM_ICDF(29837), AOM_ICDF(32038), AOM_ICDF(32360),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19805), AOM_ICDF(20846), AOM_ICDF(31007), AOM_ICDF(31343),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9976), AOM_ICDF(10433), AOM_ICDF(24483), AOM_ICDF(24848),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5971), AOM_ICDF(6354), AOM_ICDF(17184), AOM_ICDF(17539),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3497), AOM_ICDF(4693), AOM_ICDF(11940), AOM_ICDF(12291),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1776), AOM_ICDF(2357), AOM_ICDF(6260), AOM_ICDF(6918),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(23166), AOM_ICDF(23821), AOM_ICDF(30269), AOM_ICDF(31075),
+ AOM_ICDF(31847), AOM_ICDF(32768), },
+ {AOM_ICDF(14510), AOM_ICDF(16494), AOM_ICDF(25635), AOM_ICDF(28335),
+ AOM_ICDF(29759), AOM_ICDF(32768), },
+ {AOM_ICDF(7730), AOM_ICDF(12354), AOM_ICDF(18089), AOM_ICDF(24005),
+ AOM_ICDF(25442), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(17908), AOM_ICDF(24824), AOM_ICDF(30533), AOM_ICDF(31042),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13950), AOM_ICDF(22899), AOM_ICDF(29969), AOM_ICDF(30646),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11728), AOM_ICDF(17834), AOM_ICDF(27214), AOM_ICDF(28218),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9581), AOM_ICDF(12074), AOM_ICDF(23689), AOM_ICDF(24616),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6193), AOM_ICDF(6855), AOM_ICDF(16430), AOM_ICDF(16955),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3393), AOM_ICDF(3712), AOM_ICDF(8802), AOM_ICDF(9226),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(23368), AOM_ICDF(26826), AOM_ICDF(31183), AOM_ICDF(31579),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16523), AOM_ICDF(21603), AOM_ICDF(30044), AOM_ICDF(30503),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11171), AOM_ICDF(14152), AOM_ICDF(27009), AOM_ICDF(27644),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8523), AOM_ICDF(9348), AOM_ICDF(21021), AOM_ICDF(21595),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4780), AOM_ICDF(5196), AOM_ICDF(13440), AOM_ICDF(13786),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4328), AOM_ICDF(5255), AOM_ICDF(10820), AOM_ICDF(11747),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(27020), AOM_ICDF(28644), AOM_ICDF(31643), AOM_ICDF(31990),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18016), AOM_ICDF(21678), AOM_ICDF(30346), AOM_ICDF(30712),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10497), AOM_ICDF(11555), AOM_ICDF(24827), AOM_ICDF(25156),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6370), AOM_ICDF(6703), AOM_ICDF(18612), AOM_ICDF(18903),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5355), AOM_ICDF(5738), AOM_ICDF(14790), AOM_ICDF(15173),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3486), AOM_ICDF(5578), AOM_ICDF(11155), AOM_ICDF(13247),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28933), AOM_ICDF(29746), AOM_ICDF(31882), AOM_ICDF(32203),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18171), AOM_ICDF(20286), AOM_ICDF(29713), AOM_ICDF(30052),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9732), AOM_ICDF(10163), AOM_ICDF(23952), AOM_ICDF(24275),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6084), AOM_ICDF(6480), AOM_ICDF(17459), AOM_ICDF(17771),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3250), AOM_ICDF(3656), AOM_ICDF(10291), AOM_ICDF(10697),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4681), AOM_ICDF(8192), AOM_ICDF(15214), AOM_ICDF(18725),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29940), AOM_ICDF(30510), AOM_ICDF(31933), AOM_ICDF(32260),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17688), AOM_ICDF(19258), AOM_ICDF(29757), AOM_ICDF(30125),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9668), AOM_ICDF(10798), AOM_ICDF(24231), AOM_ICDF(24605),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7580), AOM_ICDF(7942), AOM_ICDF(19364), AOM_ICDF(19692),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6043), AOM_ICDF(6446), AOM_ICDF(15578), AOM_ICDF(15981),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(21203), AOM_ICDF(26985),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(28553), AOM_ICDF(29151), AOM_ICDF(31521), AOM_ICDF(32038),
+ AOM_ICDF(32413), AOM_ICDF(32768), },
+ {AOM_ICDF(15138), AOM_ICDF(19554), AOM_ICDF(27559), AOM_ICDF(29750),
+ AOM_ICDF(31321), AOM_ICDF(32768), },
+ {AOM_ICDF(3406), AOM_ICDF(18680), AOM_ICDF(23310), AOM_ICDF(27259),
+ AOM_ICDF(30430), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(29000), AOM_ICDF(30219), AOM_ICDF(32098), AOM_ICDF(32414),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21324), AOM_ICDF(25278), AOM_ICDF(31789), AOM_ICDF(32126),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14011), AOM_ICDF(21190), AOM_ICDF(30288), AOM_ICDF(30900),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12762), AOM_ICDF(18476), AOM_ICDF(27140), AOM_ICDF(28461),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11498), AOM_ICDF(14867), AOM_ICDF(24806), AOM_ICDF(25613),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15872), AOM_ICDF(16512), AOM_ICDF(24192), AOM_ICDF(25088),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(29308), AOM_ICDF(30286), AOM_ICDF(32095), AOM_ICDF(32410),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21819), AOM_ICDF(24215), AOM_ICDF(31771), AOM_ICDF(32103),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14853), AOM_ICDF(18028), AOM_ICDF(29729), AOM_ICDF(30160),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10598), AOM_ICDF(13400), AOM_ICDF(26555), AOM_ICDF(27043),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10426), AOM_ICDF(12660), AOM_ICDF(21597), AOM_ICDF(23831),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(29866), AOM_ICDF(30588), AOM_ICDF(32131), AOM_ICDF(32445),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(23473), AOM_ICDF(25323), AOM_ICDF(31960), AOM_ICDF(32280),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17529), AOM_ICDF(19173), AOM_ICDF(30278), AOM_ICDF(30577),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9830), AOM_ICDF(11469), AOM_ICDF(23484), AOM_ICDF(25122),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(30405), AOM_ICDF(31032), AOM_ICDF(32139), AOM_ICDF(32451),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(25453), AOM_ICDF(27199), AOM_ICDF(32040), AOM_ICDF(32361),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15663), AOM_ICDF(16432), AOM_ICDF(30654), AOM_ICDF(31038),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6780), AOM_ICDF(10169), AOM_ICDF(18079), AOM_ICDF(21469),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29785), AOM_ICDF(30368), AOM_ICDF(31904), AOM_ICDF(32245),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18173), AOM_ICDF(21111), AOM_ICDF(30105), AOM_ICDF(30575),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8476), AOM_ICDF(13666), AOM_ICDF(28420), AOM_ICDF(28896),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11427), AOM_ICDF(12066), AOM_ICDF(26197), AOM_ICDF(26691),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6827), AOM_ICDF(10923), AOM_ICDF(21845), AOM_ICDF(25941),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 32X32
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(7848), AOM_ICDF(9841), AOM_ICDF(13623), AOM_ICDF(19351),
+ AOM_ICDF(23196), AOM_ICDF(32768), },
+ {AOM_ICDF(3229), AOM_ICDF(5641), AOM_ICDF(7103), AOM_ICDF(13195),
+ AOM_ICDF(15046), AOM_ICDF(32768), },
+ {AOM_ICDF(810), AOM_ICDF(3129), AOM_ICDF(3687), AOM_ICDF(8373),
+ AOM_ICDF(8971), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(8165), AOM_ICDF(12626), AOM_ICDF(22213), AOM_ICDF(23403),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7602), AOM_ICDF(15378), AOM_ICDF(23248), AOM_ICDF(24331),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5607), AOM_ICDF(10197), AOM_ICDF(18657), AOM_ICDF(20616),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4498), AOM_ICDF(6539), AOM_ICDF(14461), AOM_ICDF(16104),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3387), AOM_ICDF(4098), AOM_ICDF(10245), AOM_ICDF(11322),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1793), AOM_ICDF(2111), AOM_ICDF(5262), AOM_ICDF(5646),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(16815), AOM_ICDF(19141), AOM_ICDF(27640), AOM_ICDF(28110),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13156), AOM_ICDF(15592), AOM_ICDF(26089), AOM_ICDF(26592),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9841), AOM_ICDF(11588), AOM_ICDF(22858), AOM_ICDF(23403),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7765), AOM_ICDF(8871), AOM_ICDF(19127), AOM_ICDF(19526),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5550), AOM_ICDF(6013), AOM_ICDF(14338), AOM_ICDF(14677),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2658), AOM_ICDF(2969), AOM_ICDF(7230), AOM_ICDF(7541),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(22765), AOM_ICDF(24278), AOM_ICDF(30194), AOM_ICDF(30535),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15310), AOM_ICDF(17292), AOM_ICDF(27870), AOM_ICDF(28248),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10047), AOM_ICDF(10839), AOM_ICDF(23345), AOM_ICDF(23710),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6594), AOM_ICDF(6959), AOM_ICDF(17456), AOM_ICDF(17796),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3784), AOM_ICDF(4109), AOM_ICDF(10984), AOM_ICDF(11297),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1569), AOM_ICDF(1875), AOM_ICDF(4586), AOM_ICDF(4892),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(25747), AOM_ICDF(26817), AOM_ICDF(31236), AOM_ICDF(31577),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16018), AOM_ICDF(17720), AOM_ICDF(28833), AOM_ICDF(29219),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9348), AOM_ICDF(10015), AOM_ICDF(22943), AOM_ICDF(23323),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5841), AOM_ICDF(6167), AOM_ICDF(15774), AOM_ICDF(16107),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3385), AOM_ICDF(3703), AOM_ICDF(9664), AOM_ICDF(9975),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1460), AOM_ICDF(1768), AOM_ICDF(4704), AOM_ICDF(5011),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29634), AOM_ICDF(30134), AOM_ICDF(31898), AOM_ICDF(32218),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16976), AOM_ICDF(17856), AOM_ICDF(29258), AOM_ICDF(29584),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8521), AOM_ICDF(8858), AOM_ICDF(21252), AOM_ICDF(21574),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4894), AOM_ICDF(5208), AOM_ICDF(13957), AOM_ICDF(14271),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3140), AOM_ICDF(3452), AOM_ICDF(9099), AOM_ICDF(9411),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1770), AOM_ICDF(2080), AOM_ICDF(5241), AOM_ICDF(5551),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(22253), AOM_ICDF(23279), AOM_ICDF(24319), AOM_ICDF(27691),
+ AOM_ICDF(30884), AOM_ICDF(32768), },
+ {AOM_ICDF(6281), AOM_ICDF(8348), AOM_ICDF(9473), AOM_ICDF(15740),
+ AOM_ICDF(24879), AOM_ICDF(32768), },
+ {AOM_ICDF(1265), AOM_ICDF(3893), AOM_ICDF(4482), AOM_ICDF(9694),
+ AOM_ICDF(18376), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(17243), AOM_ICDF(18993), AOM_ICDF(28515), AOM_ICDF(29242),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15645), AOM_ICDF(23632), AOM_ICDF(29905), AOM_ICDF(30416),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11203), AOM_ICDF(18441), AOM_ICDF(27037), AOM_ICDF(27930),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9306), AOM_ICDF(13788), AOM_ICDF(23647), AOM_ICDF(24669),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8076), AOM_ICDF(10237), AOM_ICDF(20500), AOM_ICDF(21437),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7214), AOM_ICDF(8133), AOM_ICDF(17608), AOM_ICDF(18202),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(23555), AOM_ICDF(26147), AOM_ICDF(31229), AOM_ICDF(31581),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16046), AOM_ICDF(20455), AOM_ICDF(29711), AOM_ICDF(30107),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10810), AOM_ICDF(14014), AOM_ICDF(25967), AOM_ICDF(26499),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8267), AOM_ICDF(9930), AOM_ICDF(21704), AOM_ICDF(22244),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5637), AOM_ICDF(6282), AOM_ICDF(15954), AOM_ICDF(16508),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4090), AOM_ICDF(4363), AOM_ICDF(11771), AOM_ICDF(12044),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26146), AOM_ICDF(27425), AOM_ICDF(31658), AOM_ICDF(31983),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17486), AOM_ICDF(20295), AOM_ICDF(30279), AOM_ICDF(30621),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10812), AOM_ICDF(12230), AOM_ICDF(26095), AOM_ICDF(26460),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7510), AOM_ICDF(8042), AOM_ICDF(21058), AOM_ICDF(21425),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4566), AOM_ICDF(4916), AOM_ICDF(13594), AOM_ICDF(13891),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1956), AOM_ICDF(2445), AOM_ICDF(5380), AOM_ICDF(5869),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28423), AOM_ICDF(29253), AOM_ICDF(31959), AOM_ICDF(32277),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18711), AOM_ICDF(20638), AOM_ICDF(30445), AOM_ICDF(30777),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10301), AOM_ICDF(10903), AOM_ICDF(24702), AOM_ICDF(25060),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6531), AOM_ICDF(6885), AOM_ICDF(18215), AOM_ICDF(18535),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3965), AOM_ICDF(4265), AOM_ICDF(11701), AOM_ICDF(12023),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3255), AOM_ICDF(3906), AOM_ICDF(8897), AOM_ICDF(9548),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29905), AOM_ICDF(30382), AOM_ICDF(32053), AOM_ICDF(32369),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19724), AOM_ICDF(20376), AOM_ICDF(30778), AOM_ICDF(31101),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10430), AOM_ICDF(10786), AOM_ICDF(24620), AOM_ICDF(24943),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6151), AOM_ICDF(6475), AOM_ICDF(17188), AOM_ICDF(17504),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3728), AOM_ICDF(4034), AOM_ICDF(11352), AOM_ICDF(11658),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1456), AOM_ICDF(1748), AOM_ICDF(5024), AOM_ICDF(5316),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(24883), AOM_ICDF(25616), AOM_ICDF(27995), AOM_ICDF(29251),
+ AOM_ICDF(31055), AOM_ICDF(32768), },
+ {AOM_ICDF(9802), AOM_ICDF(11841), AOM_ICDF(18691), AOM_ICDF(22179),
+ AOM_ICDF(26383), AOM_ICDF(32768), },
+ {AOM_ICDF(4096), AOM_ICDF(7928), AOM_ICDF(14072), AOM_ICDF(21042),
+ AOM_ICDF(23453), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(10363), AOM_ICDF(20924), AOM_ICDF(29116), AOM_ICDF(29906),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10682), AOM_ICDF(22326), AOM_ICDF(29093), AOM_ICDF(29642),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10304), AOM_ICDF(21073), AOM_ICDF(26843), AOM_ICDF(28904),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6138), AOM_ICDF(13221), AOM_ICDF(22475), AOM_ICDF(25119),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3788), AOM_ICDF(4356), AOM_ICDF(10607), AOM_ICDF(12690),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1950), AOM_ICDF(4291), AOM_ICDF(10923), AOM_ICDF(12873),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(21958), AOM_ICDF(27093), AOM_ICDF(30741), AOM_ICDF(31349),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18725), AOM_ICDF(23406), AOM_ICDF(30541), AOM_ICDF(31268),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15634), AOM_ICDF(17134), AOM_ICDF(26450), AOM_ICDF(27092),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10012), AOM_ICDF(11287), AOM_ICDF(24758), AOM_ICDF(25304),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6242), AOM_ICDF(7802), AOM_ICDF(19895), AOM_ICDF(21065),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4096), AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(20480),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26587), AOM_ICDF(27934), AOM_ICDF(31817), AOM_ICDF(32094),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20234), AOM_ICDF(22651), AOM_ICDF(30576), AOM_ICDF(30857),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13405), AOM_ICDF(14708), AOM_ICDF(26624), AOM_ICDF(27183),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9132), AOM_ICDF(11281), AOM_ICDF(19876), AOM_ICDF(21487),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(15522), AOM_ICDF(20696),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(19275), AOM_ICDF(25058),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28277), AOM_ICDF(29312), AOM_ICDF(32101), AOM_ICDF(32400),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18946), AOM_ICDF(23037), AOM_ICDF(31186), AOM_ICDF(31565),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14043), AOM_ICDF(14980), AOM_ICDF(29491), AOM_ICDF(30193),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9638), AOM_ICDF(12529), AOM_ICDF(21203), AOM_ICDF(24094),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6554), AOM_ICDF(11469), AOM_ICDF(18022), AOM_ICDF(22938),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(31039), AOM_ICDF(31404), AOM_ICDF(32048), AOM_ICDF(32372),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20567), AOM_ICDF(21869), AOM_ICDF(28724), AOM_ICDF(29256),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10000), AOM_ICDF(11250), AOM_ICDF(22768), AOM_ICDF(23393),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6291), AOM_ICDF(7078), AOM_ICDF(20447), AOM_ICDF(21234),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3072), AOM_ICDF(6144), AOM_ICDF(18432), AOM_ICDF(21504),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(23448), AOM_ICDF(25882), AOM_ICDF(29692), AOM_ICDF(31272),
+ AOM_ICDF(32065), AOM_ICDF(32768), },
+ {AOM_ICDF(4276), AOM_ICDF(17832), AOM_ICDF(22156), AOM_ICDF(28463),
+ AOM_ICDF(30374), AOM_ICDF(32768), },
+ {AOM_ICDF(842), AOM_ICDF(20937), AOM_ICDF(22447), AOM_ICDF(28559),
+ AOM_ICDF(30333), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(30469), AOM_ICDF(30991), AOM_ICDF(32114), AOM_ICDF(32435),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(27295), AOM_ICDF(29153), AOM_ICDF(31917), AOM_ICDF(32269),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16309), AOM_ICDF(22060), AOM_ICDF(29937), AOM_ICDF(30686),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11440), AOM_ICDF(16853), AOM_ICDF(26633), AOM_ICDF(27427),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13069), AOM_ICDF(15405), AOM_ICDF(27401), AOM_ICDF(28033),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9084), AOM_ICDF(10058), AOM_ICDF(23197), AOM_ICDF(23684),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(30728), AOM_ICDF(31202), AOM_ICDF(32138), AOM_ICDF(32450),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(23421), AOM_ICDF(26186), AOM_ICDF(31939), AOM_ICDF(32278),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12249), AOM_ICDF(15027), AOM_ICDF(28348), AOM_ICDF(28854),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5667), AOM_ICDF(6899), AOM_ICDF(22174), AOM_ICDF(23652),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(10650), AOM_ICDF(17203), AOM_ICDF(20480),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(30721), AOM_ICDF(31093), AOM_ICDF(32141), AOM_ICDF(32453),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(24052), AOM_ICDF(25175), AOM_ICDF(31923), AOM_ICDF(32231),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8145), AOM_ICDF(9281), AOM_ICDF(27654), AOM_ICDF(28412),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7373), AOM_ICDF(9830), AOM_ICDF(21299), AOM_ICDF(23757),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(31284), AOM_ICDF(31621), AOM_ICDF(32143), AOM_ICDF(32455),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(27783), AOM_ICDF(28563), AOM_ICDF(32045), AOM_ICDF(32361),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10149), AOM_ICDF(12179), AOM_ICDF(28128), AOM_ICDF(28998),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5650), AOM_ICDF(9039), AOM_ICDF(19209), AOM_ICDF(22599),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(31038), AOM_ICDF(31383), AOM_ICDF(32035), AOM_ICDF(32357),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20689), AOM_ICDF(22001), AOM_ICDF(28880), AOM_ICDF(29479),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7827), AOM_ICDF(10613), AOM_ICDF(24141), AOM_ICDF(24735),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8021), AOM_ICDF(8585), AOM_ICDF(22014), AOM_ICDF(22383),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6047), AOM_ICDF(6350), AOM_ICDF(19918), AOM_ICDF(20220),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+};
+
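+/* Editorial note: the _q0.._q2 suffixes on these default head-CDF tables
+ * suggest a partition by coarse quantizer bucket, with one table set
+ * chosen from the frame's base qindex. The guarded sketch below only
+ * illustrates the shape of such a lookup: select_head_cdfs_by_qindex and
+ * head_cdf_table_t are hypothetical, the 64-wide cut points are
+ * placeholders rather than libaom's actual thresholds, and the _q0/_q3
+ * names are inferred from the visible naming pattern.
+ */
+#if 0 /* illustrative only, excluded from compilation */
+typedef const coeff_cdf_model (*head_cdf_table_t)[PLANE_TYPES];
+
+/* Hypothetical selector: map a base qindex (0..255 in AV1) onto one of
+ * four default table sets. */
+static head_cdf_table_t select_head_cdfs_by_qindex(int qindex) {
+  if (qindex < 64) return av1_default_coef_head_cdfs_q0;
+  if (qindex < 128) return av1_default_coef_head_cdfs_q1;
+  if (qindex < 192) return av1_default_coef_head_cdfs_q2;
+  return av1_default_coef_head_cdfs_q3;
+}
+#endif
+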
+static const coeff_cdf_model
+av1_default_coef_head_cdfs_q2[TX_SIZES][PLANE_TYPES] = {
+ { // TX 4X4
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(23035), AOM_ICDF(23799), AOM_ICDF(27745), AOM_ICDF(29607),
+ AOM_ICDF(30130), AOM_ICDF(32768), },
+ {AOM_ICDF(12409), AOM_ICDF(14763), AOM_ICDF(22883), AOM_ICDF(26775),
+ AOM_ICDF(27649), AOM_ICDF(32768), },
+ {AOM_ICDF(5237), AOM_ICDF(9433), AOM_ICDF(15597), AOM_ICDF(21779),
+ AOM_ICDF(23224), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(10424), AOM_ICDF(17678), AOM_ICDF(28850), AOM_ICDF(29349),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10376), AOM_ICDF(16902), AOM_ICDF(28779), AOM_ICDF(29265),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10166), AOM_ICDF(14387), AOM_ICDF(26253), AOM_ICDF(26807),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8474), AOM_ICDF(9927), AOM_ICDF(22092), AOM_ICDF(22697),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6415), AOM_ICDF(6911), AOM_ICDF(17155), AOM_ICDF(17579),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4611), AOM_ICDF(4928), AOM_ICDF(12174), AOM_ICDF(12497),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(16984), AOM_ICDF(21802), AOM_ICDF(30901), AOM_ICDF(31373),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14003), AOM_ICDF(19369), AOM_ICDF(30193), AOM_ICDF(30615),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10729), AOM_ICDF(13233), AOM_ICDF(26938), AOM_ICDF(27455),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8604), AOM_ICDF(9526), AOM_ICDF(22436), AOM_ICDF(22989),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6828), AOM_ICDF(7236), AOM_ICDF(18056), AOM_ICDF(18456),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4302), AOM_ICDF(4555), AOM_ICDF(12209), AOM_ICDF(12462),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(20261), AOM_ICDF(24381), AOM_ICDF(31612), AOM_ICDF(31989),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13775), AOM_ICDF(20449), AOM_ICDF(30685), AOM_ICDF(31111),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10459), AOM_ICDF(13768), AOM_ICDF(27504), AOM_ICDF(28114),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7994), AOM_ICDF(8989), AOM_ICDF(22906), AOM_ICDF(23636),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5928), AOM_ICDF(6460), AOM_ICDF(16884), AOM_ICDF(17720),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4520), AOM_ICDF(7910), AOM_ICDF(12429), AOM_ICDF(16949),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(17822), AOM_ICDF(26021), AOM_ICDF(31751), AOM_ICDF(32150),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13484), AOM_ICDF(23372), AOM_ICDF(31305), AOM_ICDF(31747),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11009), AOM_ICDF(15469), AOM_ICDF(28452), AOM_ICDF(29132),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8358), AOM_ICDF(9357), AOM_ICDF(22412), AOM_ICDF(23385),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9392), AOM_ICDF(10018), AOM_ICDF(18158), AOM_ICDF(19202),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(5236), AOM_ICDF(26529), AOM_ICDF(31709), AOM_ICDF(32201),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5710), AOM_ICDF(25925), AOM_ICDF(31254), AOM_ICDF(31967),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7645), AOM_ICDF(19427), AOM_ICDF(28170), AOM_ICDF(29920),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7427), AOM_ICDF(13350), AOM_ICDF(23253), AOM_ICDF(25438),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4681), AOM_ICDF(6687), AOM_ICDF(15381), AOM_ICDF(18725),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(11176), AOM_ICDF(18297), AOM_ICDF(19062), AOM_ICDF(28984),
+ AOM_ICDF(29496), AOM_ICDF(32768), },
+ {AOM_ICDF(9778), AOM_ICDF(17798), AOM_ICDF(19934), AOM_ICDF(28434),
+ AOM_ICDF(28921), AOM_ICDF(32768), },
+ {AOM_ICDF(4806), AOM_ICDF(14260), AOM_ICDF(17259), AOM_ICDF(26368),
+ AOM_ICDF(26942), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(21802), AOM_ICDF(22916), AOM_ICDF(31657), AOM_ICDF(31989),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16874), AOM_ICDF(20345), AOM_ICDF(31048), AOM_ICDF(31389),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10717), AOM_ICDF(12576), AOM_ICDF(26899), AOM_ICDF(27294),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8468), AOM_ICDF(9404), AOM_ICDF(21928), AOM_ICDF(22358),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5992), AOM_ICDF(6521), AOM_ICDF(16309), AOM_ICDF(16729),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5134), AOM_ICDF(5452), AOM_ICDF(11491), AOM_ICDF(11865),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(22003), AOM_ICDF(24147), AOM_ICDF(31841), AOM_ICDF(32177),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17179), AOM_ICDF(20593), AOM_ICDF(31041), AOM_ICDF(31394),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9282), AOM_ICDF(10544), AOM_ICDF(25698), AOM_ICDF(26133),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6301), AOM_ICDF(7013), AOM_ICDF(19066), AOM_ICDF(19557),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3845), AOM_ICDF(4316), AOM_ICDF(12209), AOM_ICDF(12812),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4819), AOM_ICDF(6746), AOM_ICDF(11565), AOM_ICDF(13011),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(22820), AOM_ICDF(26023), AOM_ICDF(31888), AOM_ICDF(32236),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17130), AOM_ICDF(21510), AOM_ICDF(31268), AOM_ICDF(31632),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10062), AOM_ICDF(11898), AOM_ICDF(26787), AOM_ICDF(27281),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7681), AOM_ICDF(8590), AOM_ICDF(21264), AOM_ICDF(22034),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4413), AOM_ICDF(5143), AOM_ICDF(13605), AOM_ICDF(14712),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5461), AOM_ICDF(10923), AOM_ICDF(16384), AOM_ICDF(21845),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(20237), AOM_ICDF(25695), AOM_ICDF(31868), AOM_ICDF(32222),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15551), AOM_ICDF(22658), AOM_ICDF(31236), AOM_ICDF(31659),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9584), AOM_ICDF(12389), AOM_ICDF(26347), AOM_ICDF(27242),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6067), AOM_ICDF(7231), AOM_ICDF(19625), AOM_ICDF(20707),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3724), AOM_ICDF(4312), AOM_ICDF(11269), AOM_ICDF(12425),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4096), AOM_ICDF(6554), AOM_ICDF(9830), AOM_ICDF(12288),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(11726), AOM_ICDF(26639), AOM_ICDF(31977), AOM_ICDF(32340),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10754), AOM_ICDF(25823), AOM_ICDF(31568), AOM_ICDF(32060),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8761), AOM_ICDF(16650), AOM_ICDF(27884), AOM_ICDF(29394),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7387), AOM_ICDF(9941), AOM_ICDF(21377), AOM_ICDF(23333),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2374), AOM_ICDF(3799), AOM_ICDF(16147), AOM_ICDF(19471),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(29271), AOM_ICDF(29645), AOM_ICDF(31447), AOM_ICDF(31951),
+ AOM_ICDF(32313), AOM_ICDF(32768), },
+ {AOM_ICDF(22174), AOM_ICDF(23288), AOM_ICDF(29633), AOM_ICDF(31096),
+ AOM_ICDF(31701), AOM_ICDF(32768), },
+ {AOM_ICDF(13601), AOM_ICDF(16603), AOM_ICDF(25296), AOM_ICDF(28966),
+ AOM_ICDF(30043), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(13850), AOM_ICDF(26266), AOM_ICDF(31653), AOM_ICDF(32083),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11979), AOM_ICDF(24610), AOM_ICDF(31369), AOM_ICDF(31810),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11325), AOM_ICDF(18989), AOM_ICDF(29109), AOM_ICDF(29770),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9338), AOM_ICDF(11892), AOM_ICDF(25324), AOM_ICDF(26115),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5725), AOM_ICDF(6243), AOM_ICDF(18483), AOM_ICDF(18919),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6554), AOM_ICDF(9830), AOM_ICDF(16384), AOM_ICDF(19661),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(18097), AOM_ICDF(27765), AOM_ICDF(31891), AOM_ICDF(32286),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14735), AOM_ICDF(24632), AOM_ICDF(31577), AOM_ICDF(31970),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11031), AOM_ICDF(15675), AOM_ICDF(29109), AOM_ICDF(29716),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8859), AOM_ICDF(9891), AOM_ICDF(23909), AOM_ICDF(24940),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7864), AOM_ICDF(11796), AOM_ICDF(20972), AOM_ICDF(24904),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(21057), AOM_ICDF(29116), AOM_ICDF(32033), AOM_ICDF(32367),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15287), AOM_ICDF(25704), AOM_ICDF(31791), AOM_ICDF(32151),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12927), AOM_ICDF(18993), AOM_ICDF(30815), AOM_ICDF(31329),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13227), AOM_ICDF(16234), AOM_ICDF(27657), AOM_ICDF(28860),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6899), AOM_ICDF(12072), AOM_ICDF(18971), AOM_ICDF(25869),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(17688), AOM_ICDF(28768), AOM_ICDF(32140), AOM_ICDF(32435),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13473), AOM_ICDF(26360), AOM_ICDF(31944), AOM_ICDF(32307),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12653), AOM_ICDF(18817), AOM_ICDF(28875), AOM_ICDF(30497),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5461), AOM_ICDF(10923), AOM_ICDF(20025), AOM_ICDF(25486),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(6820), AOM_ICDF(28765), AOM_ICDF(31878), AOM_ICDF(32323),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7737), AOM_ICDF(28672), AOM_ICDF(31972), AOM_ICDF(32313),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11796), AOM_ICDF(18350), AOM_ICDF(24904), AOM_ICDF(28836),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(30079), AOM_ICDF(30525), AOM_ICDF(31559), AOM_ICDF(32085),
+ AOM_ICDF(32407), AOM_ICDF(32768), },
+ {AOM_ICDF(22148), AOM_ICDF(24035), AOM_ICDF(29557), AOM_ICDF(31423),
+ AOM_ICDF(31881), AOM_ICDF(32768), },
+ {AOM_ICDF(13266), AOM_ICDF(17717), AOM_ICDF(26069), AOM_ICDF(29825),
+ AOM_ICDF(30780), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(18219), AOM_ICDF(27530), AOM_ICDF(32048), AOM_ICDF(32373),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14664), AOM_ICDF(25532), AOM_ICDF(31886), AOM_ICDF(32244),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11683), AOM_ICDF(19554), AOM_ICDF(30330), AOM_ICDF(30870),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9410), AOM_ICDF(14238), AOM_ICDF(25794), AOM_ICDF(27268),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6629), AOM_ICDF(9580), AOM_ICDF(20186), AOM_ICDF(22187),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2891), AOM_ICDF(4337), AOM_ICDF(11083), AOM_ICDF(13493),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(20016), AOM_ICDF(28471), AOM_ICDF(32074), AOM_ICDF(32401),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16915), AOM_ICDF(26047), AOM_ICDF(31965), AOM_ICDF(32300),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10725), AOM_ICDF(18206), AOM_ICDF(30056), AOM_ICDF(30606),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6883), AOM_ICDF(13990), AOM_ICDF(26334), AOM_ICDF(27531),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11529), AOM_ICDF(15170), AOM_ICDF(22452), AOM_ICDF(24879),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(23488), AOM_ICDF(29744), AOM_ICDF(32117), AOM_ICDF(32442),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17520), AOM_ICDF(27259), AOM_ICDF(32056), AOM_ICDF(32389),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13107), AOM_ICDF(20597), AOM_ICDF(31416), AOM_ICDF(32092),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20165), AOM_ICDF(22686), AOM_ICDF(26887), AOM_ICDF(29407),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(17711), AOM_ICDF(29963), AOM_ICDF(32137), AOM_ICDF(32452),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14078), AOM_ICDF(28336), AOM_ICDF(32026), AOM_ICDF(32391),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11129), AOM_ICDF(28749), AOM_ICDF(30295), AOM_ICDF(31222),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7447), AOM_ICDF(13405), AOM_ICDF(22342), AOM_ICDF(26810),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(14413), AOM_ICDF(30309), AOM_ICDF(32090), AOM_ICDF(32471),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11814), AOM_ICDF(30354), AOM_ICDF(32251), AOM_ICDF(32509),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7282), AOM_ICDF(12743), AOM_ICDF(21845), AOM_ICDF(27307),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 8X8
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(16945), AOM_ICDF(18241), AOM_ICDF(25718), AOM_ICDF(28152),
+ AOM_ICDF(29383), AOM_ICDF(32768), },
+ {AOM_ICDF(7095), AOM_ICDF(10051), AOM_ICDF(18830), AOM_ICDF(23174),
+ AOM_ICDF(24906), AOM_ICDF(32768), },
+ {AOM_ICDF(2585), AOM_ICDF(6677), AOM_ICDF(10951), AOM_ICDF(17411),
+ AOM_ICDF(18916), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(12894), AOM_ICDF(17897), AOM_ICDF(28218), AOM_ICDF(28651),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11333), AOM_ICDF(16802), AOM_ICDF(27676), AOM_ICDF(28153),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10166), AOM_ICDF(13829), AOM_ICDF(25072), AOM_ICDF(25646),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8356), AOM_ICDF(9772), AOM_ICDF(21358), AOM_ICDF(21912),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5988), AOM_ICDF(6506), AOM_ICDF(16203), AOM_ICDF(16647),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3684), AOM_ICDF(4012), AOM_ICDF(10039), AOM_ICDF(10367),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(18192), AOM_ICDF(21044), AOM_ICDF(30229), AOM_ICDF(30597),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14976), AOM_ICDF(18218), AOM_ICDF(29191), AOM_ICDF(29564),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10914), AOM_ICDF(12508), AOM_ICDF(25451), AOM_ICDF(25857),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7970), AOM_ICDF(8605), AOM_ICDF(20619), AOM_ICDF(21011),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5555), AOM_ICDF(5926), AOM_ICDF(15730), AOM_ICDF(16091),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3522), AOM_ICDF(3847), AOM_ICDF(10567), AOM_ICDF(10892),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(21896), AOM_ICDF(23866), AOM_ICDF(31136), AOM_ICDF(31486),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15913), AOM_ICDF(18331), AOM_ICDF(29670), AOM_ICDF(30019),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10158), AOM_ICDF(10878), AOM_ICDF(24664), AOM_ICDF(25024),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6692), AOM_ICDF(7070), AOM_ICDF(18934), AOM_ICDF(19267),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4603), AOM_ICDF(4914), AOM_ICDF(13724), AOM_ICDF(14041),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2378), AOM_ICDF(3171), AOM_ICDF(7663), AOM_ICDF(8456),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(24113), AOM_ICDF(25740), AOM_ICDF(31668), AOM_ICDF(32000),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16618), AOM_ICDF(18583), AOM_ICDF(30173), AOM_ICDF(30511),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10122), AOM_ICDF(10666), AOM_ICDF(24877), AOM_ICDF(25222),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6721), AOM_ICDF(7062), AOM_ICDF(19250), AOM_ICDF(19588),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4641), AOM_ICDF(4957), AOM_ICDF(13698), AOM_ICDF(14021),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3324), AOM_ICDF(4749), AOM_ICDF(9498), AOM_ICDF(10923),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(24933), AOM_ICDF(27294), AOM_ICDF(31876), AOM_ICDF(32207),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17505), AOM_ICDF(20214), AOM_ICDF(30842), AOM_ICDF(31189),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10756), AOM_ICDF(11345), AOM_ICDF(25989), AOM_ICDF(26362),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7374), AOM_ICDF(7763), AOM_ICDF(19820), AOM_ICDF(20160),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5003), AOM_ICDF(5328), AOM_ICDF(15420), AOM_ICDF(15723),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4915), AOM_ICDF(9830), AOM_ICDF(18022), AOM_ICDF(22938),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(7874), AOM_ICDF(17174), AOM_ICDF(19119), AOM_ICDF(28514),
+ AOM_ICDF(29361), AOM_ICDF(32768), },
+ {AOM_ICDF(3407), AOM_ICDF(13628), AOM_ICDF(16836), AOM_ICDF(26723),
+ AOM_ICDF(27681), AOM_ICDF(32768), },
+ {AOM_ICDF(1062), AOM_ICDF(11514), AOM_ICDF(14002), AOM_ICDF(24081),
+ AOM_ICDF(25232), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(23614), AOM_ICDF(24717), AOM_ICDF(31593), AOM_ICDF(31927),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18177), AOM_ICDF(21581), AOM_ICDF(30890), AOM_ICDF(31234),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12535), AOM_ICDF(14549), AOM_ICDF(27749), AOM_ICDF(28134),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9687), AOM_ICDF(10712), AOM_ICDF(23848), AOM_ICDF(24271),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6461), AOM_ICDF(7119), AOM_ICDF(17940), AOM_ICDF(18368),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3863), AOM_ICDF(4245), AOM_ICDF(10904), AOM_ICDF(11278),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(24334), AOM_ICDF(25912), AOM_ICDF(31795), AOM_ICDF(32120),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17964), AOM_ICDF(20229), AOM_ICDF(30726), AOM_ICDF(31064),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10463), AOM_ICDF(11527), AOM_ICDF(25898), AOM_ICDF(26256),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7431), AOM_ICDF(8071), AOM_ICDF(20542), AOM_ICDF(20928),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4561), AOM_ICDF(4995), AOM_ICDF(13977), AOM_ICDF(14347),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2427), AOM_ICDF(2687), AOM_ICDF(8149), AOM_ICDF(8409),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(25888), AOM_ICDF(27308), AOM_ICDF(31957), AOM_ICDF(32279),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18868), AOM_ICDF(20992), AOM_ICDF(31092), AOM_ICDF(31424),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10480), AOM_ICDF(11191), AOM_ICDF(25801), AOM_ICDF(26149),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6878), AOM_ICDF(7326), AOM_ICDF(19397), AOM_ICDF(19762),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4235), AOM_ICDF(4601), AOM_ICDF(13182), AOM_ICDF(13587),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3584), AOM_ICDF(5120), AOM_ICDF(11264), AOM_ICDF(13312),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(26802), AOM_ICDF(28181), AOM_ICDF(32031), AOM_ICDF(32349),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19661), AOM_ICDF(21746), AOM_ICDF(31360), AOM_ICDF(31688),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10680), AOM_ICDF(11361), AOM_ICDF(26261), AOM_ICDF(26610),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6811), AOM_ICDF(7274), AOM_ICDF(19689), AOM_ICDF(20075),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4881), AOM_ICDF(5230), AOM_ICDF(11882), AOM_ICDF(12324),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4096), AOM_ICDF(6144), AOM_ICDF(9557), AOM_ICDF(11605),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(27511), AOM_ICDF(29045), AOM_ICDF(32051), AOM_ICDF(32376),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19712), AOM_ICDF(22596), AOM_ICDF(31464), AOM_ICDF(31813),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11035), AOM_ICDF(11852), AOM_ICDF(26626), AOM_ICDF(27082),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7190), AOM_ICDF(7674), AOM_ICDF(20245), AOM_ICDF(20794),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5114), AOM_ICDF(5407), AOM_ICDF(12895), AOM_ICDF(13443),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(15522), AOM_ICDF(20696),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(26201), AOM_ICDF(26641), AOM_ICDF(31158), AOM_ICDF(31755),
+ AOM_ICDF(32200), AOM_ICDF(32768), },
+ {AOM_ICDF(19651), AOM_ICDF(20883), AOM_ICDF(28935), AOM_ICDF(30581),
+ AOM_ICDF(31426), AOM_ICDF(32768), },
+ {AOM_ICDF(12456), AOM_ICDF(15868), AOM_ICDF(23727), AOM_ICDF(27839),
+ AOM_ICDF(29216), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(16708), AOM_ICDF(25600), AOM_ICDF(31550), AOM_ICDF(31927),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14533), AOM_ICDF(24134), AOM_ICDF(31151), AOM_ICDF(31670),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12771), AOM_ICDF(19041), AOM_ICDF(29256), AOM_ICDF(29926),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9497), AOM_ICDF(12011), AOM_ICDF(24856), AOM_ICDF(25648),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6059), AOM_ICDF(6512), AOM_ICDF(17765), AOM_ICDF(18218),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4498), AOM_ICDF(6425), AOM_ICDF(13493), AOM_ICDF(15420),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(21314), AOM_ICDF(26763), AOM_ICDF(31645), AOM_ICDF(32043),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16898), AOM_ICDF(23241), AOM_ICDF(31276), AOM_ICDF(31667),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12339), AOM_ICDF(16091), AOM_ICDF(28493), AOM_ICDF(28851),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8583), AOM_ICDF(10033), AOM_ICDF(23721), AOM_ICDF(24359),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6801), AOM_ICDF(7728), AOM_ICDF(18857), AOM_ICDF(19784),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(25155), AOM_ICDF(28551), AOM_ICDF(31936), AOM_ICDF(32273),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18054), AOM_ICDF(22818), AOM_ICDF(31343), AOM_ICDF(31736),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12381), AOM_ICDF(14088), AOM_ICDF(27865), AOM_ICDF(28300),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7853), AOM_ICDF(8666), AOM_ICDF(21665), AOM_ICDF(22477),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6242), AOM_ICDF(10923), AOM_ICDF(15604), AOM_ICDF(20285),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(26649), AOM_ICDF(29334), AOM_ICDF(32001), AOM_ICDF(32345),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18410), AOM_ICDF(22788), AOM_ICDF(31465), AOM_ICDF(31842),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12504), AOM_ICDF(13480), AOM_ICDF(28600), AOM_ICDF(28955),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9175), AOM_ICDF(10486), AOM_ICDF(21845), AOM_ICDF(23156),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7710), AOM_ICDF(13493), AOM_ICDF(21203), AOM_ICDF(26985),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(27622), AOM_ICDF(30399), AOM_ICDF(32070), AOM_ICDF(32399),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18214), AOM_ICDF(24797), AOM_ICDF(31688), AOM_ICDF(32070),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14564), AOM_ICDF(16894), AOM_ICDF(28981), AOM_ICDF(29564),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7802), AOM_ICDF(12483), AOM_ICDF(17164), AOM_ICDF(21845),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(30040), AOM_ICDF(30464), AOM_ICDF(31682), AOM_ICDF(32091),
+ AOM_ICDF(32421), AOM_ICDF(32768), },
+ {AOM_ICDF(20770), AOM_ICDF(22635), AOM_ICDF(29889), AOM_ICDF(31156),
+ AOM_ICDF(31909), AOM_ICDF(32768), },
+ {AOM_ICDF(9112), AOM_ICDF(13841), AOM_ICDF(23864), AOM_ICDF(27288),
+ AOM_ICDF(30322), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(23477), AOM_ICDF(28240), AOM_ICDF(32035), AOM_ICDF(32360),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18183), AOM_ICDF(26268), AOM_ICDF(31861), AOM_ICDF(32205),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14392), AOM_ICDF(23052), AOM_ICDF(30811), AOM_ICDF(31315),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12579), AOM_ICDF(20081), AOM_ICDF(28411), AOM_ICDF(29467),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9882), AOM_ICDF(14796), AOM_ICDF(25492), AOM_ICDF(27040),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11141), AOM_ICDF(13107), AOM_ICDF(21627), AOM_ICDF(23593),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(24700), AOM_ICDF(28735), AOM_ICDF(32055), AOM_ICDF(32379),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19703), AOM_ICDF(25203), AOM_ICDF(31809), AOM_ICDF(32142),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12756), AOM_ICDF(18882), AOM_ICDF(30716), AOM_ICDF(31103),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9508), AOM_ICDF(13922), AOM_ICDF(25977), AOM_ICDF(26826),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5243), AOM_ICDF(9175), AOM_ICDF(19661), AOM_ICDF(23593),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26792), AOM_ICDF(29367), AOM_ICDF(32090), AOM_ICDF(32407),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21899), AOM_ICDF(25640), AOM_ICDF(31870), AOM_ICDF(32192),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14205), AOM_ICDF(16907), AOM_ICDF(30415), AOM_ICDF(30764),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10570), AOM_ICDF(13741), AOM_ICDF(23255), AOM_ICDF(26426),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(27743), AOM_ICDF(29950), AOM_ICDF(32116), AOM_ICDF(32430),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21595), AOM_ICDF(24944), AOM_ICDF(31927), AOM_ICDF(32259),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15227), AOM_ICDF(16673), AOM_ICDF(30744), AOM_ICDF(31130),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13797), AOM_ICDF(16384), AOM_ICDF(25007), AOM_ICDF(27594),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(28888), AOM_ICDF(30883), AOM_ICDF(32127), AOM_ICDF(32447),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20978), AOM_ICDF(26121), AOM_ICDF(32090), AOM_ICDF(32406),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16644), AOM_ICDF(18725), AOM_ICDF(30427), AOM_ICDF(31468),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6554), AOM_ICDF(11469), AOM_ICDF(22938), AOM_ICDF(27853),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 16X16
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(2791), AOM_ICDF(5929), AOM_ICDF(15783), AOM_ICDF(21305),
+ AOM_ICDF(24756), AOM_ICDF(32768), },
+ {AOM_ICDF(2492), AOM_ICDF(5974), AOM_ICDF(11999), AOM_ICDF(17892),
+ AOM_ICDF(20328), AOM_ICDF(32768), },
+ {AOM_ICDF(1232), AOM_ICDF(4784), AOM_ICDF(7266), AOM_ICDF(13409),
+ AOM_ICDF(14638), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(10984), AOM_ICDF(15590), AOM_ICDF(26386), AOM_ICDF(26860),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10300), AOM_ICDF(15555), AOM_ICDF(26075), AOM_ICDF(26661),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9016), AOM_ICDF(12368), AOM_ICDF(23292), AOM_ICDF(24037),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7432), AOM_ICDF(9010), AOM_ICDF(19640), AOM_ICDF(20245),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5340), AOM_ICDF(5830), AOM_ICDF(14605), AOM_ICDF(15017),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3041), AOM_ICDF(3357), AOM_ICDF(8664), AOM_ICDF(8983),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(17487), AOM_ICDF(19944), AOM_ICDF(29422), AOM_ICDF(29785),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14365), AOM_ICDF(17572), AOM_ICDF(28369), AOM_ICDF(28763),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10944), AOM_ICDF(12562), AOM_ICDF(24945), AOM_ICDF(25372),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8061), AOM_ICDF(8670), AOM_ICDF(20179), AOM_ICDF(20570),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5386), AOM_ICDF(5759), AOM_ICDF(14881), AOM_ICDF(15238),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3124), AOM_ICDF(3450), AOM_ICDF(9578), AOM_ICDF(9895),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(21610), AOM_ICDF(23212), AOM_ICDF(30674), AOM_ICDF(31007),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15516), AOM_ICDF(17922), AOM_ICDF(29225), AOM_ICDF(29573),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10431), AOM_ICDF(11308), AOM_ICDF(24594), AOM_ICDF(24955),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6949), AOM_ICDF(7331), AOM_ICDF(18758), AOM_ICDF(19089),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4564), AOM_ICDF(4898), AOM_ICDF(12730), AOM_ICDF(13048),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2435), AOM_ICDF(2739), AOM_ICDF(7406), AOM_ICDF(7710),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(24469), AOM_ICDF(25838), AOM_ICDF(31499), AOM_ICDF(31824),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17238), AOM_ICDF(18899), AOM_ICDF(30066), AOM_ICDF(30395),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10423), AOM_ICDF(10890), AOM_ICDF(24655), AOM_ICDF(24992),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6612), AOM_ICDF(6939), AOM_ICDF(18149), AOM_ICDF(18467),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4122), AOM_ICDF(4431), AOM_ICDF(12556), AOM_ICDF(12874),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1910), AOM_ICDF(2211), AOM_ICDF(7840), AOM_ICDF(8142),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(27205), AOM_ICDF(28145), AOM_ICDF(31900), AOM_ICDF(32218),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18503), AOM_ICDF(19729), AOM_ICDF(30590), AOM_ICDF(30916),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10343), AOM_ICDF(10734), AOM_ICDF(24636), AOM_ICDF(24963),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6629), AOM_ICDF(6955), AOM_ICDF(18492), AOM_ICDF(18810),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4131), AOM_ICDF(4437), AOM_ICDF(13086), AOM_ICDF(13392),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4005), AOM_ICDF(5097), AOM_ICDF(9102), AOM_ICDF(10194),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(1286), AOM_ICDF(10273), AOM_ICDF(21021), AOM_ICDF(28617),
+ AOM_ICDF(29729), AOM_ICDF(32768), },
+ {AOM_ICDF(941), AOM_ICDF(10009), AOM_ICDF(17718), AOM_ICDF(25847),
+ AOM_ICDF(27712), AOM_ICDF(32768), },
+ {AOM_ICDF(508), AOM_ICDF(9488), AOM_ICDF(12907), AOM_ICDF(21634),
+ AOM_ICDF(23969), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(23900), AOM_ICDF(25135), AOM_ICDF(31528), AOM_ICDF(31861),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18613), AOM_ICDF(22015), AOM_ICDF(30774), AOM_ICDF(31124),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13064), AOM_ICDF(16135), AOM_ICDF(28060), AOM_ICDF(28484),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10563), AOM_ICDF(12428), AOM_ICDF(24847), AOM_ICDF(25281),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7960), AOM_ICDF(9069), AOM_ICDF(20548), AOM_ICDF(21017),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6944), AOM_ICDF(7491), AOM_ICDF(16595), AOM_ICDF(17007),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(24972), AOM_ICDF(26434), AOM_ICDF(31771), AOM_ICDF(32097),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18362), AOM_ICDF(20757), AOM_ICDF(30733), AOM_ICDF(31070),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11226), AOM_ICDF(12487), AOM_ICDF(26292), AOM_ICDF(26651),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7823), AOM_ICDF(8448), AOM_ICDF(20940), AOM_ICDF(21314),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4964), AOM_ICDF(5365), AOM_ICDF(14104), AOM_ICDF(14457),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2435), AOM_ICDF(2712), AOM_ICDF(8247), AOM_ICDF(8524),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26551), AOM_ICDF(27694), AOM_ICDF(31943), AOM_ICDF(32261),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19519), AOM_ICDF(21452), AOM_ICDF(31120), AOM_ICDF(31446),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11272), AOM_ICDF(11965), AOM_ICDF(26389), AOM_ICDF(26736),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7109), AOM_ICDF(7485), AOM_ICDF(19585), AOM_ICDF(19920),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4033), AOM_ICDF(4370), AOM_ICDF(12546), AOM_ICDF(12865),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1570), AOM_ICDF(2158), AOM_ICDF(7456), AOM_ICDF(8045),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(27654), AOM_ICDF(28637), AOM_ICDF(32030), AOM_ICDF(32345),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20795), AOM_ICDF(22232), AOM_ICDF(31351), AOM_ICDF(31672),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10841), AOM_ICDF(11329), AOM_ICDF(25676), AOM_ICDF(26002),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6589), AOM_ICDF(6943), AOM_ICDF(18084), AOM_ICDF(18412),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3970), AOM_ICDF(4279), AOM_ICDF(12009), AOM_ICDF(12318),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3449), AOM_ICDF(3967), AOM_ICDF(7761), AOM_ICDF(8278),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29545), AOM_ICDF(30314), AOM_ICDF(32084), AOM_ICDF(32404),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21229), AOM_ICDF(22783), AOM_ICDF(31470), AOM_ICDF(31800),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10409), AOM_ICDF(11031), AOM_ICDF(25267), AOM_ICDF(25669),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6456), AOM_ICDF(6909), AOM_ICDF(18270), AOM_ICDF(18674),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4253), AOM_ICDF(5017), AOM_ICDF(13288), AOM_ICDF(13706),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(1627), AOM_ICDF(2324), AOM_ICDF(8831), AOM_ICDF(9528),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(24627), AOM_ICDF(25102), AOM_ICDF(30943), AOM_ICDF(31607),
+ AOM_ICDF(32215), AOM_ICDF(32768), },
+ {AOM_ICDF(17408), AOM_ICDF(18757), AOM_ICDF(28256), AOM_ICDF(30111),
+ AOM_ICDF(31225), AOM_ICDF(32768), },
+ {AOM_ICDF(10984), AOM_ICDF(14293), AOM_ICDF(22894), AOM_ICDF(27503),
+ AOM_ICDF(28853), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(16390), AOM_ICDF(25826), AOM_ICDF(31293), AOM_ICDF(31726),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14074), AOM_ICDF(25147), AOM_ICDF(31045), AOM_ICDF(31638),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13598), AOM_ICDF(20524), AOM_ICDF(28818), AOM_ICDF(29894),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10035), AOM_ICDF(13322), AOM_ICDF(25086), AOM_ICDF(26332),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7156), AOM_ICDF(8035), AOM_ICDF(18456), AOM_ICDF(19334),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(10923), AOM_ICDF(19115), AOM_ICDF(21845),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(22787), AOM_ICDF(27489), AOM_ICDF(31676), AOM_ICDF(32026),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17518), AOM_ICDF(23800), AOM_ICDF(31204), AOM_ICDF(31578),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10686), AOM_ICDF(15226), AOM_ICDF(28087), AOM_ICDF(28560),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9612), AOM_ICDF(11942), AOM_ICDF(22574), AOM_ICDF(23010),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6437), AOM_ICDF(8192), AOM_ICDF(18139), AOM_ICDF(19895),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26773), AOM_ICDF(28429), AOM_ICDF(31782), AOM_ICDF(32120),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18449), AOM_ICDF(22329), AOM_ICDF(30991), AOM_ICDF(31329),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12861), AOM_ICDF(14182), AOM_ICDF(27130), AOM_ICDF(27395),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4681), AOM_ICDF(6554), AOM_ICDF(22469), AOM_ICDF(23874),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8623), AOM_ICDF(13797), AOM_ICDF(22420), AOM_ICDF(27594),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28378), AOM_ICDF(29466), AOM_ICDF(31934), AOM_ICDF(32245),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19880), AOM_ICDF(21733), AOM_ICDF(31206), AOM_ICDF(31550),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12173), AOM_ICDF(13245), AOM_ICDF(27638), AOM_ICDF(27945),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6215), AOM_ICDF(7910), AOM_ICDF(19774), AOM_ICDF(21469),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(21203), AOM_ICDF(26985),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(30333), AOM_ICDF(31015), AOM_ICDF(32078), AOM_ICDF(32401),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19277), AOM_ICDF(21376), AOM_ICDF(31072), AOM_ICDF(31407),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12978), AOM_ICDF(13724), AOM_ICDF(28144), AOM_ICDF(28442),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10031), AOM_ICDF(12037), AOM_ICDF(25412), AOM_ICDF(27418),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(29777), AOM_ICDF(30229), AOM_ICDF(31726), AOM_ICDF(32104),
+ AOM_ICDF(32440), AOM_ICDF(32768), },
+ {AOM_ICDF(18551), AOM_ICDF(20755), AOM_ICDF(29778), AOM_ICDF(30685),
+ AOM_ICDF(31935), AOM_ICDF(32768), },
+ {AOM_ICDF(6236), AOM_ICDF(13170), AOM_ICDF(24037), AOM_ICDF(25823),
+ AOM_ICDF(30798), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(28890), AOM_ICDF(30863), AOM_ICDF(32128), AOM_ICDF(32440),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17311), AOM_ICDF(27082), AOM_ICDF(31871), AOM_ICDF(32209),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13447), AOM_ICDF(25217), AOM_ICDF(31158), AOM_ICDF(31793),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11906), AOM_ICDF(20177), AOM_ICDF(29976), AOM_ICDF(30713),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14883), AOM_ICDF(17134), AOM_ICDF(27140), AOM_ICDF(28266),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14959), AOM_ICDF(17096), AOM_ICDF(22795), AOM_ICDF(25645),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(29494), AOM_ICDF(30807), AOM_ICDF(32086), AOM_ICDF(32404),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19860), AOM_ICDF(25179), AOM_ICDF(31857), AOM_ICDF(32190),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13936), AOM_ICDF(19209), AOM_ICDF(30508), AOM_ICDF(31073),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7168), AOM_ICDF(10240), AOM_ICDF(24576), AOM_ICDF(27648),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(19275), AOM_ICDF(25058),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(30496), AOM_ICDF(31243), AOM_ICDF(32121), AOM_ICDF(32433),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21369), AOM_ICDF(24262), AOM_ICDF(31827), AOM_ICDF(32158),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18971), AOM_ICDF(21127), AOM_ICDF(29319), AOM_ICDF(30612),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7710), AOM_ICDF(13493), AOM_ICDF(21203), AOM_ICDF(26985),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(30922), AOM_ICDF(31459), AOM_ICDF(32136), AOM_ICDF(32449),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22640), AOM_ICDF(24782), AOM_ICDF(31768), AOM_ICDF(32076),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12955), AOM_ICDF(14860), AOM_ICDF(28958), AOM_ICDF(30101),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7282), AOM_ICDF(12743), AOM_ICDF(21845), AOM_ICDF(27307),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(30469), AOM_ICDF(31279), AOM_ICDF(32115), AOM_ICDF(32446),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19748), AOM_ICDF(24367), AOM_ICDF(31900), AOM_ICDF(32257),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12684), AOM_ICDF(16120), AOM_ICDF(30125), AOM_ICDF(30918),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 32X32
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(8402), AOM_ICDF(9860), AOM_ICDF(23425), AOM_ICDF(26798),
+ AOM_ICDF(28753), AOM_ICDF(32768), },
+ {AOM_ICDF(4503), AOM_ICDF(7478), AOM_ICDF(14541), AOM_ICDF(19455),
+ AOM_ICDF(21058), AOM_ICDF(32768), },
+ {AOM_ICDF(1404), AOM_ICDF(4914), AOM_ICDF(7456), AOM_ICDF(13239),
+ AOM_ICDF(14005), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(11786), AOM_ICDF(17804), AOM_ICDF(26686), AOM_ICDF(27285),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10456), AOM_ICDF(16685), AOM_ICDF(26272), AOM_ICDF(27135),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8297), AOM_ICDF(12591), AOM_ICDF(23088), AOM_ICDF(24288),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6320), AOM_ICDF(8297), AOM_ICDF(18902), AOM_ICDF(20112),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4385), AOM_ICDF(4892), AOM_ICDF(12779), AOM_ICDF(13476),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2151), AOM_ICDF(2470), AOM_ICDF(6432), AOM_ICDF(6758),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(17988), AOM_ICDF(21025), AOM_ICDF(29658), AOM_ICDF(30075),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14641), AOM_ICDF(18188), AOM_ICDF(28759), AOM_ICDF(29202),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10951), AOM_ICDF(12924), AOM_ICDF(25087), AOM_ICDF(25515),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(9165), AOM_ICDF(20302), AOM_ICDF(20696),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5213), AOM_ICDF(5567), AOM_ICDF(14740), AOM_ICDF(15114),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2785), AOM_ICDF(3096), AOM_ICDF(8153), AOM_ICDF(8465),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(22839), AOM_ICDF(24625), AOM_ICDF(31013), AOM_ICDF(31343),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16111), AOM_ICDF(18689), AOM_ICDF(29552), AOM_ICDF(29896),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10736), AOM_ICDF(11502), AOM_ICDF(24493), AOM_ICDF(24827),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7153), AOM_ICDF(7570), AOM_ICDF(18744), AOM_ICDF(19067),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4285), AOM_ICDF(4591), AOM_ICDF(11651), AOM_ICDF(11957),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2064), AOM_ICDF(2322), AOM_ICDF(6321), AOM_ICDF(6579),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(24955), AOM_ICDF(26499), AOM_ICDF(31625), AOM_ICDF(31948),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17242), AOM_ICDF(19354), AOM_ICDF(30096), AOM_ICDF(30432),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10470), AOM_ICDF(11049), AOM_ICDF(24405), AOM_ICDF(24742),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6717), AOM_ICDF(7038), AOM_ICDF(17553), AOM_ICDF(17870),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4030), AOM_ICDF(4342), AOM_ICDF(11280), AOM_ICDF(11592),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2060), AOM_ICDF(2355), AOM_ICDF(6966), AOM_ICDF(7260),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29697), AOM_ICDF(30286), AOM_ICDF(32009), AOM_ICDF(32325),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18629), AOM_ICDF(19720), AOM_ICDF(30251), AOM_ICDF(30574),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9459), AOM_ICDF(9826), AOM_ICDF(22948), AOM_ICDF(23264),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5742), AOM_ICDF(6057), AOM_ICDF(16269), AOM_ICDF(16580),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3696), AOM_ICDF(4006), AOM_ICDF(11276), AOM_ICDF(11586),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2359), AOM_ICDF(2614), AOM_ICDF(5801), AOM_ICDF(6056),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(14224), AOM_ICDF(15827), AOM_ICDF(27984), AOM_ICDF(30263),
+ AOM_ICDF(31458), AOM_ICDF(32768), },
+ {AOM_ICDF(4253), AOM_ICDF(7150), AOM_ICDF(20729), AOM_ICDF(24629),
+ AOM_ICDF(28621), AOM_ICDF(32768), },
+ {AOM_ICDF(1405), AOM_ICDF(5159), AOM_ICDF(12422), AOM_ICDF(17006),
+ AOM_ICDF(24088), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(20029), AOM_ICDF(23525), AOM_ICDF(30941), AOM_ICDF(31369),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15691), AOM_ICDF(22792), AOM_ICDF(30520), AOM_ICDF(30960),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12036), AOM_ICDF(18829), AOM_ICDF(28256), AOM_ICDF(29025),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10881), AOM_ICDF(14586), AOM_ICDF(25416), AOM_ICDF(26318),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11249), AOM_ICDF(13311), AOM_ICDF(23713), AOM_ICDF(24498),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9444), AOM_ICDF(10609), AOM_ICDF(20170), AOM_ICDF(21025),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(23805), AOM_ICDF(26370), AOM_ICDF(31579), AOM_ICDF(31927),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16685), AOM_ICDF(21243), AOM_ICDF(30526), AOM_ICDF(30890),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11661), AOM_ICDF(14143), AOM_ICDF(26804), AOM_ICDF(27193),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8321), AOM_ICDF(9593), AOM_ICDF(21814), AOM_ICDF(22228),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6243), AOM_ICDF(6820), AOM_ICDF(16151), AOM_ICDF(16506),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3612), AOM_ICDF(4386), AOM_ICDF(9547), AOM_ICDF(10321),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26022), AOM_ICDF(27534), AOM_ICDF(31845), AOM_ICDF(32167),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18692), AOM_ICDF(21351), AOM_ICDF(30871), AOM_ICDF(31203),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11493), AOM_ICDF(12410), AOM_ICDF(26280), AOM_ICDF(26619),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7099), AOM_ICDF(7581), AOM_ICDF(19315), AOM_ICDF(19619),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3329), AOM_ICDF(3623), AOM_ICDF(10868), AOM_ICDF(11162),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3104), AOM_ICDF(4139), AOM_ICDF(10003), AOM_ICDF(11038),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28126), AOM_ICDF(29216), AOM_ICDF(32027), AOM_ICDF(32345),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19828), AOM_ICDF(22063), AOM_ICDF(31140), AOM_ICDF(31465),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11206), AOM_ICDF(11832), AOM_ICDF(25718), AOM_ICDF(26041),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6496), AOM_ICDF(6825), AOM_ICDF(18069), AOM_ICDF(18408),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4600), AOM_ICDF(4904), AOM_ICDF(12431), AOM_ICDF(12735),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2016), AOM_ICDF(3529), AOM_ICDF(8066), AOM_ICDF(9578),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(30246), AOM_ICDF(30814), AOM_ICDF(32096), AOM_ICDF(32411),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21165), AOM_ICDF(22238), AOM_ICDF(31122), AOM_ICDF(31445),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10123), AOM_ICDF(10519), AOM_ICDF(24102), AOM_ICDF(24419),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5968), AOM_ICDF(6277), AOM_ICDF(17606), AOM_ICDF(17924),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4312), AOM_ICDF(4620), AOM_ICDF(12131), AOM_ICDF(12439),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4608), AOM_ICDF(6144), AOM_ICDF(9216), AOM_ICDF(10752),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(22808), AOM_ICDF(23508), AOM_ICDF(29956), AOM_ICDF(30649),
+ AOM_ICDF(31698), AOM_ICDF(32768), },
+ {AOM_ICDF(11001), AOM_ICDF(12792), AOM_ICDF(25018), AOM_ICDF(27680),
+ AOM_ICDF(29623), AOM_ICDF(32768), },
+ {AOM_ICDF(6919), AOM_ICDF(10026), AOM_ICDF(19635), AOM_ICDF(24728),
+ AOM_ICDF(26490), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(12861), AOM_ICDF(25068), AOM_ICDF(30802), AOM_ICDF(31375),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11298), AOM_ICDF(21545), AOM_ICDF(29953), AOM_ICDF(30816),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13053), AOM_ICDF(24270), AOM_ICDF(28485), AOM_ICDF(29845),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7710), AOM_ICDF(15059), AOM_ICDF(26383), AOM_ICDF(28431),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8856), AOM_ICDF(10332), AOM_ICDF(18008), AOM_ICDF(19779),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3855), AOM_ICDF(7710), AOM_ICDF(19275), AOM_ICDF(22167),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(19458), AOM_ICDF(25796), AOM_ICDF(31754), AOM_ICDF(32007),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16458), AOM_ICDF(23827), AOM_ICDF(31294), AOM_ICDF(31638),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16274), AOM_ICDF(18913), AOM_ICDF(28150), AOM_ICDF(29029),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12429), AOM_ICDF(15254), AOM_ICDF(24858), AOM_ICDF(26553),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7399), AOM_ICDF(11627), AOM_ICDF(21141), AOM_ICDF(24312),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(17348), AOM_ICDF(23130),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(25493), AOM_ICDF(28975), AOM_ICDF(31960), AOM_ICDF(32271),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16904), AOM_ICDF(21759), AOM_ICDF(31381), AOM_ICDF(31728),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9709), AOM_ICDF(11529), AOM_ICDF(24879), AOM_ICDF(26700),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6554), AOM_ICDF(13107), AOM_ICDF(22938), AOM_ICDF(27853),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5461), AOM_ICDF(10923), AOM_ICDF(20025), AOM_ICDF(25486),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(26127), AOM_ICDF(28926), AOM_ICDF(31725), AOM_ICDF(32274),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17673), AOM_ICDF(25036), AOM_ICDF(31940), AOM_ICDF(32216),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14824), AOM_ICDF(17164), AOM_ICDF(26526), AOM_ICDF(28867),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7282), AOM_ICDF(16384), AOM_ICDF(21845), AOM_ICDF(27307),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(30683), AOM_ICDF(31149), AOM_ICDF(32155), AOM_ICDF(32449),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17896), AOM_ICDF(22055), AOM_ICDF(31508), AOM_ICDF(31886),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8548), AOM_ICDF(12822), AOM_ICDF(24220), AOM_ICDF(28494),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(27393), AOM_ICDF(28900), AOM_ICDF(31555), AOM_ICDF(31971),
+ AOM_ICDF(32368), AOM_ICDF(32768), },
+ {AOM_ICDF(8379), AOM_ICDF(19364), AOM_ICDF(27675), AOM_ICDF(28688),
+ AOM_ICDF(31114), AOM_ICDF(32768), },
+ {AOM_ICDF(1955), AOM_ICDF(19256), AOM_ICDF(24580), AOM_ICDF(25370),
+ AOM_ICDF(30257), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(31085), AOM_ICDF(31718), AOM_ICDF(32129), AOM_ICDF(32443),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14336), AOM_ICDF(26852), AOM_ICDF(31370), AOM_ICDF(31760),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11751), AOM_ICDF(23544), AOM_ICDF(28851), AOM_ICDF(29567),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14670), AOM_ICDF(21251), AOM_ICDF(28381), AOM_ICDF(29752),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14832), AOM_ICDF(19316), AOM_ICDF(27134), AOM_ICDF(28974),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13312), AOM_ICDF(15360), AOM_ICDF(25600), AOM_ICDF(27648),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(31302), AOM_ICDF(31746), AOM_ICDF(32144), AOM_ICDF(32455),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18343), AOM_ICDF(26723), AOM_ICDF(32018), AOM_ICDF(32434),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10570), AOM_ICDF(16913), AOM_ICDF(29068), AOM_ICDF(30125),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5174), AOM_ICDF(13797), AOM_ICDF(24145), AOM_ICDF(26732),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(21203), AOM_ICDF(26985),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(31420), AOM_ICDF(31795), AOM_ICDF(32144), AOM_ICDF(32455),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21510), AOM_ICDF(28245), AOM_ICDF(32064), AOM_ICDF(32366),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6342), AOM_ICDF(11627), AOM_ICDF(25369), AOM_ICDF(28540),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(31470), AOM_ICDF(31806), AOM_ICDF(32143), AOM_ICDF(32455),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19571), AOM_ICDF(25722), AOM_ICDF(31538), AOM_ICDF(31985),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5461), AOM_ICDF(8738), AOM_ICDF(25122), AOM_ICDF(28399),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(31292), AOM_ICDF(31637), AOM_ICDF(32104), AOM_ICDF(32431),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12774), AOM_ICDF(16652), AOM_ICDF(30002), AOM_ICDF(30986),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4652), AOM_ICDF(11442), AOM_ICDF(30231), AOM_ICDF(30593),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7022), AOM_ICDF(10031), AOM_ICDF(28087), AOM_ICDF(29090),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+};
+
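+/* Editor's note (illustration only, not upstream code): the nesting visible
+ * in the comments above is tx size -> plane -> intra/inter reference ->
+ * band -> context -> CDF row; band 0 carries three contexts with six-entry
+ * rows, while bands 1-5 carry six contexts with five-entry rows. The
+ * typedef and accessor below are hypothetical mirrors of that shape; the
+ * real coeff_cdf_model typedef (defined elsewhere in this library) may pad
+ * or order its dimensions differently.
+ */
+#include <stdint.h>
+
+enum { kRefTypes = 2, kBands = 6, kCtxs = 6, kCdfLen = 6 };
+typedef uint16_t head_cdf_table[kRefTypes][kBands][kCtxs][kCdfLen];
+
+/* Fetch one CDF row for a given reference type, band, and context. */
+static const uint16_t *head_cdf_row(const head_cdf_table *t, int is_inter,
+                                    int band, int ctx) {
+  return (*t)[is_inter][band][ctx];
+}
+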
+static const coeff_cdf_model
+av1_default_coef_head_cdfs_q3[TX_SIZES][PLANE_TYPES] = {
+ { // TX 4X4
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(25117), AOM_ICDF(25655), AOM_ICDF(28371), AOM_ICDF(30246),
+ AOM_ICDF(30939), AOM_ICDF(32768), },
+ {AOM_ICDF(15083), AOM_ICDF(16850), AOM_ICDF(26029), AOM_ICDF(29031),
+ AOM_ICDF(30115), AOM_ICDF(32768), },
+ {AOM_ICDF(8774), AOM_ICDF(12118), AOM_ICDF(22041), AOM_ICDF(26730),
+ AOM_ICDF(28574), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(13690), AOM_ICDF(23135), AOM_ICDF(31469), AOM_ICDF(31868),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13306), AOM_ICDF(22730), AOM_ICDF(31466), AOM_ICDF(31860),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13503), AOM_ICDF(19892), AOM_ICDF(30528), AOM_ICDF(31005),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13150), AOM_ICDF(16108), AOM_ICDF(28345), AOM_ICDF(28869),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12014), AOM_ICDF(12842), AOM_ICDF(25693), AOM_ICDF(26145),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8937), AOM_ICDF(13405), AOM_ICDF(23831), AOM_ICDF(28300),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(18707), AOM_ICDF(26260), AOM_ICDF(31853), AOM_ICDF(32238),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15985), AOM_ICDF(24804), AOM_ICDF(31717), AOM_ICDF(32115),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14012), AOM_ICDF(18913), AOM_ICDF(30497), AOM_ICDF(31005),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12300), AOM_ICDF(14741), AOM_ICDF(28386), AOM_ICDF(28958),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12483), AOM_ICDF(15084), AOM_ICDF(24966), AOM_ICDF(26526),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(19934), AOM_ICDF(28117), AOM_ICDF(32022), AOM_ICDF(32378),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14925), AOM_ICDF(26201), AOM_ICDF(31828), AOM_ICDF(32262),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13132), AOM_ICDF(18927), AOM_ICDF(30269), AOM_ICDF(31173),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13926), AOM_ICDF(19251), AOM_ICDF(28262), AOM_ICDF(29901),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(16626), AOM_ICDF(28981), AOM_ICDF(32074), AOM_ICDF(32413),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12895), AOM_ICDF(27583), AOM_ICDF(31974), AOM_ICDF(32332),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14150), AOM_ICDF(22094), AOM_ICDF(31030), AOM_ICDF(31775),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(5279), AOM_ICDF(29309), AOM_ICDF(32149), AOM_ICDF(32477),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5880), AOM_ICDF(29657), AOM_ICDF(32086), AOM_ICDF(32385),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11469), AOM_ICDF(18022), AOM_ICDF(22938), AOM_ICDF(27853),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(8302), AOM_ICDF(14024), AOM_ICDF(16072), AOM_ICDF(27926),
+ AOM_ICDF(28871), AOM_ICDF(32768), },
+ {AOM_ICDF(9359), AOM_ICDF(15522), AOM_ICDF(20581), AOM_ICDF(28595),
+ AOM_ICDF(29250), AOM_ICDF(32768), },
+ {AOM_ICDF(5318), AOM_ICDF(12803), AOM_ICDF(19679), AOM_ICDF(27719),
+ AOM_ICDF(28609), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(22745), AOM_ICDF(25806), AOM_ICDF(31997), AOM_ICDF(32327),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18803), AOM_ICDF(25473), AOM_ICDF(31960), AOM_ICDF(32293),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15553), AOM_ICDF(19553), AOM_ICDF(31039), AOM_ICDF(31407),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13037), AOM_ICDF(15169), AOM_ICDF(28589), AOM_ICDF(29060),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10871), AOM_ICDF(11694), AOM_ICDF(24941), AOM_ICDF(25360),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6242), AOM_ICDF(10923), AOM_ICDF(18725), AOM_ICDF(23406),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(22276), AOM_ICDF(27316), AOM_ICDF(32078), AOM_ICDF(32402),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19227), AOM_ICDF(25420), AOM_ICDF(31954), AOM_ICDF(32293),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12383), AOM_ICDF(16969), AOM_ICDF(30280), AOM_ICDF(30766),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11282), AOM_ICDF(13725), AOM_ICDF(26516), AOM_ICDF(27379),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5120), AOM_ICDF(9216), AOM_ICDF(15360), AOM_ICDF(20480),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(22814), AOM_ICDF(28656), AOM_ICDF(32097), AOM_ICDF(32425),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19349), AOM_ICDF(26355), AOM_ICDF(32000), AOM_ICDF(32341),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13824), AOM_ICDF(17830), AOM_ICDF(30780), AOM_ICDF(31142),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6746), AOM_ICDF(13493), AOM_ICDF(25058), AOM_ICDF(27949),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(19746), AOM_ICDF(28536), AOM_ICDF(32088), AOM_ICDF(32411),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17457), AOM_ICDF(27155), AOM_ICDF(32024), AOM_ICDF(32376),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10949), AOM_ICDF(16662), AOM_ICDF(29118), AOM_ICDF(30229),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6096), AOM_ICDF(12955), AOM_ICDF(21337), AOM_ICDF(27434),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(10114), AOM_ICDF(29713), AOM_ICDF(32140), AOM_ICDF(32448),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11455), AOM_ICDF(29324), AOM_ICDF(32094), AOM_ICDF(32419),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6554), AOM_ICDF(14418), AOM_ICDF(23593), AOM_ICDF(27525),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(30309), AOM_ICDF(30623), AOM_ICDF(31738), AOM_ICDF(32084),
+ AOM_ICDF(32428), AOM_ICDF(32768), },
+ {AOM_ICDF(25732), AOM_ICDF(26211), AOM_ICDF(31079), AOM_ICDF(31737),
+ AOM_ICDF(32269), AOM_ICDF(32768), },
+ {AOM_ICDF(19676), AOM_ICDF(21061), AOM_ICDF(29564), AOM_ICDF(31011),
+ AOM_ICDF(31879), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(12328), AOM_ICDF(28270), AOM_ICDF(32125), AOM_ICDF(32447),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11177), AOM_ICDF(28585), AOM_ICDF(32076), AOM_ICDF(32401),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13232), AOM_ICDF(25364), AOM_ICDF(31558), AOM_ICDF(32072),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11997), AOM_ICDF(18443), AOM_ICDF(30261), AOM_ICDF(31873),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7399), AOM_ICDF(11627), AOM_ICDF(24312), AOM_ICDF(27483),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(16893), AOM_ICDF(29817), AOM_ICDF(32005), AOM_ICDF(32463),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14911), AOM_ICDF(27935), AOM_ICDF(32179), AOM_ICDF(32473),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9973), AOM_ICDF(19946), AOM_ICDF(24220), AOM_ICDF(28494),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(18859), AOM_ICDF(29232), AOM_ICDF(31354), AOM_ICDF(32061),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11281), AOM_ICDF(26322), AOM_ICDF(29545), AOM_ICDF(31156),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(8937), AOM_ICDF(19363), AOM_ICDF(23831), AOM_ICDF(28300),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(30586), AOM_ICDF(30911), AOM_ICDF(31771), AOM_ICDF(32121),
+ AOM_ICDF(32443), AOM_ICDF(32768), },
+ {AOM_ICDF(23875), AOM_ICDF(24492), AOM_ICDF(30970), AOM_ICDF(31684),
+ AOM_ICDF(32217), AOM_ICDF(32768), },
+ {AOM_ICDF(15874), AOM_ICDF(17477), AOM_ICDF(29172), AOM_ICDF(30703),
+ AOM_ICDF(32023), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(17059), AOM_ICDF(30027), AOM_ICDF(32152), AOM_ICDF(32450),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13931), AOM_ICDF(29387), AOM_ICDF(32103), AOM_ICDF(32414),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12903), AOM_ICDF(25742), AOM_ICDF(31906), AOM_ICDF(32289),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13493), AOM_ICDF(23130), AOM_ICDF(29614), AOM_ICDF(30840),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6554), AOM_ICDF(14746), AOM_ICDF(26214), AOM_ICDF(28672),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(18660), AOM_ICDF(30626), AOM_ICDF(32150), AOM_ICDF(32459),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17338), AOM_ICDF(29279), AOM_ICDF(32168), AOM_ICDF(32495),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11916), AOM_ICDF(17873), AOM_ICDF(26810), AOM_ICDF(29789),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7282), AOM_ICDF(14564), AOM_ICDF(21845), AOM_ICDF(27307),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(23269), AOM_ICDF(31374), AOM_ICDF(32245), AOM_ICDF(32507),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15741), AOM_ICDF(27628), AOM_ICDF(30840), AOM_ICDF(31804),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(15464), AOM_ICDF(29454), AOM_ICDF(30559), AOM_ICDF(31663),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6827), AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 8X8
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(18128), AOM_ICDF(19079), AOM_ICDF(27400), AOM_ICDF(29265),
+ AOM_ICDF(30385), AOM_ICDF(32768), },
+ {AOM_ICDF(10290), AOM_ICDF(12446), AOM_ICDF(23496), AOM_ICDF(26905),
+ AOM_ICDF(28729), AOM_ICDF(32768), },
+ {AOM_ICDF(5877), AOM_ICDF(9423), AOM_ICDF(18374), AOM_ICDF(23871),
+ AOM_ICDF(26028), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(16010), AOM_ICDF(22388), AOM_ICDF(30990), AOM_ICDF(31378),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14579), AOM_ICDF(21619), AOM_ICDF(30755), AOM_ICDF(31177),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13859), AOM_ICDF(18660), AOM_ICDF(29381), AOM_ICDF(29904),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12288), AOM_ICDF(14656), AOM_ICDF(27505), AOM_ICDF(28077),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10009), AOM_ICDF(10812), AOM_ICDF(23591), AOM_ICDF(24068),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8663), AOM_ICDF(9981), AOM_ICDF(19962), AOM_ICDF(20904),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(20773), AOM_ICDF(24941), AOM_ICDF(31701), AOM_ICDF(32046),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17537), AOM_ICDF(22279), AOM_ICDF(31257), AOM_ICDF(31629),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13337), AOM_ICDF(15972), AOM_ICDF(29181), AOM_ICDF(29575),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11120), AOM_ICDF(12128), AOM_ICDF(26440), AOM_ICDF(26874),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10061), AOM_ICDF(10800), AOM_ICDF(23999), AOM_ICDF(24276),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(24073), AOM_ICDF(27227), AOM_ICDF(31920), AOM_ICDF(32246),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18916), AOM_ICDF(22611), AOM_ICDF(31508), AOM_ICDF(31853),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13371), AOM_ICDF(14495), AOM_ICDF(28662), AOM_ICDF(29093),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9283), AOM_ICDF(9840), AOM_ICDF(24228), AOM_ICDF(24506),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4681), AOM_ICDF(9362), AOM_ICDF(20285), AOM_ICDF(24966),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(25180), AOM_ICDF(28079), AOM_ICDF(32048), AOM_ICDF(32365),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19790), AOM_ICDF(23090), AOM_ICDF(31675), AOM_ICDF(32001),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12634), AOM_ICDF(13382), AOM_ICDF(28384), AOM_ICDF(28718),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11264), AOM_ICDF(12083), AOM_ICDF(28672), AOM_ICDF(29286),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7710), AOM_ICDF(13493), AOM_ICDF(21203), AOM_ICDF(26985),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(26180), AOM_ICDF(29109), AOM_ICDF(32085), AOM_ICDF(32408),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19990), AOM_ICDF(23991), AOM_ICDF(31806), AOM_ICDF(32152),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13735), AOM_ICDF(14612), AOM_ICDF(29022), AOM_ICDF(29326),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(10240), AOM_ICDF(25259), AOM_ICDF(27307),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(5084), AOM_ICDF(13063), AOM_ICDF(15732), AOM_ICDF(27628),
+ AOM_ICDF(28823), AOM_ICDF(32768), },
+ {AOM_ICDF(3233), AOM_ICDF(11850), AOM_ICDF(16878), AOM_ICDF(26809),
+ AOM_ICDF(27973), AOM_ICDF(32768), },
+ {AOM_ICDF(1405), AOM_ICDF(10468), AOM_ICDF(15220), AOM_ICDF(25209),
+ AOM_ICDF(26482), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(23854), AOM_ICDF(26692), AOM_ICDF(31964), AOM_ICDF(32291),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20514), AOM_ICDF(25677), AOM_ICDF(31833), AOM_ICDF(32170),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16504), AOM_ICDF(20235), AOM_ICDF(30877), AOM_ICDF(31237),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13241), AOM_ICDF(15173), AOM_ICDF(28673), AOM_ICDF(29116),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9526), AOM_ICDF(10553), AOM_ICDF(23852), AOM_ICDF(24361),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(6428), AOM_ICDF(17806), AOM_ICDF(18148),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(24345), AOM_ICDF(27736), AOM_ICDF(32033), AOM_ICDF(32355),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20277), AOM_ICDF(23726), AOM_ICDF(31700), AOM_ICDF(32031),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13361), AOM_ICDF(15650), AOM_ICDF(29411), AOM_ICDF(29794),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9421), AOM_ICDF(10887), AOM_ICDF(25426), AOM_ICDF(26039),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6242), AOM_ICDF(7607), AOM_ICDF(17749), AOM_ICDF(18530),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26118), AOM_ICDF(28888), AOM_ICDF(32095), AOM_ICDF(32413),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21286), AOM_ICDF(24631), AOM_ICDF(31871), AOM_ICDF(32198),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13285), AOM_ICDF(15402), AOM_ICDF(29317), AOM_ICDF(29737),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9902), AOM_ICDF(10814), AOM_ICDF(24755), AOM_ICDF(25276),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11431), AOM_ICDF(13717), AOM_ICDF(20575), AOM_ICDF(23623),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(27178), AOM_ICDF(29612), AOM_ICDF(32119), AOM_ICDF(32433),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22095), AOM_ICDF(25550), AOM_ICDF(31976), AOM_ICDF(32298),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13847), AOM_ICDF(16273), AOM_ICDF(29602), AOM_ICDF(30024),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8771), AOM_ICDF(10923), AOM_ICDF(19694), AOM_ICDF(20521),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11398), AOM_ICDF(15672), AOM_ICDF(21370), AOM_ICDF(25645),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(28257), AOM_ICDF(30327), AOM_ICDF(32126), AOM_ICDF(32441),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22325), AOM_ICDF(26453), AOM_ICDF(32054), AOM_ICDF(32380),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14860), AOM_ICDF(17652), AOM_ICDF(30682), AOM_ICDF(31035),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5097), AOM_ICDF(10194), AOM_ICDF(18933), AOM_ICDF(21117),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(28902), AOM_ICDF(29234), AOM_ICDF(31608), AOM_ICDF(31973),
+ AOM_ICDF(32378), AOM_ICDF(32768), },
+ {AOM_ICDF(22721), AOM_ICDF(23397), AOM_ICDF(30476), AOM_ICDF(31293),
+ AOM_ICDF(32179), AOM_ICDF(32768), },
+ {AOM_ICDF(16404), AOM_ICDF(18013), AOM_ICDF(27505), AOM_ICDF(29454),
+ AOM_ICDF(31300), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(14290), AOM_ICDF(27662), AOM_ICDF(31923), AOM_ICDF(32327),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13282), AOM_ICDF(26727), AOM_ICDF(31749), AOM_ICDF(32113),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12514), AOM_ICDF(22487), AOM_ICDF(30689), AOM_ICDF(31459),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11657), AOM_ICDF(16967), AOM_ICDF(29660), AOM_ICDF(30437),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8937), AOM_ICDF(12660), AOM_ICDF(24576), AOM_ICDF(26810),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(20145), AOM_ICDF(28026), AOM_ICDF(31820), AOM_ICDF(32212),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16906), AOM_ICDF(25677), AOM_ICDF(31760), AOM_ICDF(32059),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12332), AOM_ICDF(18322), AOM_ICDF(29597), AOM_ICDF(31006),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(13107), AOM_ICDF(21299), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(23492), AOM_ICDF(29214), AOM_ICDF(32166), AOM_ICDF(32467),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18757), AOM_ICDF(25536), AOM_ICDF(31789), AOM_ICDF(32165),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12603), AOM_ICDF(16384), AOM_ICDF(25206), AOM_ICDF(28987),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(24518), AOM_ICDF(29453), AOM_ICDF(32074), AOM_ICDF(32382),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19369), AOM_ICDF(26533), AOM_ICDF(31972), AOM_ICDF(32370),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(24576), AOM_ICDF(28789), AOM_ICDF(31364), AOM_ICDF(32066),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20052), AOM_ICDF(24454), AOM_ICDF(29834), AOM_ICDF(31301),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(30358), AOM_ICDF(30700), AOM_ICDF(31747), AOM_ICDF(32103),
+ AOM_ICDF(32430), AOM_ICDF(32768), },
+ {AOM_ICDF(22346), AOM_ICDF(23277), AOM_ICDF(30508), AOM_ICDF(31386),
+ AOM_ICDF(32138), AOM_ICDF(32768), },
+ {AOM_ICDF(11974), AOM_ICDF(14562), AOM_ICDF(27349), AOM_ICDF(28970),
+ AOM_ICDF(31969), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(22910), AOM_ICDF(29539), AOM_ICDF(32102), AOM_ICDF(32412),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18429), AOM_ICDF(28710), AOM_ICDF(32106), AOM_ICDF(32432),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13601), AOM_ICDF(25238), AOM_ICDF(31845), AOM_ICDF(32262),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12472), AOM_ICDF(20976), AOM_ICDF(29026), AOM_ICDF(30500),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8738), AOM_ICDF(11469), AOM_ICDF(24030), AOM_ICDF(26761),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(23359), AOM_ICDF(30038), AOM_ICDF(32127), AOM_ICDF(32444),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19590), AOM_ICDF(28108), AOM_ICDF(32056), AOM_ICDF(32382),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15578), AOM_ICDF(22024), AOM_ICDF(29008), AOM_ICDF(30619),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26372), AOM_ICDF(31019), AOM_ICDF(32146), AOM_ICDF(32463),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22190), AOM_ICDF(28573), AOM_ICDF(32160), AOM_ICDF(32464),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(22938), AOM_ICDF(27853),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(26672), AOM_ICDF(31311), AOM_ICDF(32156), AOM_ICDF(32462),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20946), AOM_ICDF(27885), AOM_ICDF(31997), AOM_ICDF(32382),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(27342), AOM_ICDF(31385), AOM_ICDF(32130), AOM_ICDF(32449),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8674), AOM_ICDF(22167), AOM_ICDF(26985), AOM_ICDF(29877),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 16X16
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(2479), AOM_ICDF(4993), AOM_ICDF(17332), AOM_ICDF(21885),
+ AOM_ICDF(25826), AOM_ICDF(32768), },
+ {AOM_ICDF(2848), AOM_ICDF(5996), AOM_ICDF(15242), AOM_ICDF(20755),
+ AOM_ICDF(23763), AOM_ICDF(32768), },
+ {AOM_ICDF(2125), AOM_ICDF(6226), AOM_ICDF(11733), AOM_ICDF(18389),
+ AOM_ICDF(20442), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(14539), AOM_ICDF(19828), AOM_ICDF(29467), AOM_ICDF(29934),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12513), AOM_ICDF(19139), AOM_ICDF(29177), AOM_ICDF(29702),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11826), AOM_ICDF(16348), AOM_ICDF(27245), AOM_ICDF(27977),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10123), AOM_ICDF(12262), AOM_ICDF(24690), AOM_ICDF(25359),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7979), AOM_ICDF(8826), AOM_ICDF(20804), AOM_ICDF(21295),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5262), AOM_ICDF(5604), AOM_ICDF(14716), AOM_ICDF(15015),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(20625), AOM_ICDF(24118), AOM_ICDF(31086), AOM_ICDF(31446),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16710), AOM_ICDF(20899), AOM_ICDF(30505), AOM_ICDF(30864),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13161), AOM_ICDF(15579), AOM_ICDF(27988), AOM_ICDF(28449),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10596), AOM_ICDF(11651), AOM_ICDF(24124), AOM_ICDF(24589),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7724), AOM_ICDF(8452), AOM_ICDF(21060), AOM_ICDF(21476),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7282), AOM_ICDF(9466), AOM_ICDF(18933), AOM_ICDF(21117),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(24265), AOM_ICDF(26472), AOM_ICDF(31667), AOM_ICDF(31998),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18213), AOM_ICDF(21117), AOM_ICDF(30932), AOM_ICDF(31280),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12944), AOM_ICDF(14000), AOM_ICDF(27696), AOM_ICDF(28050),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9709), AOM_ICDF(10056), AOM_ICDF(23282), AOM_ICDF(23579),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8590), AOM_ICDF(9862), AOM_ICDF(18770), AOM_ICDF(19724),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(26658), AOM_ICDF(28275), AOM_ICDF(31975), AOM_ICDF(32294),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20049), AOM_ICDF(22203), AOM_ICDF(31374), AOM_ICDF(31708),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12795), AOM_ICDF(13387), AOM_ICDF(28328), AOM_ICDF(28653),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8607), AOM_ICDF(9073), AOM_ICDF(23383), AOM_ICDF(23695),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(9947), AOM_ICDF(18725), AOM_ICDF(20480),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(28651), AOM_ICDF(29902), AOM_ICDF(32085), AOM_ICDF(32402),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21133), AOM_ICDF(23229), AOM_ICDF(31684), AOM_ICDF(32013),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(13231), AOM_ICDF(14045), AOM_ICDF(28203), AOM_ICDF(28576),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7903), AOM_ICDF(8481), AOM_ICDF(21781), AOM_ICDF(22359),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(824), AOM_ICDF(8672), AOM_ICDF(16514), AOM_ICDF(27587),
+ AOM_ICDF(29231), AOM_ICDF(32768), },
+ {AOM_ICDF(1118), AOM_ICDF(9561), AOM_ICDF(17021), AOM_ICDF(25911),
+ AOM_ICDF(27753), AOM_ICDF(32768), },
+ {AOM_ICDF(806), AOM_ICDF(9313), AOM_ICDF(13998), AOM_ICDF(22910),
+ AOM_ICDF(25224), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(23650), AOM_ICDF(26487), AOM_ICDF(31840), AOM_ICDF(32166),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19593), AOM_ICDF(25206), AOM_ICDF(31604), AOM_ICDF(31944),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15813), AOM_ICDF(19643), AOM_ICDF(30328), AOM_ICDF(30726),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12978), AOM_ICDF(15108), AOM_ICDF(27886), AOM_ICDF(28310),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9793), AOM_ICDF(11020), AOM_ICDF(23305), AOM_ICDF(23818),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4855), AOM_ICDF(5565), AOM_ICDF(14268), AOM_ICDF(14741),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(24547), AOM_ICDF(27751), AOM_ICDF(31964), AOM_ICDF(32285),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19674), AOM_ICDF(23377), AOM_ICDF(31426), AOM_ICDF(31759),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12643), AOM_ICDF(14489), AOM_ICDF(28159), AOM_ICDF(28541),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9110), AOM_ICDF(10279), AOM_ICDF(23565), AOM_ICDF(23992),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5082), AOM_ICDF(5617), AOM_ICDF(16317), AOM_ICDF(16651),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(18971), AOM_ICDF(24145),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26773), AOM_ICDF(29038), AOM_ICDF(32050), AOM_ICDF(32367),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20956), AOM_ICDF(23898), AOM_ICDF(31563), AOM_ICDF(31888),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12527), AOM_ICDF(13472), AOM_ICDF(27840), AOM_ICDF(28211),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8773), AOM_ICDF(9353), AOM_ICDF(22555), AOM_ICDF(22856),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4291), AOM_ICDF(4876), AOM_ICDF(16969), AOM_ICDF(17554),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(17348), AOM_ICDF(23130),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28065), AOM_ICDF(29768), AOM_ICDF(32086), AOM_ICDF(32400),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21847), AOM_ICDF(24001), AOM_ICDF(31608), AOM_ICDF(31929),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12482), AOM_ICDF(13091), AOM_ICDF(27413), AOM_ICDF(27739),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7582), AOM_ICDF(8002), AOM_ICDF(22090), AOM_ICDF(22405),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6324), AOM_ICDF(7186), AOM_ICDF(15809), AOM_ICDF(16671),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29731), AOM_ICDF(30798), AOM_ICDF(32113), AOM_ICDF(32431),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22224), AOM_ICDF(24448), AOM_ICDF(31791), AOM_ICDF(32118),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12622), AOM_ICDF(13513), AOM_ICDF(28103), AOM_ICDF(28530),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8886), AOM_ICDF(9600), AOM_ICDF(22890), AOM_ICDF(23604),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8058), AOM_ICDF(9669), AOM_ICDF(18264), AOM_ICDF(19876),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(27375), AOM_ICDF(27731), AOM_ICDF(31591), AOM_ICDF(31993),
+ AOM_ICDF(32404), AOM_ICDF(32768), },
+ {AOM_ICDF(20943), AOM_ICDF(21758), AOM_ICDF(30037), AOM_ICDF(31074),
+ AOM_ICDF(32003), AOM_ICDF(32768), },
+ {AOM_ICDF(16218), AOM_ICDF(17771), AOM_ICDF(26832), AOM_ICDF(29181),
+ AOM_ICDF(30586), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(17239), AOM_ICDF(27853), AOM_ICDF(31557), AOM_ICDF(32198),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14494), AOM_ICDF(25906), AOM_ICDF(31543), AOM_ICDF(32033),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12980), AOM_ICDF(19788), AOM_ICDF(29137), AOM_ICDF(29410),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11796), AOM_ICDF(14680), AOM_ICDF(26477), AOM_ICDF(27787),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12603), AOM_ICDF(15124), AOM_ICDF(21005), AOM_ICDF(23526),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(22821), AOM_ICDF(27655), AOM_ICDF(32024), AOM_ICDF(32303),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16534), AOM_ICDF(23629), AOM_ICDF(31145), AOM_ICDF(31686),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12407), AOM_ICDF(14952), AOM_ICDF(28950), AOM_ICDF(30859),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6554), AOM_ICDF(10486), AOM_ICDF(19661), AOM_ICDF(23593),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(26369), AOM_ICDF(29624), AOM_ICDF(31996), AOM_ICDF(32272),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19346), AOM_ICDF(24807), AOM_ICDF(31750), AOM_ICDF(32027),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15056), AOM_ICDF(19484), AOM_ICDF(27454), AOM_ICDF(30111),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5783), AOM_ICDF(11565), AOM_ICDF(21203), AOM_ICDF(26985),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28213), AOM_ICDF(30301), AOM_ICDF(32199), AOM_ICDF(32483),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22988), AOM_ICDF(27307), AOM_ICDF(31879), AOM_ICDF(32260),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11796), AOM_ICDF(15729), AOM_ICDF(24904), AOM_ICDF(28836),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29813), AOM_ICDF(31323), AOM_ICDF(32142), AOM_ICDF(32444),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21497), AOM_ICDF(25254), AOM_ICDF(31307), AOM_ICDF(32142),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(30560), AOM_ICDF(30889), AOM_ICDF(31795), AOM_ICDF(32128),
+ AOM_ICDF(32455), AOM_ICDF(32768), },
+ {AOM_ICDF(20347), AOM_ICDF(20993), AOM_ICDF(30496), AOM_ICDF(31112),
+ AOM_ICDF(32263), AOM_ICDF(32768), },
+ {AOM_ICDF(9723), AOM_ICDF(10992), AOM_ICDF(27830), AOM_ICDF(28681),
+ AOM_ICDF(32168), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(25900), AOM_ICDF(30610), AOM_ICDF(32179), AOM_ICDF(32474),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18535), AOM_ICDF(29316), AOM_ICDF(32153), AOM_ICDF(32437),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15230), AOM_ICDF(25845), AOM_ICDF(30922), AOM_ICDF(31845),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(16384), AOM_ICDF(27097), AOM_ICDF(28987),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8548), AOM_ICDF(12822), AOM_ICDF(21370), AOM_ICDF(25645),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(26104), AOM_ICDF(30659), AOM_ICDF(32157), AOM_ICDF(32462),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20457), AOM_ICDF(28242), AOM_ICDF(31682), AOM_ICDF(32225),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10923), AOM_ICDF(16384), AOM_ICDF(24576), AOM_ICDF(28672),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(28740), AOM_ICDF(30618), AOM_ICDF(32154), AOM_ICDF(32461),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19333), AOM_ICDF(26214), AOM_ICDF(30802), AOM_ICDF(31785),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28161), AOM_ICDF(30834), AOM_ICDF(32160), AOM_ICDF(32464),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(26536), AOM_ICDF(29149), AOM_ICDF(31562), AOM_ICDF(32165),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(29913), AOM_ICDF(31560), AOM_ICDF(32172), AOM_ICDF(32470),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22209), AOM_ICDF(28035), AOM_ICDF(30583), AOM_ICDF(31676),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+ { // TX 32X32
+ { // Y plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(3982), AOM_ICDF(6433), AOM_ICDF(20418), AOM_ICDF(25151),
+ AOM_ICDF(27471), AOM_ICDF(32768), },
+ {AOM_ICDF(3342), AOM_ICDF(6943), AOM_ICDF(15018), AOM_ICDF(20274),
+ AOM_ICDF(22412), AOM_ICDF(32768), },
+ {AOM_ICDF(1805), AOM_ICDF(5863), AOM_ICDF(9932), AOM_ICDF(16426),
+ AOM_ICDF(17655), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(11799), AOM_ICDF(19138), AOM_ICDF(28295), AOM_ICDF(28881),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11008), AOM_ICDF(18597), AOM_ICDF(28369), AOM_ICDF(29021),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10104), AOM_ICDF(15628), AOM_ICDF(26339), AOM_ICDF(27195),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8537), AOM_ICDF(11246), AOM_ICDF(22663), AOM_ICDF(23623),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5895), AOM_ICDF(6476), AOM_ICDF(16647), AOM_ICDF(17329),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(4046), AOM_ICDF(4357), AOM_ICDF(10849), AOM_ICDF(11160),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(18503), AOM_ICDF(22222), AOM_ICDF(30403), AOM_ICDF(30814),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15264), AOM_ICDF(19282), AOM_ICDF(29949), AOM_ICDF(30339),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12101), AOM_ICDF(14721), AOM_ICDF(27350), AOM_ICDF(27783),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9243), AOM_ICDF(10177), AOM_ICDF(22679), AOM_ICDF(23097),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5571), AOM_ICDF(5967), AOM_ICDF(16714), AOM_ICDF(17043),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(2731), AOM_ICDF(3755), AOM_ICDF(14677), AOM_ICDF(15701),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(23077), AOM_ICDF(25272), AOM_ICDF(31444), AOM_ICDF(31771),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16598), AOM_ICDF(19790), AOM_ICDF(30479), AOM_ICDF(30822),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11961), AOM_ICDF(12871), AOM_ICDF(27162), AOM_ICDF(27529),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8156), AOM_ICDF(8563), AOM_ICDF(22220), AOM_ICDF(22579),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5851), AOM_ICDF(6242), AOM_ICDF(15994), AOM_ICDF(16384),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(26084), AOM_ICDF(27933), AOM_ICDF(31906), AOM_ICDF(32223),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19335), AOM_ICDF(21760), AOM_ICDF(31149), AOM_ICDF(31477),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12724), AOM_ICDF(13278), AOM_ICDF(27015), AOM_ICDF(27365),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8687), AOM_ICDF(9010), AOM_ICDF(21051), AOM_ICDF(21334),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5814), AOM_ICDF(6606), AOM_ICDF(14534), AOM_ICDF(15327),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(18432), AOM_ICDF(24576),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(30147), AOM_ICDF(30787), AOM_ICDF(32081), AOM_ICDF(32395),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(20402), AOM_ICDF(21697), AOM_ICDF(30943), AOM_ICDF(31266),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11661), AOM_ICDF(12125), AOM_ICDF(25710), AOM_ICDF(26034),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7224), AOM_ICDF(7504), AOM_ICDF(19876), AOM_ICDF(20156),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6183), AOM_ICDF(7110), AOM_ICDF(17002), AOM_ICDF(17930),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5174), AOM_ICDF(10348), AOM_ICDF(17246), AOM_ICDF(22420),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(4079), AOM_ICDF(8378), AOM_ICDF(25109), AOM_ICDF(29897),
+ AOM_ICDF(30898), AOM_ICDF(32768), },
+ {AOM_ICDF(3870), AOM_ICDF(8207), AOM_ICDF(22495), AOM_ICDF(27162),
+ AOM_ICDF(29559), AOM_ICDF(32768), },
+ {AOM_ICDF(2127), AOM_ICDF(6197), AOM_ICDF(15932), AOM_ICDF(20604),
+ AOM_ICDF(27312), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(21253), AOM_ICDF(26168), AOM_ICDF(31780), AOM_ICDF(32120),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16610), AOM_ICDF(23985), AOM_ICDF(31495), AOM_ICDF(31866),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14861), AOM_ICDF(21030), AOM_ICDF(30219), AOM_ICDF(30784),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14573), AOM_ICDF(18162), AOM_ICDF(28524), AOM_ICDF(29116),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14036), AOM_ICDF(15983), AOM_ICDF(26283), AOM_ICDF(27085),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9119), AOM_ICDF(10742), AOM_ICDF(19630), AOM_ICDF(20016),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(23192), AOM_ICDF(27248), AOM_ICDF(31887), AOM_ICDF(32215),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18219), AOM_ICDF(23213), AOM_ICDF(31417), AOM_ICDF(31769),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12657), AOM_ICDF(14754), AOM_ICDF(27845), AOM_ICDF(28233),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8127), AOM_ICDF(8829), AOM_ICDF(20909), AOM_ICDF(21279),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7547), AOM_ICDF(8142), AOM_ICDF(17476), AOM_ICDF(18072),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5461), AOM_ICDF(10923), AOM_ICDF(16384), AOM_ICDF(21845),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(25516), AOM_ICDF(28301), AOM_ICDF(31970), AOM_ICDF(32289),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19094), AOM_ICDF(23041), AOM_ICDF(31404), AOM_ICDF(31732),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12328), AOM_ICDF(13099), AOM_ICDF(27275), AOM_ICDF(27613),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8134), AOM_ICDF(8458), AOM_ICDF(21075), AOM_ICDF(21352),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5041), AOM_ICDF(5881), AOM_ICDF(17644), AOM_ICDF(18485),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(7282), AOM_ICDF(12743), AOM_ICDF(18204), AOM_ICDF(23666),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28082), AOM_ICDF(29782), AOM_ICDF(32087), AOM_ICDF(32400),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(21281), AOM_ICDF(24161), AOM_ICDF(31679), AOM_ICDF(31997),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12144), AOM_ICDF(12913), AOM_ICDF(27139), AOM_ICDF(27460),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8232), AOM_ICDF(8472), AOM_ICDF(21659), AOM_ICDF(21979),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(3034), AOM_ICDF(4855), AOM_ICDF(17598), AOM_ICDF(19418),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(30193), AOM_ICDF(31021), AOM_ICDF(32122), AOM_ICDF(32435),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(22124), AOM_ICDF(23763), AOM_ICDF(31498), AOM_ICDF(31816),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12066), AOM_ICDF(12418), AOM_ICDF(26849), AOM_ICDF(27157),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8701), AOM_ICDF(8979), AOM_ICDF(20920), AOM_ICDF(21197),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5266), AOM_ICDF(7022), AOM_ICDF(15799), AOM_ICDF(17554),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // Band 0
+ {AOM_ICDF(23468), AOM_ICDF(24062), AOM_ICDF(30645), AOM_ICDF(31200),
+ AOM_ICDF(32193), AOM_ICDF(32768), },
+ {AOM_ICDF(12642), AOM_ICDF(14371), AOM_ICDF(26924), AOM_ICDF(28832),
+ AOM_ICDF(31098), AOM_ICDF(32768), },
+ {AOM_ICDF(7785), AOM_ICDF(8831), AOM_ICDF(23705), AOM_ICDF(26028),
+ AOM_ICDF(29979), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(13575), AOM_ICDF(28087), AOM_ICDF(31130), AOM_ICDF(31832),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11108), AOM_ICDF(27955), AOM_ICDF(31657), AOM_ICDF(32213),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(9797), AOM_ICDF(23985), AOM_ICDF(28039), AOM_ICDF(30741),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5578), AOM_ICDF(18824), AOM_ICDF(26493), AOM_ICDF(28585),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(5041), AOM_ICDF(12603), AOM_ICDF(18905), AOM_ICDF(22686),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(17613), AOM_ICDF(26624), AOM_ICDF(30310), AOM_ICDF(31539),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(11398), AOM_ICDF(22795), AOM_ICDF(29444), AOM_ICDF(30868),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8548), AOM_ICDF(15672), AOM_ICDF(22795), AOM_ICDF(28494),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6144), AOM_ICDF(12288), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(24145), AOM_ICDF(26301), AOM_ICDF(30181), AOM_ICDF(31475),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15565), AOM_ICDF(20480), AOM_ICDF(27853), AOM_ICDF(30310),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(8192), AOM_ICDF(14336), AOM_ICDF(20480), AOM_ICDF(26624),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(27434), AOM_ICDF(28450), AOM_ICDF(30990), AOM_ICDF(31752),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14947), AOM_ICDF(21845), AOM_ICDF(29319), AOM_ICDF(31043),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(31130), AOM_ICDF(31676), AOM_ICDF(32180), AOM_ICDF(32474),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(18289), AOM_ICDF(22099), AOM_ICDF(28196), AOM_ICDF(30482),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ { // Inter
+ { // Band 0
+ {AOM_ICDF(29436), AOM_ICDF(29775), AOM_ICDF(31685), AOM_ICDF(32029),
+ AOM_ICDF(32425), AOM_ICDF(32768), },
+ {AOM_ICDF(10536), AOM_ICDF(11074), AOM_ICDF(27753), AOM_ICDF(28385),
+ AOM_ICDF(31293), AOM_ICDF(32768), },
+ {AOM_ICDF(3010), AOM_ICDF(3521), AOM_ICDF(22603), AOM_ICDF(23227),
+ AOM_ICDF(30440), AOM_ICDF(32768), },
+ },
+ { // Band 1
+ {AOM_ICDF(17576), AOM_ICDF(29491), AOM_ICDF(30981), AOM_ICDF(31874),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(10426), AOM_ICDF(29044), AOM_ICDF(31725), AOM_ICDF(32321),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15766), AOM_ICDF(28286), AOM_ICDF(31377), AOM_ICDF(32304),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(19661), AOM_ICDF(26985), AOM_ICDF(30069), AOM_ICDF(31611),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(16035), AOM_ICDF(23007), AOM_ICDF(28585), AOM_ICDF(30676),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 2
+ {AOM_ICDF(23073), AOM_ICDF(30053), AOM_ICDF(31605), AOM_ICDF(32186),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(12858), AOM_ICDF(24887), AOM_ICDF(30279), AOM_ICDF(31524),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 3
+ {AOM_ICDF(24030), AOM_ICDF(26839), AOM_ICDF(30896), AOM_ICDF(31832),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(17644), AOM_ICDF(23526), AOM_ICDF(27727), AOM_ICDF(30247),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 4
+ {AOM_ICDF(28019), AOM_ICDF(30156), AOM_ICDF(31343), AOM_ICDF(32056),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(14980), AOM_ICDF(22469), AOM_ICDF(27151), AOM_ICDF(29959),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ { // Band 5
+ {AOM_ICDF(30549), AOM_ICDF(31511), AOM_ICDF(32176), AOM_ICDF(32472),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(15019), AOM_ICDF(20480), AOM_ICDF(24576), AOM_ICDF(28672),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ {AOM_ICDF(6553), AOM_ICDF(13107), AOM_ICDF(19660), AOM_ICDF(26214),
+ AOM_ICDF(32768), },
+ },
+ },
+ },
+ },
+};
+/* clang-format on */
+
+typedef coeff_cdf_model coeff_cdf_table[TX_SIZES][PLANE_TYPES];
+static const coeff_cdf_table *av1_default_qctx_coef_cdfs[TOKEN_CDF_Q_CTXS] = {
+ &av1_default_coef_head_cdfs_q0, &av1_default_coef_head_cdfs_q1,
+ &av1_default_coef_head_cdfs_q2, &av1_default_coef_head_cdfs_q3,
+};
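
The tables above are inverse-CDF rows: each AOM_ICDF entry is a cumulative probability out of a 15-bit total (32768), and every row ends at the full range. The recurring {6553, 13107, 19660, 26214, 32768} rows are near-uniform defaults for contexts without trained statistics (32768 / 5 ≈ 6553.6), and the four table sets q0..q3 cover the TOKEN_CDF_Q_CTXS quantizer buckets. As a rough sketch of how such a row is consumed, the C fragment below maps a uniform 15-bit value to a symbol; it assumes the stored values are plain cumulative probabilities (the real AOM_ICDF macro may store them inverted, depending on the entropy-coder configuration), and read_symbol_from_cdf is a hypothetical helper, not a libaom API.

    #include <stdint.h>

    /* Hypothetical sketch, not libaom code: return the first symbol whose
     * cumulative probability exceeds u15, a uniform value in [0, 32768).
     * nsymbs counts the terminating 32768 entry, so a 5-entry row decodes
     * one of 5 symbols. */
    static int read_symbol_from_cdf(const uint16_t *cdf, int nsymbs,
                                    uint32_t u15) {
      int s = 0;
      while (s < nsymbs - 1 && u15 >= cdf[s]) ++s;
      return s;
    }

For the uniform row, u15 = 7000 lands in [6553, 13107) and decodes symbol 1.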
diff --git a/third_party/aom/av1/common/txb_common.c b/third_party/aom/av1/common/txb_common.c
index eb66ba175..c5b91e991 100644
--- a/third_party/aom/av1/common/txb_common.c
+++ b/third_party/aom/av1/common/txb_common.c
@@ -10,6 +10,7 @@
*/
#include "aom/aom_integer.h"
#include "av1/common/onyxc_int.h"
+#include "av1/common/txb_common.h"
const int16_t av1_coeff_band_4x4[16] = { 0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15 };
@@ -95,6 +96,123 @@ const int16_t av1_coeff_band_32x32[1024] = {
22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
};
+#if LV_MAP_PROB
+void av1_init_txb_probs(FRAME_CONTEXT *fc) {
+ TX_SIZE tx_size;
+ int plane, ctx, level;
+
+ // Update probability models for transform block skip flag
+ for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx) {
+ fc->txb_skip_cdf[tx_size][ctx][0] =
+ AOM_ICDF(128 * (aom_cdf_prob)fc->txb_skip[tx_size][ctx]);
+ fc->txb_skip_cdf[tx_size][ctx][1] = AOM_ICDF(32768);
+ fc->txb_skip_cdf[tx_size][ctx][2] = 0;
+ }
+ }
+
+ for (plane = 0; plane < PLANE_TYPES; ++plane) {
+ for (ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx) {
+ fc->dc_sign_cdf[plane][ctx][0] =
+ AOM_ICDF(128 * (aom_cdf_prob)fc->dc_sign[plane][ctx]);
+ fc->dc_sign_cdf[plane][ctx][1] = AOM_ICDF(32768);
+ fc->dc_sign_cdf[plane][ctx][2] = 0;
+ }
+ }
+
+  // Update probability models for coeff base level, non-zero map and eob flag.
+ for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (plane = 0; plane < PLANE_TYPES; ++plane) {
+ for (level = 0; level < NUM_BASE_LEVELS; ++level) {
+ for (ctx = 0; ctx < COEFF_BASE_CONTEXTS; ++ctx) {
+ fc->coeff_base_cdf[tx_size][plane][level][ctx][0] = AOM_ICDF(
+ 128 * (aom_cdf_prob)fc->coeff_base[tx_size][plane][level][ctx]);
+ fc->coeff_base_cdf[tx_size][plane][level][ctx][1] = AOM_ICDF(32768);
+ fc->coeff_base_cdf[tx_size][plane][level][ctx][2] = 0;
+ }
+ }
+ }
+ }
+
+ for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (plane = 0; plane < PLANE_TYPES; ++plane) {
+ for (ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
+ fc->nz_map_cdf[tx_size][plane][ctx][0] =
+ AOM_ICDF(128 * (aom_cdf_prob)fc->nz_map[tx_size][plane][ctx]);
+ fc->nz_map_cdf[tx_size][plane][ctx][1] = AOM_ICDF(32768);
+ fc->nz_map_cdf[tx_size][plane][ctx][2] = 0;
+ }
+
+ for (ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) {
+ fc->eob_flag_cdf[tx_size][plane][ctx][0] =
+ AOM_ICDF(128 * (aom_cdf_prob)fc->eob_flag[tx_size][plane][ctx]);
+ fc->eob_flag_cdf[tx_size][plane][ctx][1] = AOM_ICDF(32768);
+ fc->eob_flag_cdf[tx_size][plane][ctx][2] = 0;
+ }
+ }
+ }
+
+ for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (plane = 0; plane < PLANE_TYPES; ++plane) {
+ for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
+ fc->coeff_lps_cdf[tx_size][plane][ctx][0] =
+ AOM_ICDF(128 * (aom_cdf_prob)fc->coeff_lps[tx_size][plane][ctx]);
+ fc->coeff_lps_cdf[tx_size][plane][ctx][1] = AOM_ICDF(32768);
+ fc->coeff_lps_cdf[tx_size][plane][ctx][2] = 0;
+ }
+#if BR_NODE
+ for (int br = 0; br < BASE_RANGE_SETS; ++br) {
+ for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
+ fc->coeff_br_cdf[tx_size][plane][br][ctx][0] = AOM_ICDF(
+ 128 * (aom_cdf_prob)fc->coeff_br[tx_size][plane][br][ctx]);
+ fc->coeff_br_cdf[tx_size][plane][br][ctx][1] = AOM_ICDF(32768);
+ fc->coeff_br_cdf[tx_size][plane][br][ctx][2] = 0;
+ }
+ }
+#endif // BR_NODE
+ }
+ }
+#if CONFIG_CTX1D
+ for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (plane = 0; plane < PLANE_TYPES; ++plane) {
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class) {
+ fc->eob_mode_cdf[tx_size][plane][tx_class][0] = AOM_ICDF(
+ 128 * (aom_cdf_prob)fc->eob_mode[tx_size][plane][tx_class]);
+ fc->eob_mode_cdf[tx_size][plane][tx_class][1] = AOM_ICDF(32768);
+ fc->eob_mode_cdf[tx_size][plane][tx_class][2] = 0;
+ }
+ }
+ }
+ for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (plane = 0; plane < PLANE_TYPES; ++plane) {
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class) {
+ for (ctx = 0; ctx < EMPTY_LINE_CONTEXTS; ++ctx) {
+ fc->empty_line_cdf[tx_size][plane][tx_class][ctx][0] = AOM_ICDF(
+ 128 *
+ (aom_cdf_prob)fc->empty_line[tx_size][plane][tx_class][ctx]);
+ fc->empty_line_cdf[tx_size][plane][tx_class][ctx][1] =
+ AOM_ICDF(32768);
+ fc->empty_line_cdf[tx_size][plane][tx_class][ctx][2] = 0;
+ }
+ }
+ }
+ }
+ for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (plane = 0; plane < PLANE_TYPES; ++plane) {
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class) {
+ for (ctx = 0; ctx < HV_EOB_CONTEXTS; ++ctx) {
+ fc->hv_eob_cdf[tx_size][plane][tx_class][ctx][0] = AOM_ICDF(
+ 128 * (aom_cdf_prob)fc->hv_eob[tx_size][plane][tx_class][ctx]);
+ fc->hv_eob_cdf[tx_size][plane][tx_class][ctx][1] = AOM_ICDF(32768);
+ fc->hv_eob_cdf[tx_size][plane][tx_class][ctx][2] = 0;
+ }
+ }
+ }
+ }
+#endif // CONFIG_CTX1D
+}
+#endif // LV_MAP_PROB
+
void av1_adapt_txb_probs(AV1_COMMON *cm, unsigned int count_sat,
unsigned int update_factor) {
FRAME_CONTEXT *fc = cm->fc;
@@ -141,10 +259,64 @@ void av1_adapt_txb_probs(AV1_COMMON *cm, unsigned int count_sat,
}
for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
- for (plane = 0; plane < PLANE_TYPES; ++plane)
- for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx)
+ for (plane = 0; plane < PLANE_TYPES; ++plane) {
+ for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
fc->coeff_lps[tx_size][plane][ctx] = merge_probs(
pre_fc->coeff_lps[tx_size][plane][ctx],
counts->coeff_lps[tx_size][plane][ctx], count_sat, update_factor);
+ }
+#if BR_NODE
+ for (int br = 0; br < BASE_RANGE_SETS; ++br) {
+ for (ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
+ fc->coeff_br[tx_size][plane][br][ctx] =
+ merge_probs(pre_fc->coeff_br[tx_size][plane][br][ctx],
+ counts->coeff_br[tx_size][plane][br][ctx], count_sat,
+ update_factor);
+ }
+ }
+#endif // BR_NODE
+ }
+ }
+#if CONFIG_CTX1D
+ for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (plane = 0; plane < PLANE_TYPES; ++plane)
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
+ fc->eob_mode[tx_size][plane][tx_class] =
+ merge_probs(pre_fc->eob_mode[tx_size][plane][tx_class],
+ counts->eob_mode[tx_size][plane][tx_class], count_sat,
+ update_factor);
+ }
+ for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (plane = 0; plane < PLANE_TYPES; ++plane)
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
+ for (ctx = 0; ctx < EMPTY_LINE_CONTEXTS; ++ctx)
+ fc->empty_line[tx_size][plane][tx_class][ctx] =
+ merge_probs(pre_fc->empty_line[tx_size][plane][tx_class][ctx],
+ counts->empty_line[tx_size][plane][tx_class][ctx],
+ count_sat, update_factor);
+ }
+ for (tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (plane = 0; plane < PLANE_TYPES; ++plane)
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
+ for (ctx = 0; ctx < HV_EOB_CONTEXTS; ++ctx)
+ fc->hv_eob[tx_size][plane][tx_class][ctx] =
+ merge_probs(pre_fc->hv_eob[tx_size][plane][tx_class][ctx],
+ counts->hv_eob[tx_size][plane][tx_class][ctx],
+ count_sat, update_factor);
+ }
+#endif  // CONFIG_CTX1D
+}
+
+void av1_init_lv_map(AV1_COMMON *cm) {
+ LV_MAP_CTX_TABLE *coeff_ctx_table = &cm->coeff_ctx_table;
+ for (int row = 0; row < 2; ++row) {
+ for (int col = 0; col < 2; ++col) {
+ for (int sig_mag = 0; sig_mag < 2; ++sig_mag) {
+ for (int count = 0; count < BASE_CONTEXT_POSITION_NUM + 1; ++count) {
+ coeff_ctx_table->base_ctx_table[row][col][sig_mag][count] =
+ get_base_ctx_from_count_mag(row, col, count, sig_mag);
+ }
+ }
+ }
}
}
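
Two patterns in the txb_common.c changes above deserve a note. In av1_init_txb_probs, each binary CDF is seeded from an 8-bit probability: multiplying by 128 rescales p/256 into the 15-bit CDF domain (128 = 32768 / 256), the second slot is pinned at the full range, and the trailing zero matches the diff (it appears to serve as an adaptation-counter slot). In av1_init_lv_map, get_base_ctx_from_count_mag is evaluated once for every (row, col, sig_mag, count) combination so the coding loops can use a table lookup instead of recomputing contexts. A minimal sketch of the seeding step, with binary_prob_to_cdf as a hypothetical name and the AOM_ICDF wrapping left out:

    #include <stdint.h>

    typedef uint16_t aom_cdf_prob;

    /* Hypothetical helper mirroring the init pattern in the diff: an 8-bit
     * probability p (out of 256) becomes the first entry of a two-symbol
     * CDF over the 15-bit range. 128 * 255 = 32640 < 32768, so no overflow. */
    static void binary_prob_to_cdf(uint8_t p, aom_cdf_prob cdf[3]) {
      cdf[0] = (aom_cdf_prob)(128u * p); /* 32768 / 256 == 128 */
      cdf[1] = 32768;                    /* terminator: full range */
      cdf[2] = 0;                        /* as in the diff; likely a counter */
    }

For example, p = 192 (a 75% chance of the first branch) yields cdf = {24576, 32768, 0}.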
diff --git a/third_party/aom/av1/common/txb_common.h b/third_party/aom/av1/common/txb_common.h
index 5620a70a9..3bf8f8c61 100644
--- a/third_party/aom/av1/common/txb_common.h
+++ b/third_party/aom/av1/common/txb_common.h
@@ -11,6 +11,10 @@
#ifndef AV1_COMMON_TXB_COMMON_H_
#define AV1_COMMON_TXB_COMMON_H_
+
+#define REDUCE_CONTEXT_DEPENDENCY 0
+#define MIN_SCAN_IDX_REDUCE_CONTEXT_DEPENDENCY 0
+
extern const int16_t av1_coeff_band_4x4[16];
extern const int16_t av1_coeff_band_8x8[64];
@@ -28,7 +32,6 @@ static INLINE TX_SIZE get_txsize_context(TX_SIZE tx_size) {
return txsize_sqr_up_map[tx_size];
}
-#define BASE_CONTEXT_POSITION_NUM 12
static int base_ref_offset[BASE_CONTEXT_POSITION_NUM][2] = {
/* clang-format off*/
{ -2, 0 }, { -1, -1 }, { -1, 0 }, { -1, 1 }, { 0, -2 }, { 0, -1 }, { 0, 1 },
@@ -36,23 +39,24 @@ static int base_ref_offset[BASE_CONTEXT_POSITION_NUM][2] = {
/* clang-format on*/
};
-static INLINE int get_level_count(const tran_low_t *tcoeffs, int stride,
+static INLINE int get_level_count(const tran_low_t *tcoeffs, int bwl,
int height, int row, int col, int level,
int (*nb_offset)[2], int nb_num) {
int count = 0;
for (int idx = 0; idx < nb_num; ++idx) {
const int ref_row = row + nb_offset[idx][0];
const int ref_col = col + nb_offset[idx][1];
- const int pos = ref_row * stride + ref_col;
- if (ref_row < 0 || ref_col < 0 || ref_row >= height || ref_col >= stride)
+ if (ref_row < 0 || ref_col < 0 || ref_row >= height ||
+ ref_col >= (1 << bwl))
continue;
+ const int pos = (ref_row << bwl) + ref_col;
tran_low_t abs_coeff = abs(tcoeffs[pos]);
count += abs_coeff > level;
}
return count;
}
-static INLINE void get_mag(int *mag, const tran_low_t *tcoeffs, int stride,
+static INLINE void get_mag(int *mag, const tran_low_t *tcoeffs, int bwl,
int height, int row, int col, int (*nb_offset)[2],
int nb_num) {
mag[0] = 0;
@@ -60,9 +64,10 @@ static INLINE void get_mag(int *mag, const tran_low_t *tcoeffs, int stride,
for (int idx = 0; idx < nb_num; ++idx) {
const int ref_row = row + nb_offset[idx][0];
const int ref_col = col + nb_offset[idx][1];
- const int pos = ref_row * stride + ref_col;
- if (ref_row < 0 || ref_col < 0 || ref_row >= height || ref_col >= stride)
+ if (ref_row < 0 || ref_col < 0 || ref_row >= height ||
+ ref_col >= (1 << bwl))
continue;
+ const int pos = (ref_row << bwl) + ref_col;
tran_low_t abs_coeff = abs(tcoeffs[pos]);
if (nb_offset[idx][0] >= 0 && nb_offset[idx][1] >= 0) {
if (abs_coeff > mag[0]) {
@@ -74,18 +79,50 @@ static INLINE void get_mag(int *mag, const tran_low_t *tcoeffs, int stride,
}
}
}
+
+static INLINE void get_base_count_mag(int *mag, int *count,
+ const tran_low_t *tcoeffs, int bwl,
+ int height, int row, int col) {
+ mag[0] = 0;
+ mag[1] = 0;
+ for (int i = 0; i < NUM_BASE_LEVELS; ++i) count[i] = 0;
+ for (int idx = 0; idx < BASE_CONTEXT_POSITION_NUM; ++idx) {
+ const int ref_row = row + base_ref_offset[idx][0];
+ const int ref_col = col + base_ref_offset[idx][1];
+ if (ref_row < 0 || ref_col < 0 || ref_row >= height ||
+ ref_col >= (1 << bwl))
+ continue;
+ const int pos = (ref_row << bwl) + ref_col;
+ tran_low_t abs_coeff = abs(tcoeffs[pos]);
+ // count
+ for (int i = 0; i < NUM_BASE_LEVELS; ++i) {
+ count[i] += abs_coeff > i;
+ }
+ // mag
+ if (base_ref_offset[idx][0] >= 0 && base_ref_offset[idx][1] >= 0) {
+ if (abs_coeff > mag[0]) {
+ mag[0] = abs_coeff;
+ mag[1] = 1;
+ } else if (abs_coeff == mag[0]) {
+ ++mag[1];
+ }
+ }
+ }
+}
+
static INLINE int get_level_count_mag(int *mag, const tran_low_t *tcoeffs,
- int stride, int height, int row, int col,
+ int bwl, int height, int row, int col,
int level, int (*nb_offset)[2],
int nb_num) {
+ const int stride = 1 << bwl;
int count = 0;
*mag = 0;
for (int idx = 0; idx < nb_num; ++idx) {
const int ref_row = row + nb_offset[idx][0];
const int ref_col = col + nb_offset[idx][1];
- const int pos = ref_row * stride + ref_col;
if (ref_row < 0 || ref_col < 0 || ref_row >= height || ref_col >= stride)
continue;
+ const int pos = (ref_row << bwl) + ref_col;
tran_low_t abs_coeff = abs(tcoeffs[pos]);
count += abs_coeff > level;
if (nb_offset[idx][0] >= 0 && nb_offset[idx][1] >= 0)
@@ -95,19 +132,21 @@ static INLINE int get_level_count_mag(int *mag, const tran_low_t *tcoeffs,
}
static INLINE int get_base_ctx_from_count_mag(int row, int col, int count,
- int mag, int level) {
+ int sig_mag) {
const int ctx = (count + 1) >> 1;
- const int sig_mag = mag > level;
int ctx_idx = -1;
if (row == 0 && col == 0) {
ctx_idx = (ctx << 1) + sig_mag;
- assert(ctx_idx < 8);
+ // TODO(angiebird): turn this on once the optimization is finalized
+ // assert(ctx_idx < 8);
} else if (row == 0) {
ctx_idx = 8 + (ctx << 1) + sig_mag;
- assert(ctx_idx < 18);
+ // TODO(angiebird): turn this on once the optimization is finalized
+ // assert(ctx_idx < 18);
} else if (col == 0) {
ctx_idx = 8 + 10 + (ctx << 1) + sig_mag;
- assert(ctx_idx < 28);
+ // TODO(angiebird): turn this on once the optimization is finalized
+ // assert(ctx_idx < 28);
} else {
ctx_idx = 8 + 10 + 10 + (ctx << 1) + sig_mag;
assert(ctx_idx < COEFF_BASE_CONTEXTS);
@@ -119,15 +158,14 @@ static INLINE int get_base_ctx(const tran_low_t *tcoeffs,
int c, // raster order
const int bwl, const int height,
const int level) {
- const int stride = 1 << bwl;
const int row = c >> bwl;
const int col = c - (row << bwl);
const int level_minus_1 = level - 1;
int mag;
- int count = get_level_count_mag(&mag, tcoeffs, stride, height, row, col,
- level_minus_1, base_ref_offset,
- BASE_CONTEXT_POSITION_NUM);
- int ctx_idx = get_base_ctx_from_count_mag(row, col, count, mag, level);
+ int count =
+ get_level_count_mag(&mag, tcoeffs, bwl, height, row, col, level_minus_1,
+ base_ref_offset, BASE_CONTEXT_POSITION_NUM);
+ int ctx_idx = get_base_ctx_from_count_mag(row, col, count, mag > level);
return ctx_idx;
}
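
With the refactor, the mag > level test moves to the caller, so get_base_ctx_from_count_mag just packs a position class, a halved count and the one-bit sig_mag flag. A numeric sketch (inputs hypothetical, not part of the patch):

static int base_ctx_sketch(void) {
  const int count = 5, sig_mag = 1;  /* hypothetical neighbour statistics */
  const int ctx = (count + 1) >> 1;  /* coarse count bucket: 3 */
  /* DC uses contexts 0..7, the rest of row 0 starts at 8, the rest of
   * column 0 at 8 + 10, and interior positions at 8 + 10 + 10. */
  return 8 + (ctx << 1) + sig_mag;   /* row-0 example: 8 + 6 + 1 = 15 */
}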
@@ -139,13 +177,52 @@ static int br_ref_offset[BR_CONTEXT_POSITION_NUM][2] = {
 /* clang-format on */
};
-static int br_level_map[9] = {
+static const int br_level_map[9] = {
0, 0, 1, 1, 2, 2, 3, 3, 3,
};
+static const int coeff_to_br_index[COEFF_BASE_RANGE] = {
+ 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
+};
+
+static const int br_index_to_coeff[BASE_RANGE_SETS] = {
+ 0, 2, 6,
+};
+
+static const int br_extra_bits[BASE_RANGE_SETS] = {
+ 1, 2, 3,
+};
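
These three tables cooperate when signalling the base-range part of a level: coeff_to_br_index buckets a remainder into one of BASE_RANGE_SETS sets, br_index_to_coeff gives each set's first value, and br_extra_bits the raw bits that code the offset within the set. A sketch with a hypothetical remainder:

static void br_split_sketch(void) {
  const int r = 9;                               /* hypothetical remainder */
  const int set = coeff_to_br_index[r];          /* -> set 2 */
  const int offset = r - br_index_to_coeff[set]; /* 9 - 6 = 3 */
  const int bits = br_extra_bits[set];           /* offset coded in 3 bits */
  (void)offset;
  (void)bits;
}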
+
#define BR_MAG_OFFSET 1
// TODO(angiebird): optimize this function by using a table to map from
// count/mag to ctx
+
+static INLINE int get_br_count_mag(int *mag, const tran_low_t *tcoeffs, int bwl,
+ int height, int row, int col, int level) {
+ mag[0] = 0;
+ mag[1] = 0;
+ int count = 0;
+ for (int idx = 0; idx < BR_CONTEXT_POSITION_NUM; ++idx) {
+ const int ref_row = row + br_ref_offset[idx][0];
+ const int ref_col = col + br_ref_offset[idx][1];
+ if (ref_row < 0 || ref_col < 0 || ref_row >= height ||
+ ref_col >= (1 << bwl))
+ continue;
+ const int pos = (ref_row << bwl) + ref_col;
+ tran_low_t abs_coeff = abs(tcoeffs[pos]);
+ count += abs_coeff > level;
+ if (br_ref_offset[idx][0] >= 0 && br_ref_offset[idx][1] >= 0) {
+ if (abs_coeff > mag[0]) {
+ mag[0] = abs_coeff;
+ mag[1] = 1;
+ } else if (abs_coeff == mag[0]) {
+ ++mag[1];
+ }
+ }
+ }
+ return count;
+}
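
As in the base-level helpers, mag[0] is the running maximum level over the non-negative-offset neighbours and mag[1] its multiplicity. A reference loop over hypothetical data (editorial, not part of the patch):

static void mag_semantics_sketch(void) {
  const int levels[4] = { 2, 5, 5, 1 };  /* hypothetical neighbour levels */
  int mag[2] = { 0, 0 };
  for (int i = 0; i < 4; ++i) {
    if (levels[i] > mag[0]) {
      mag[0] = levels[i];  /* new maximum */
      mag[1] = 1;
    } else if (levels[i] == mag[0]) {
      ++mag[1];            /* another neighbour at the maximum */
    }
  }
  /* result: mag[0] == 5, mag[1] == 2 */
}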
+
static INLINE int get_br_ctx_from_count_mag(int row, int col, int count,
int mag) {
int offset = 0;
@@ -153,7 +230,7 @@ static INLINE int get_br_ctx_from_count_mag(int row, int col, int count,
offset = 0;
else if (mag <= 3)
offset = 1;
- else if (mag <= 6)
+ else if (mag <= 5)
offset = 2;
else
offset = 3;
@@ -177,111 +254,171 @@ static INLINE int get_br_ctx_from_count_mag(int row, int col, int count,
static INLINE int get_br_ctx(const tran_low_t *tcoeffs,
const int c, // raster order
const int bwl, const int height) {
- const int stride = 1 << bwl;
const int row = c >> bwl;
const int col = c - (row << bwl);
const int level_minus_1 = NUM_BASE_LEVELS;
int mag;
- const int count = get_level_count_mag(&mag, tcoeffs, stride, height, row, col,
- level_minus_1, br_ref_offset,
- BR_CONTEXT_POSITION_NUM);
+ const int count =
+ get_level_count_mag(&mag, tcoeffs, bwl, height, row, col, level_minus_1,
+ br_ref_offset, BR_CONTEXT_POSITION_NUM);
const int ctx = get_br_ctx_from_count_mag(row, col, count, mag);
return ctx;
}
-#define SIG_REF_OFFSET_NUM 11
+#define SIG_REF_OFFSET_NUM 7
static int sig_ref_offset[SIG_REF_OFFSET_NUM][2] = {
- { -2, -1 }, { -2, 0 }, { -2, 1 }, { -1, -2 }, { -1, -1 }, { -1, 0 },
- { -1, 1 }, { 0, -2 }, { 0, -1 }, { 1, -2 }, { 1, -1 },
+ { -2, -1 }, { -2, 0 }, { -1, -2 }, { -1, -1 },
+ { -1, 0 }, { 0, -2 }, { 0, -1 },
};
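
The template shrinks from 11 offsets to 7, and every survivor precedes the current position in raster order. That causality is what lets the patch drop the iscan comparison the old loop needed. A sketch of the layout (editorial, not part of the patch):

/*         col-2  col-1  col
 *  row-2            o    o
 *  row-1     o      o    o
 *  row       o      o    X    <- current coefficient
 * All seven neighbours come before X in raster order, so testing
 * tcoeffs[nb_pos] != 0 directly is safe. */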
-static INLINE int get_nz_count(const tran_low_t *tcoeffs, int stride,
- int height, int row, int col,
- const int16_t *iscan) {
+#if REDUCE_CONTEXT_DEPENDENCY
+static INLINE int get_nz_count(const tran_low_t *tcoeffs, int bwl, int height,
+ int row, int col, int prev_row, int prev_col) {
int count = 0;
- const int pos = row * stride + col;
for (int idx = 0; idx < SIG_REF_OFFSET_NUM; ++idx) {
const int ref_row = row + sig_ref_offset[idx][0];
const int ref_col = col + sig_ref_offset[idx][1];
- if (ref_row < 0 || ref_col < 0 || ref_row >= height || ref_col >= stride)
+ if (ref_row < 0 || ref_col < 0 || ref_row >= height ||
+ ref_col >= (1 << bwl) || (prev_row == ref_row && prev_col == ref_col))
+ continue;
+ const int nb_pos = (ref_row << bwl) + ref_col;
+ count += (tcoeffs[nb_pos] != 0);
+ }
+ return count;
+}
+#else
+static INLINE int get_nz_count(const tran_low_t *tcoeffs, int bwl, int height,
+ int row, int col) {
+ int count = 0;
+ for (int idx = 0; idx < SIG_REF_OFFSET_NUM; ++idx) {
+ const int ref_row = row + sig_ref_offset[idx][0];
+ const int ref_col = col + sig_ref_offset[idx][1];
+ if (ref_row < 0 || ref_col < 0 || ref_row >= height ||
+ ref_col >= (1 << bwl))
continue;
- const int nb_pos = ref_row * stride + ref_col;
- if (iscan[nb_pos] < iscan[pos]) count += (tcoeffs[nb_pos] != 0);
+ const int nb_pos = (ref_row << bwl) + ref_col;
+ count += (tcoeffs[nb_pos] != 0);
}
return count;
}
+#endif
+
+static INLINE TX_CLASS get_tx_class(TX_TYPE tx_type) {
+ switch (tx_type) {
+#if CONFIG_EXT_TX
+ case V_DCT:
+ case V_ADST:
+ case V_FLIPADST: return TX_CLASS_VERT;
+ case H_DCT:
+ case H_ADST:
+ case H_FLIPADST: return TX_CLASS_HORIZ;
+#endif
+ default: return TX_CLASS_2D;
+ }
+}
 // TODO(angiebird): optimize this function by generating a table that maps
 // from count to ctx
static INLINE int get_nz_map_ctx_from_count(int count,
- const tran_low_t *tcoeffs,
int coeff_idx, // raster order
- int bwl, const int16_t *iscan) {
+ int bwl, TX_TYPE tx_type) {
+ (void)tx_type;
const int row = coeff_idx >> bwl;
const int col = coeff_idx - (row << bwl);
int ctx = 0;
+#if CONFIG_EXT_TX
+ int tx_class = get_tx_class(tx_type);
+ int offset;
+ if (tx_class == TX_CLASS_2D)
+ offset = 0;
+ else if (tx_class == TX_CLASS_VERT)
+ offset = SIG_COEF_CONTEXTS_2D;
+ else
+ offset = SIG_COEF_CONTEXTS_2D + SIG_COEF_CONTEXTS_1D;
+#else
+ int offset = 0;
+#endif
- if (row == 0 && col == 0) return 0;
+ if (row == 0 && col == 0) return offset + 0;
- if (row == 0 && col == 1) return 1 + (tcoeffs[0] != 0);
+ if (row == 0 && col == 1) return offset + 1 + count;
- if (row == 1 && col == 0) return 3 + (tcoeffs[0] != 0);
+ if (row == 1 && col == 0) return offset + 3 + count;
if (row == 1 && col == 1) {
- int pos;
- ctx = (tcoeffs[0] != 0);
-
- if (iscan[1] < iscan[coeff_idx]) ctx += (tcoeffs[1] != 0);
- pos = 1 << bwl;
- if (iscan[pos] < iscan[coeff_idx]) ctx += (tcoeffs[pos] != 0);
-
- ctx = (ctx + 1) >> 1;
+ ctx = (count + 1) >> 1;
assert(5 + ctx <= 7);
- return 5 + ctx;
+ return offset + 5 + ctx;
}
if (row == 0) {
ctx = (count + 1) >> 1;
- assert(ctx < 3);
- return 8 + ctx;
+ assert(ctx < 2);
+ return offset + 8 + ctx;
}
if (col == 0) {
ctx = (count + 1) >> 1;
- assert(ctx < 3);
- return 11 + ctx;
+ assert(ctx < 2);
+ return offset + 10 + ctx;
}
ctx = count >> 1;
- assert(14 + ctx < 20);
+ assert(12 + ctx < 16);
- return 14 + ctx;
+ return offset + 12 + ctx;
}
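
Putting the returns and (reduced) asserts together, the per-class layout is now (editorial summary, not part of the patch):

/* (0,0)             -> offset + 0
 * (0,1)             -> offset + 1..2   (1 + count, count in {0,1})
 * (1,0)             -> offset + 3..4
 * (1,1)             -> offset + 5..7
 * rest of row 0     -> offset + 8..9
 * rest of column 0  -> offset + 10..11
 * interior          -> offset + 12..15
 * i.e. 16 contexts per class; under CONFIG_EXT_TX the 2D, vertical and
 * horizontal classes are stacked back to back via the offset. */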
-static INLINE int get_nz_map_ctx(const tran_low_t *tcoeffs,
- const int coeff_idx, // raster order
- const int bwl, const int height,
- const int16_t *iscan) {
- int stride = 1 << bwl;
+static INLINE int get_nz_map_ctx(const tran_low_t *tcoeffs, const int scan_idx,
+ const int16_t *scan, const int bwl,
+ const int height, TX_TYPE tx_type) {
+ const int coeff_idx = scan[scan_idx];
const int row = coeff_idx >> bwl;
const int col = coeff_idx - (row << bwl);
- int count = get_nz_count(tcoeffs, stride, height, row, col, iscan);
- return get_nz_map_ctx_from_count(count, tcoeffs, coeff_idx, bwl, iscan);
+#if REDUCE_CONTEXT_DEPENDENCY
+ int prev_coeff_idx;
+ int prev_row;
+ int prev_col;
+ if (scan_idx > MIN_SCAN_IDX_REDUCE_CONTEXT_DEPENDENCY) {
+ prev_coeff_idx = scan[scan_idx - 1]; // raster order
+ prev_row = prev_coeff_idx >> bwl;
+ prev_col = prev_coeff_idx - (prev_row << bwl);
+ } else {
+ prev_coeff_idx = -1;
+ prev_row = -1;
+ prev_col = -1;
+ }
+ int count = get_nz_count(tcoeffs, bwl, height, row, col, prev_row, prev_col);
+#else
+ int count = get_nz_count(tcoeffs, bwl, height, row, col);
+#endif
+ return get_nz_map_ctx_from_count(count, coeff_idx, bwl, tx_type);
}
static INLINE int get_eob_ctx(const tran_low_t *tcoeffs,
const int coeff_idx, // raster order
- const TX_SIZE txs_ctx) {
+ const TX_SIZE txs_ctx, TX_TYPE tx_type) {
(void)tcoeffs;
- if (txs_ctx == TX_4X4) return av1_coeff_band_4x4[coeff_idx];
- if (txs_ctx == TX_8X8) return av1_coeff_band_8x8[coeff_idx];
- if (txs_ctx == TX_16X16) return av1_coeff_band_16x16[coeff_idx];
- if (txs_ctx == TX_32X32) return av1_coeff_band_32x32[coeff_idx];
+ int offset = 0;
+#if CONFIG_CTX1D
+ TX_CLASS tx_class = get_tx_class(tx_type);
+ if (tx_class == TX_CLASS_VERT)
+ offset = EOB_COEF_CONTEXTS_2D;
+ else if (tx_class == TX_CLASS_HORIZ)
+ offset = EOB_COEF_CONTEXTS_2D + EOB_COEF_CONTEXTS_1D;
+#else
+ (void)tx_type;
+#endif
+
+ if (txs_ctx == TX_4X4) return offset + av1_coeff_band_4x4[coeff_idx];
+ if (txs_ctx == TX_8X8) return offset + av1_coeff_band_8x8[coeff_idx];
+ if (txs_ctx == TX_16X16) return offset + av1_coeff_band_16x16[coeff_idx];
+ if (txs_ctx == TX_32X32) return offset + av1_coeff_band_32x32[coeff_idx];
assert(0);
return 0;
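
With CONFIG_CTX1D the EOB contexts are stacked per transform class in the same way. For instance, a vertical-class TX_8X8 block would resolve to the following (sketch, not part of the patch):

/* ctx = EOB_COEF_CONTEXTS_2D + av1_coeff_band_8x8[coeff_idx];
 * a horizontal-class block adds EOB_COEF_CONTEXTS_1D on top of that. */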
@@ -369,6 +506,86 @@ static INLINE void get_txb_ctx(BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
}
}
+#if LV_MAP_PROB
+void av1_init_txb_probs(FRAME_CONTEXT *fc);
+#endif // LV_MAP_PROB
+
void av1_adapt_txb_probs(AV1_COMMON *cm, unsigned int count_sat,
unsigned int update_factor);
+
+void av1_init_lv_map(AV1_COMMON *cm);
+
+#if CONFIG_CTX1D
+static INLINE void get_eob_vert(int16_t *eob_ls, const tran_low_t *tcoeff,
+ int w, int h) {
+ for (int c = 0; c < w; ++c) {
+ eob_ls[c] = 0;
+ for (int r = h - 1; r >= 0; --r) {
+ int coeff_idx = r * w + c;
+ if (tcoeff[coeff_idx] != 0) {
+ eob_ls[c] = r + 1;
+ break;
+ }
+ }
+ }
+}
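
A worked example of the per-column scan, using a hypothetical 4-wide, 2-high coefficient block (editorial, not part of the patch):

static void eob_vert_sketch(void) {
  /* row-major 4x2 block:  1 0 0 0
   *                       0 0 3 0 */
  const tran_low_t tcoeff[8] = { 1, 0, 0, 0, 0, 0, 3, 0 };
  int16_t eob_ls[4];
  get_eob_vert(eob_ls, tcoeff, 4, 2);
  /* column 0 ends at row 0 -> eob 1; column 2 ends at row 1 -> eob 2 */
  assert(eob_ls[0] == 1 && eob_ls[1] == 0 && eob_ls[2] == 2 && eob_ls[3] == 0);
}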
+
+static INLINE void get_eob_horiz(int16_t *eob_ls, const tran_low_t *tcoeff,
+ int w, int h) {
+ for (int r = 0; r < h; ++r) {
+ eob_ls[r] = 0;
+ for (int c = w - 1; c >= 0; --c) {
+ int coeff_idx = r * w + c;
+ if (tcoeff[coeff_idx] != 0) {
+ eob_ls[r] = c + 1;
+ break;
+ }
+ }
+ }
+}
+
+static INLINE int get_empty_line_ctx(int line_idx, int16_t *eob_ls) {
+ if (line_idx > 0) {
+ int prev_eob = eob_ls[line_idx - 1];
+ if (prev_eob == 0) {
+ return 1;
+ } else if (prev_eob < 3) {
+ return 2;
+ } else if (prev_eob < 6) {
+ return 3;
+ } else {
+ return 4;
+ }
+ } else {
+ return 0;
+ }
+}
+
+#define MAX_POS_CTX 8
+static int pos_ctx[MAX_HVTX_SIZE] = {
+ 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7,
+};
+static INLINE int get_hv_eob_ctx(int line_idx, int pos, int16_t *eob_ls) {
+ if (line_idx > 0) {
+ int prev_eob = eob_ls[line_idx - 1];
+ int diff = pos + 1 - prev_eob;
+ int abs_diff = abs(diff);
+ int ctx_idx = pos_ctx[abs_diff];
+ assert(ctx_idx < MAX_POS_CTX);
+ if (diff < 0) {
+ ctx_idx += MAX_POS_CTX;
+ assert(ctx_idx >= MAX_POS_CTX);
+ assert(ctx_idx < 2 * MAX_POS_CTX);
+ }
+ return ctx_idx;
+ } else {
+ int ctx_idx = MAX_POS_CTX + MAX_POS_CTX + pos_ctx[pos];
+ assert(ctx_idx < HV_EOB_CONTEXTS);
+ assert(HV_EOB_CONTEXTS == MAX_POS_CTX * 3);
+ return ctx_idx;
+ }
+}
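
Summarising the ranges produced above (editorial; the asserts pin HV_EOB_CONTEXTS to 3 * MAX_POS_CTX == 24):

/* line_idx > 0, pos + 1 >= prev_eob  -> 0..7    (pos_ctx of the distance)
 * line_idx > 0, pos + 1 <  prev_eob  -> 8..15   (diff < 0 adds MAX_POS_CTX)
 * line_idx == 0                      -> 16..23  (position alone) */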
+#endif // CONFIG_CTX1D
+
#endif // AV1_COMMON_TXB_COMMON_H_
diff --git a/third_party/aom/av1/common/warped_motion.c b/third_party/aom/av1/common/warped_motion.c
index 75ae08723..34374af69 100644
--- a/third_party/aom/av1/common/warped_motion.c
+++ b/third_party/aom/av1/common/warped_motion.c
@@ -912,8 +912,8 @@ static void highbd_warp_plane_old(const WarpedMotionParams *const wm,
in[0] = j;
in[1] = i;
projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
- out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
- out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
+ out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, SCALE_SUBPEL_BITS);
+ out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, SCALE_SUBPEL_BITS);
if (conv_params->do_average)
pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
pred[(j - p_col) + (i - p_row) * p_stride] +
@@ -939,136 +939,51 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
int16_t beta, int16_t gamma, int16_t delta) {
int32_t tmp[15 * 8];
int i, j, k, l, m;
-
- for (i = p_row; i < p_row + p_height; i += 8) {
- for (j = p_col; j < p_col + p_width; j += 8) {
- int32_t x4, y4, ix4, sx4, iy4, sy4;
- if (subsampling_x)
- x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
- (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
-
- if (subsampling_y)
- y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
- (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
-
- ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
- sx4 += alpha * (-4) + beta * (-4);
- sy4 += gamma * (-4) + delta * (-4);
-
- sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
- sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-
- // Horizontal filter
- for (k = -7; k < 8; ++k) {
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
-
- int sx = sx4 + beta * (k + 4);
- for (l = -4; l < 4; ++l) {
- int ix = ix4 + l - 3;
- const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
- WARPEDPIXEL_PREC_SHIFTS;
- assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
- const int16_t *coeffs = warped_filter[offs];
-
- int32_t sum = 1 << (bd + WARPEDPIXEL_FILTER_BITS - 1);
- for (m = 0; m < 8; ++m) {
- int sample_x = ix + m;
- if (sample_x < 0)
- sample_x = 0;
- else if (sample_x > width - 1)
- sample_x = width - 1;
- sum += ref[iy * stride + sample_x] * coeffs[m];
- }
- sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
- assert(0 <= sum &&
- sum < (1 << (bd + WARPEDPIXEL_FILTER_BITS + 1 -
- HORSHEAR_REDUCE_PREC_BITS)));
- tmp[(k + 7) * 8 + (l + 4)] = sum;
- sx += alpha;
- }
- }
-
- // Vertical filter
- for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
- int sy = sy4 + delta * (k + 4);
- for (l = -4; l < 4; ++l) {
- uint16_t *p =
- &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
- const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
- WARPEDPIXEL_PREC_SHIFTS;
- assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
- const int16_t *coeffs = warped_filter[offs];
-
- int32_t sum = 1 << (bd + 2 * WARPEDPIXEL_FILTER_BITS -
- HORSHEAR_REDUCE_PREC_BITS);
- for (m = 0; m < 8; ++m) {
- sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
- }
- sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS);
- assert(0 <= sum && sum < (1 << (bd + 2)));
- uint16_t px =
- clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd);
- if (conv_params->do_average)
- *p = ROUND_POWER_OF_TWO(*p + px, 1);
- else
- *p = px;
- sy += gamma;
- }
- }
- }
- }
-}
-
#if CONFIG_CONVOLVE_ROUND
-void av1_highbd_warp_affine_post_round_c(
- const int32_t *mat, const uint16_t *ref, int width, int height, int stride,
- uint16_t *pred, int p_col, int p_row, int p_width, int p_height,
- int p_stride, int subsampling_x, int subsampling_y, int bd,
- ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma,
- int16_t delta) {
- (void)pred;
- (void)p_stride;
- int32_t tmp[15 * 8];
- int i, j, k, l, m;
- const int offset_bits_horiz = bd + FILTER_BITS - 1;
- const int offset_bits_vert = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
+ const int reduce_bits_horiz =
+ use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
+ const int max_bits_horiz =
+ use_conv_params
+ ? bd + FILTER_BITS + 1 - conv_params->round_0
+ : bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
+ const int offset_bits_horiz =
+ use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
+ const int offset_bits_vert =
+ use_conv_params
+ ? bd + 2 * FILTER_BITS - conv_params->round_0
+ : bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
+ if (use_conv_params) {
+ conv_params->do_post_rounding = 1;
+ }
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
+#else
+ const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
+ const int max_bits_horiz =
+ bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
+ const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
+ const int offset_bits_vert =
+ bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
+#endif
+ (void)max_bits_horiz;
for (i = p_row; i < p_row + p_height; i += 8) {
for (j = p_col; j < p_col + p_width; j += 8) {
- int32_t x4, y4, ix4, sx4, iy4, sy4;
- if (subsampling_x)
- x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
- (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
-
- if (subsampling_y)
- y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
- (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
-
- ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ // Calculate the center of this 8x8 block,
+ // project to luma coordinates (if in a subsampled chroma plane),
+ // apply the affine transformation,
+ // then convert back to the original coordinates (if necessary)
+ const int32_t src_x = (j + 4) << subsampling_x;
+ const int32_t src_y = (i + 4) << subsampling_y;
+ const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
+ const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
+ const int32_t x4 = dst_x >> subsampling_x;
+ const int32_t y4 = dst_y >> subsampling_y;
+
+ int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
sx4 += alpha * (-4) + beta * (-4);
sy4 += gamma * (-4) + delta * (-4);
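
The rewritten prologue replaces the per-axis subsampling branches with a single project-to-luma / transform / project-back pattern. As a standalone sketch (editorial, not part of the patch; mat[] is WARPEDMODEL_PREC_BITS fixed point):

static void project_block_center_sketch(const int32_t *mat, int i, int j,
                                        int ssx, int ssy,
                                        int32_t *x4, int32_t *y4) {
  const int32_t src_x = (j + 4) << ssx;  /* block centre in luma coords */
  const int32_t src_y = (i + 4) << ssy;
  const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
  const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
  *x4 = dst_x >> ssx;                    /* back to this plane's coords */
  *y4 = dst_y >> ssy;
}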
@@ -1101,9 +1016,8 @@ void av1_highbd_warp_affine_post_round_c(
sample_x = width - 1;
sum += ref[iy * stride + sample_x] * coeffs[m];
}
- sum = ROUND_POWER_OF_TWO(sum, conv_params->round_0);
- assert(0 <= sum &&
- sum < (1 << (bd + FILTER_BITS + 1 - conv_params->round_0)));
+ sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
+ assert(0 <= sum && sum < (1 << max_bits_horiz));
tmp[(k + 7) * 8 + (l + 4)] = sum;
sx += alpha;
}
@@ -1112,7 +1026,7 @@ void av1_highbd_warp_affine_post_round_c(
// Vertical filter
for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
int sy = sy4 + delta * (k + 4);
- for (l = -4; l < 4; ++l) {
+ for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
@@ -1122,22 +1036,41 @@ void av1_highbd_warp_affine_post_round_c(
for (m = 0; m < 8; ++m) {
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
-
- sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
- (1 << (offset_bits_horiz + FILTER_BITS - conv_params->round_0 -
- conv_params->round_1)) -
- (1 << (offset_bits_vert - conv_params->round_1));
- CONV_BUF_TYPE *p =
- &conv_params->dst[(i - p_row + k + 4) * conv_params->dst_stride +
- (j - p_col + l + 4)];
- *p += sum;
+#if CONFIG_CONVOLVE_ROUND
+ if (use_conv_params) {
+ CONV_BUF_TYPE *p =
+ &conv_params
+ ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
+ (j - p_col + l + 4)];
+ sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
+ (1 << (offset_bits_horiz + FILTER_BITS -
+ conv_params->round_0 - conv_params->round_1)) -
+ (1 << (offset_bits_vert - conv_params->round_1));
+ if (conv_params->do_average)
+ *p += sum;
+ else
+ *p = sum;
+ } else {
+#else
+ {
+#endif
+ uint16_t *p =
+ &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
+ sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS);
+ assert(0 <= sum && sum < (1 << (bd + 2)));
+ uint16_t px =
+ clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd);
+ if (conv_params->do_average)
+ *p = ROUND_POWER_OF_TWO(*p + px, 1);
+ else
+ *p = px;
+ }
sy += gamma;
}
}
}
}
}
-#endif
static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8,
int width, int height, int stride,
@@ -1160,25 +1093,10 @@ static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8,
const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
-#if CONFIG_CONVOLVE_ROUND
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
- conv_params->do_post_rounding = 1;
- av1_highbd_warp_affine_post_round(
- mat, ref, width, height, stride, pred, p_col, p_row, p_width,
- p_height, p_stride, subsampling_x, subsampling_y, bd, conv_params,
- alpha, beta, gamma, delta);
- } else {
- av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col,
- p_row, p_width, p_height, p_stride, subsampling_x,
- subsampling_y, bd, conv_params, alpha, beta, gamma,
- delta);
- }
-#else
av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
p_width, p_height, p_stride, subsampling_x,
subsampling_y, bd, conv_params, alpha, beta, gamma,
delta);
-#endif
} else {
highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row,
p_width, p_height, p_stride, subsampling_x,
@@ -1251,8 +1169,8 @@ static void warp_plane_old(const WarpedMotionParams *const wm,
in[0] = j;
in[1] = i;
projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
- out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
- out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
+ out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, SCALE_SUBPEL_BITS);
+ out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, SCALE_SUBPEL_BITS);
if (conv_params->do_average)
pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
pred[(j - p_col) + (i - p_row) * p_stride] +
@@ -1359,143 +1277,51 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
int32_t tmp[15 * 8];
int i, j, k, l, m;
const int bd = 8;
-
- for (i = p_row; i < p_row + p_height; i += 8) {
- for (j = p_col; j < p_col + p_width; j += 8) {
- int32_t x4, y4, ix4, sx4, iy4, sy4;
- if (subsampling_x)
- x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
- (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
-
- if (subsampling_y)
- y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
- (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
-
- ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
-
- sx4 += alpha * (-4) + beta * (-4);
- sy4 += gamma * (-4) + delta * (-4);
-
- sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
- sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
-
- // Horizontal filter
- for (k = -7; k < 8; ++k) {
- // Clamp to top/bottom edge of the frame
- int iy = iy4 + k;
- if (iy < 0)
- iy = 0;
- else if (iy > height - 1)
- iy = height - 1;
-
- int sx = sx4 + beta * (k + 4);
-
- for (l = -4; l < 4; ++l) {
- int ix = ix4 + l - 3;
- // At this point, sx = sx4 + alpha * l + beta * k
- const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
- WARPEDPIXEL_PREC_SHIFTS;
- assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
- const int16_t *coeffs = warped_filter[offs];
-
- int32_t sum = 1 << (bd + WARPEDPIXEL_FILTER_BITS - 1);
- for (m = 0; m < 8; ++m) {
- // Clamp to left/right edge of the frame
- int sample_x = ix + m;
- if (sample_x < 0)
- sample_x = 0;
- else if (sample_x > width - 1)
- sample_x = width - 1;
-
- sum += ref[iy * stride + sample_x] * coeffs[m];
- }
- sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
- assert(0 <= sum &&
- sum < (1 << (bd + WARPEDPIXEL_FILTER_BITS + 1 -
- HORSHEAR_REDUCE_PREC_BITS)));
- tmp[(k + 7) * 8 + (l + 4)] = sum;
- sx += alpha;
- }
- }
-
- // Vertical filter
- for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
- int sy = sy4 + delta * (k + 4);
- for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
- uint8_t *p =
- &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
- // At this point, sy = sy4 + gamma * l + delta * k
- const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
- WARPEDPIXEL_PREC_SHIFTS;
- assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
- const int16_t *coeffs = warped_filter[offs];
-
- int32_t sum = 1 << (bd + 2 * WARPEDPIXEL_FILTER_BITS -
- HORSHEAR_REDUCE_PREC_BITS);
- for (m = 0; m < 8; ++m) {
- sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
- }
- sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS);
- assert(0 <= sum && sum < (1 << (bd + 2)));
- uint8_t px = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd));
- if (conv_params->do_average)
- *p = ROUND_POWER_OF_TWO(*p + px, 1);
- else
- *p = px;
- sy += gamma;
- }
- }
- }
- }
-}
-
#if CONFIG_CONVOLVE_ROUND
-void av1_warp_affine_post_round_c(const int32_t *mat, const uint8_t *ref,
- int width, int height, int stride,
- uint8_t *pred, int p_col, int p_row,
- int p_width, int p_height, int p_stride,
- int subsampling_x, int subsampling_y,
- ConvolveParams *conv_params, int16_t alpha,
- int16_t beta, int16_t gamma, int16_t delta) {
- (void)pred;
- (void)p_stride;
- int32_t tmp[15 * 8];
- int i, j, k, l, m;
- const int bd = 8;
- const int offset_bits_horiz = bd + FILTER_BITS - 1;
- const int offset_bits_vert = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
+ const int reduce_bits_horiz =
+ use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
+ const int max_bits_horiz =
+ use_conv_params
+ ? bd + FILTER_BITS + 1 - conv_params->round_0
+ : bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
+ const int offset_bits_horiz =
+ use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
+ const int offset_bits_vert =
+ use_conv_params
+ ? bd + 2 * FILTER_BITS - conv_params->round_0
+ : bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
+ if (use_conv_params) {
+ conv_params->do_post_rounding = 1;
+ }
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
+#else
+ const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
+ const int max_bits_horiz =
+ bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
+ const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
+ const int offset_bits_vert =
+ bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
+#endif
+ (void)max_bits_horiz;
for (i = p_row; i < p_row + p_height; i += 8) {
for (j = p_col; j < p_col + p_width; j += 8) {
- int32_t x4, y4, ix4, sx4, iy4, sy4;
- if (subsampling_x)
- x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
- (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
-
- if (subsampling_y)
- y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
- (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
-
- ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ // Calculate the center of this 8x8 block,
+ // project to luma coordinates (if in a subsampled chroma plane),
+ // apply the affine transformation,
+ // then convert back to the original coordinates (if necessary)
+ const int32_t src_x = (j + 4) << subsampling_x;
+ const int32_t src_y = (i + 4) << subsampling_y;
+ const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
+ const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
+ const int32_t x4 = dst_x >> subsampling_x;
+ const int32_t y4 = dst_y >> subsampling_y;
+
+ int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
sx4 += alpha * (-4) + beta * (-4);
sy4 += gamma * (-4) + delta * (-4);
@@ -1533,9 +1359,8 @@ void av1_warp_affine_post_round_c(const int32_t *mat, const uint8_t *ref,
sum += ref[iy * stride + sample_x] * coeffs[m];
}
- sum = ROUND_POWER_OF_TWO(sum, conv_params->round_0);
- assert(0 <= sum &&
- sum < (1 << (bd + FILTER_BITS + 1 - conv_params->round_0)));
+ sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
+ assert(0 <= sum && sum < (1 << max_bits_horiz));
tmp[(k + 7) * 8 + (l + 4)] = sum;
sx += alpha;
}
@@ -1552,26 +1377,43 @@ void av1_warp_affine_post_round_c(const int32_t *mat, const uint8_t *ref,
const int16_t *coeffs = warped_filter[offs];
int32_t sum = 1 << offset_bits_vert;
-
for (m = 0; m < 8; ++m) {
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
-
- sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
- (1 << (offset_bits_horiz + FILTER_BITS - conv_params->round_0 -
- conv_params->round_1)) -
- (1 << (offset_bits_vert - conv_params->round_1));
- CONV_BUF_TYPE *p =
- &conv_params->dst[(i - p_row + k + 4) * conv_params->dst_stride +
- (j - p_col + l + 4)];
- *p += sum;
+#if CONFIG_CONVOLVE_ROUND
+ if (use_conv_params) {
+ CONV_BUF_TYPE *p =
+ &conv_params
+ ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
+ (j - p_col + l + 4)];
+ sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
+ (1 << (offset_bits_horiz + FILTER_BITS -
+ conv_params->round_0 - conv_params->round_1)) -
+ (1 << (offset_bits_vert - conv_params->round_1));
+ if (conv_params->do_average)
+ *p += sum;
+ else
+ *p = sum;
+ } else {
+#else
+ {
+#endif
+ uint8_t *p =
+ &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
+ sum = ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS);
+ assert(0 <= sum && sum < (1 << (bd + 2)));
+ uint8_t px = clip_pixel(sum - (1 << (bd - 1)) - (1 << bd));
+ if (conv_params->do_average)
+ *p = ROUND_POWER_OF_TWO(*p + px, 1);
+ else
+ *p = px;
+ }
sy += gamma;
}
}
}
}
}
-#endif // CONFIG_CONVOLVE_ROUND
static void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref,
int width, int height, int stride, uint8_t *pred,
@@ -1590,23 +1432,9 @@ static void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref,
const int16_t gamma = wm->gamma;
const int16_t delta = wm->delta;
-#if CONFIG_CONVOLVE_ROUND
- if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
- conv_params->do_post_rounding = 1;
- av1_warp_affine_post_round(mat, ref, width, height, stride, pred, p_col,
- p_row, p_width, p_height, p_stride,
- subsampling_x, subsampling_y, conv_params,
- alpha, beta, gamma, delta);
- } else {
- av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
- p_width, p_height, p_stride, subsampling_x, subsampling_y,
- conv_params, alpha, beta, gamma, delta);
- }
-#else
av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
p_width, p_height, p_stride, subsampling_x, subsampling_y,
conv_params, alpha, beta, gamma, delta);
-#endif
} else {
warp_plane_old(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
p_height, p_stride, subsampling_x, subsampling_y, x_scale,
diff --git a/third_party/aom/av1/common/warped_motion.h b/third_party/aom/av1/common/warped_motion.h
index 845eb9a43..e05f6a85f 100644
--- a/third_party/aom/av1/common/warped_motion.h
+++ b/third_party/aom/av1/common/warped_motion.h
@@ -30,10 +30,9 @@
#define LEAST_SQUARES_SAMPLES_MAX (1 << LEAST_SQUARES_SAMPLES_MAX_BITS)
#if WARPED_MOTION_SORT_SAMPLES
-// #define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2)
-// Search half bsize on the top and half bsize on the left, 1 upper-left block,
+// Search 1 row on the top and 1 column on the left, 1 upper-left block,
// 1 upper-right block.
-#define SAMPLES_ARRAY_SIZE ((MAX_MIB_SIZE * MAX_MIB_SIZE + 2) * 2)
+#define SAMPLES_ARRAY_SIZE ((MAX_MIB_SIZE * 2 + 2) * 2)
#else
#define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2)
#endif // WARPED_MOTION_SORT_SAMPLES
diff --git a/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c b/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c
new file mode 100644
index 000000000..1f0fedb2a
--- /dev/null
+++ b/third_party/aom/av1/common/x86/av1_convolve_scale_sse4.c
@@ -0,0 +1,645 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <smmintrin.h>
+
+#include "./aom_dsp_rtcd.h"
+#include "aom_dsp/aom_convolve.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
+#include "av1/common/convolve.h"
+
+// Make a mask for coefficients of 10/12 tap filters. The coefficients are
+// packed "89ab89ab". If it's a 12-tap filter, we want all 1's; if it's a
+// 10-tap filter, we want "11001100" to just match the 8,9 terms.
+static __m128i make_1012_mask(int ntaps) {
+ uint32_t low = 0xffffffff;
+ uint32_t high = (ntaps == 12) ? low : 0;
+ return _mm_set_epi32(high, low, high, low);
+}
+
+// Zero-extend the given input operand to an entire __m128i register.
+//
+// Note that there's almost an intrinsic to do this, but 32-bit Visual Studio
+// doesn't have _mm_set_epi64x, so we have to do it by hand.
+static __m128i extend_32_to_128(uint32_t x) {
+ return _mm_set_epi32(0, 0, 0, x);
+}
+
+// Load an SSE register from p and bitwise AND with a.
+static __m128i load_and_128i(const void *p, __m128i a) {
+ const __m128d ad = _mm_castsi128_pd(a);
+ const __m128d bd = _mm_load1_pd((const double *)p);
+ return _mm_castpd_si128(_mm_and_pd(ad, bd));
+}
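
For a concrete picture of these two helpers (editorial, not part of the patch):

/* make_1012_mask(10) -> 32-bit lanes { ~0, 0, ~0, 0 } (low lane first):
 * in the packed "89ab89ab" coefficient group this keeps taps 8 and 9 in
 * each half and zeroes taps 10 and 11; make_1012_mask(12) keeps all four.
 * load_and_128i(filter + 8, mask) broadcasts the 64-bit "89ab" pair into
 * both halves and applies the mask with a single AND. */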
+
+// The horizontal filter for av1_convolve_2d_scale_sse4_1. This is the more
+// general version, supporting 10 and 12 tap filters. For 8-tap filters, use
+// hfilter8.
+static void hfilter(const uint8_t *src, int src_stride, int32_t *dst, int w,
+ int h, int subpel_x_qn, int x_step_qn,
+ const InterpFilterParams *filter_params, unsigned round) {
+ const int bd = 8;
+ const int ntaps = filter_params->taps;
+ assert(ntaps == 10 || ntaps == 12);
+
+ src -= ntaps / 2 - 1;
+
+ // Construct a mask with which we'll AND filter coefficients 89ab89ab to zero
+ // out the unneeded entries.
+ const __m128i hicoeff_mask = make_1012_mask(ntaps);
+
+ int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
+ const __m128i round_add = _mm_set1_epi32(round_add32);
+ const __m128i round_shift = extend_32_to_128(round);
+
+ int x_qn = subpel_x_qn;
+ for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
+ const uint8_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
+ const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(filter_idx < SUBPEL_SHIFTS);
+ const int16_t *filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
+
+ // The "lo" coefficients are coefficients 0..7. For a 12-tap filter, the
+ // "hi" coefficients are arranged as 89ab89ab. For a 10-tap filter, they
+ // are masked out with hicoeff_mask.
+ const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
+ const __m128i coeffhi = load_and_128i(filter + 8, hicoeff_mask);
+ const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
+
+ int y;
+ for (y = 0; y <= h - 4; y += 4) {
+ const uint8_t *const src0 = src_col + y * src_stride;
+ const uint8_t *const src1 = src0 + 1 * src_stride;
+ const uint8_t *const src2 = src0 + 2 * src_stride;
+ const uint8_t *const src3 = src0 + 3 * src_stride;
+
+ // Load up source data. This is 8-bit input data, so each load gets 16
+ // pixels (we need at most 12)
+ const __m128i data08 = _mm_loadu_si128((__m128i *)src0);
+ const __m128i data18 = _mm_loadu_si128((__m128i *)src1);
+ const __m128i data28 = _mm_loadu_si128((__m128i *)src2);
+ const __m128i data38 = _mm_loadu_si128((__m128i *)src3);
+
+ // Now zero-extend up to 16-bit precision by interleaving with zeros. For
+ // the "high" pixels (8 to 11), interleave first (so that the expansion
+ // to 16-bits operates on an entire register).
+ const __m128i data0lo = _mm_unpacklo_epi8(data08, zero);
+ const __m128i data1lo = _mm_unpacklo_epi8(data18, zero);
+ const __m128i data2lo = _mm_unpacklo_epi8(data28, zero);
+ const __m128i data3lo = _mm_unpacklo_epi8(data38, zero);
+ const __m128i data01hi8 = _mm_unpackhi_epi32(data08, data18);
+ const __m128i data23hi8 = _mm_unpackhi_epi32(data28, data38);
+ const __m128i data01hi = _mm_unpacklo_epi8(data01hi8, zero);
+ const __m128i data23hi = _mm_unpacklo_epi8(data23hi8, zero);
+
+ // Multiply by coefficients
+ const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
+ const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
+ const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
+ const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
+ const __m128i conv01hi = _mm_madd_epi16(data01hi, coeffhi);
+ const __m128i conv23hi = _mm_madd_epi16(data23hi, coeffhi);
+
+ // Reduce horizontally and add
+ const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
+ const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
+ const __m128i convlo = _mm_hadd_epi32(conv01lo, conv23lo);
+ const __m128i convhi = _mm_hadd_epi32(conv01hi, conv23hi);
+ const __m128i conv = _mm_add_epi32(convlo, convhi);
+
+ // Divide down by (1 << round), rounding to nearest.
+ const __m128i shifted =
+ _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
+
+ // Write transposed to the output
+ _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
+ }
+ for (; y < h; ++y) {
+ const uint8_t *const src_row = src_col + y * src_stride;
+
+ int32_t sum = (1 << (bd + FILTER_BITS - 1));
+ for (int k = 0; k < ntaps; ++k) {
+ sum += filter[k] * src_row[k];
+ }
+
+ dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
+ }
+ }
+}
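
A note on the dst[y + x * h] stores above (editorial): the intermediate is written transposed (column-major), so the vertical pass can read each column as a contiguous run with stride h. This is why av1_convolve_2d_scale_sse4_1 below passes im_h as the vertical filter's src_stride.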
+
+// A specialised version of hfilter, the horizontal filter for
+// av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters.
+static void hfilter8(const uint8_t *src, int src_stride, int32_t *dst, int w,
+ int h, int subpel_x_qn, int x_step_qn,
+ const InterpFilterParams *filter_params, unsigned round) {
+ const int bd = 8;
+ const int ntaps = 8;
+
+ src -= ntaps / 2 - 1;
+
+ int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
+ const __m128i round_add = _mm_set1_epi32(round_add32);
+ const __m128i round_shift = extend_32_to_128(round);
+
+ int x_qn = subpel_x_qn;
+ for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
+ const uint8_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
+ const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(filter_idx < SUBPEL_SHIFTS);
+ const int16_t *filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
+
+ // Load the filter coefficients
+ const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
+ const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
+
+ int y;
+ for (y = 0; y <= h - 4; y += 4) {
+ const uint8_t *const src0 = src_col + y * src_stride;
+ const uint8_t *const src1 = src0 + 1 * src_stride;
+ const uint8_t *const src2 = src0 + 2 * src_stride;
+ const uint8_t *const src3 = src0 + 3 * src_stride;
+
+ // Load up source data. This is 8-bit input data; each load is just
+ // loading the lower half of the register and gets 8 pixels
+ const __m128i data08 = _mm_loadl_epi64((__m128i *)src0);
+ const __m128i data18 = _mm_loadl_epi64((__m128i *)src1);
+ const __m128i data28 = _mm_loadl_epi64((__m128i *)src2);
+ const __m128i data38 = _mm_loadl_epi64((__m128i *)src3);
+
+ // Now zero-extend up to 16-bit precision by interleaving with
+ // zeros. Drop the upper half of each register (which just had zeros)
+ const __m128i data0lo = _mm_unpacklo_epi8(data08, zero);
+ const __m128i data1lo = _mm_unpacklo_epi8(data18, zero);
+ const __m128i data2lo = _mm_unpacklo_epi8(data28, zero);
+ const __m128i data3lo = _mm_unpacklo_epi8(data38, zero);
+
+ // Multiply by coefficients
+ const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
+ const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
+ const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
+ const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
+
+ // Reduce horizontally and add
+ const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
+ const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
+ const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo);
+
+ // Divide down by (1 << round), rounding to nearest.
+ const __m128i shifted =
+ _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
+
+ // Write transposed to the output
+ _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
+ }
+ for (; y < h; ++y) {
+ const uint8_t *const src_row = src_col + y * src_stride;
+
+ int32_t sum = (1 << (bd + FILTER_BITS - 1));
+ for (int k = 0; k < ntaps; ++k) {
+ sum += filter[k] * src_row[k];
+ }
+
+ dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
+ }
+ }
+}
+
+// Do a 12-tap convolution with the given coefficients, loading data from src.
+static __m128i convolve_32(const int32_t *src, __m128i coeff03, __m128i coeff47,
+ __m128i coeff8d) {
+ const __m128i data03 = _mm_loadu_si128((__m128i *)src);
+ const __m128i data47 = _mm_loadu_si128((__m128i *)(src + 4));
+ const __m128i data8d = _mm_loadu_si128((__m128i *)(src + 8));
+ const __m128i conv03 = _mm_mullo_epi32(data03, coeff03);
+ const __m128i conv47 = _mm_mullo_epi32(data47, coeff47);
+ const __m128i conv8d = _mm_mullo_epi32(data8d, coeff8d);
+ return _mm_add_epi32(_mm_add_epi32(conv03, conv47), conv8d);
+}
+
+// Do an 8-tap convolution with the given coefficients, loading data from src.
+static __m128i convolve_32_8(const int32_t *src, __m128i coeff03,
+ __m128i coeff47) {
+ const __m128i data03 = _mm_loadu_si128((__m128i *)src);
+ const __m128i data47 = _mm_loadu_si128((__m128i *)(src + 4));
+ const __m128i conv03 = _mm_mullo_epi32(data03, coeff03);
+ const __m128i conv47 = _mm_mullo_epi32(data47, coeff47);
+ return _mm_add_epi32(conv03, conv47);
+}
+
+// The vertical filter for av1_convolve_2d_scale_sse4_1. This is the more
+// general version, supporting 10 and 12 tap filters. For 8-tap filters, use
+// vfilter8.
+static void vfilter(const int32_t *src, int src_stride, int32_t *dst,
+ int dst_stride, int w, int h, int subpel_y_qn,
+ int y_step_qn, const InterpFilterParams *filter_params,
+ const ConvolveParams *conv_params, int bd) {
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int ntaps = filter_params->taps;
+
+ // Construct a mask with which we'll AND filter coefficients 89ab to zero out
+ // the unneeded entries. The upper bits of this mask are unused.
+ const __m128i hicoeff_mask = make_1012_mask(ntaps);
+
+ int32_t round_add32 = (1 << conv_params->round_1) / 2 + (1 << offset_bits);
+ const __m128i round_add = _mm_set1_epi32(round_add32);
+ const __m128i round_shift = extend_32_to_128(conv_params->round_1);
+
+ const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ const __m128i sub = _mm_set1_epi32(sub32);
+
+ int y_qn = subpel_y_qn;
+ for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
+ const int32_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS);
+ const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(filter_idx < SUBPEL_SHIFTS);
+ const int16_t *filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
+
+ // Load up coefficients for the filter and sign-extend to 32-bit precision
+ // (to do so, calculate sign bits and then interleave)
+ const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
+ const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter);
+ const __m128i coeffhi16 = load_and_128i(filter + 8, hicoeff_mask);
+ const __m128i csign0716 = _mm_cmplt_epi16(coeff0716, zero);
+ const __m128i csignhi16 = _mm_cmplt_epi16(coeffhi16, zero);
+ const __m128i coeff03 = _mm_unpacklo_epi16(coeff0716, csign0716);
+ const __m128i coeff47 = _mm_unpackhi_epi16(coeff0716, csign0716);
+ const __m128i coeff8d = _mm_unpacklo_epi16(coeffhi16, csignhi16);
+
+ int x;
+ for (x = 0; x <= w - 4; x += 4) {
+ const int32_t *const src0 = src_y + x * src_stride;
+ const int32_t *const src1 = src0 + 1 * src_stride;
+ const int32_t *const src2 = src0 + 2 * src_stride;
+ const int32_t *const src3 = src0 + 3 * src_stride;
+
+      // Load the source data for the four rows; convolve_32 folds its three
+      // registers of products into one per row (conv0..conv3) to keep
+      // register pressure down.
+ const __m128i conv0 = convolve_32(src0, coeff03, coeff47, coeff8d);
+ const __m128i conv1 = convolve_32(src1, coeff03, coeff47, coeff8d);
+ const __m128i conv2 = convolve_32(src2, coeff03, coeff47, coeff8d);
+ const __m128i conv3 = convolve_32(src3, coeff03, coeff47, coeff8d);
+
+ // Now reduce horizontally to get one lane for each result
+ const __m128i conv01 = _mm_hadd_epi32(conv0, conv1);
+ const __m128i conv23 = _mm_hadd_epi32(conv2, conv3);
+ const __m128i conv = _mm_hadd_epi32(conv01, conv23);
+
+ // Divide down by (1 << round_1), rounding to nearest and subtract sub32.
+ const __m128i shifted =
+ _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
+ const __m128i subbed = _mm_sub_epi32(shifted, sub);
+
+ int32_t *dst_x = dst + y * dst_stride + x;
+ const __m128i result =
+ (conv_params->do_average)
+ ? _mm_add_epi32(subbed, _mm_loadu_si128((__m128i *)dst_x))
+ : subbed;
+
+ _mm_storeu_si128((__m128i *)dst_x, result);
+ }
+ for (; x < w; ++x) {
+ const int32_t *src_x = src_y + x * src_stride;
+ CONV_BUF_TYPE sum = 1 << offset_bits;
+ for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k];
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - sub32;
+ if (conv_params->do_average)
+ dst[y * dst_stride + x] += res;
+ else
+ dst[y * dst_stride + x] = res;
+ }
+ }
+}
+
+// A specialised version of vfilter, the vertical filter for
+// av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters.
+static void vfilter8(const int32_t *src, int src_stride, int32_t *dst,
+ int dst_stride, int w, int h, int subpel_y_qn,
+ int y_step_qn, const InterpFilterParams *filter_params,
+ const ConvolveParams *conv_params, int bd) {
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int ntaps = 8;
+
+ int32_t round_add32 = (1 << conv_params->round_1) / 2 + (1 << offset_bits);
+ const __m128i round_add = _mm_set1_epi32(round_add32);
+ const __m128i round_shift = extend_32_to_128(conv_params->round_1);
+
+ const int32_t sub32 = ((1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1)));
+ const __m128i sub = _mm_set1_epi32(sub32);
+
+ int y_qn = subpel_y_qn;
+ for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
+ const int32_t *src_y = src + (y_qn >> SCALE_SUBPEL_BITS);
+ const int filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(filter_idx < SUBPEL_SHIFTS);
+ const int16_t *filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
+
+ // Load up coefficients for the filter and sign-extend to 32-bit precision
+ // (to do so, calculate sign bits and then interleave)
+ const __m128i zero = _mm_castps_si128(_mm_setzero_ps());
+ const __m128i coeff0716 = _mm_loadu_si128((__m128i *)filter);
+ const __m128i csign0716 = _mm_cmplt_epi16(coeff0716, zero);
+ const __m128i coeff03 = _mm_unpacklo_epi16(coeff0716, csign0716);
+ const __m128i coeff47 = _mm_unpackhi_epi16(coeff0716, csign0716);
+
+ int x;
+ for (x = 0; x <= w - 4; x += 4) {
+ const int32_t *const src0 = src_y + x * src_stride;
+ const int32_t *const src1 = src0 + 1 * src_stride;
+ const int32_t *const src2 = src0 + 2 * src_stride;
+ const int32_t *const src3 = src0 + 3 * src_stride;
+
+      // Load the source data for the four rows; convolve_32_8 folds its two
+      // registers of products into one per row (conv0..conv3) to keep
+      // register pressure down.
+ const __m128i conv0 = convolve_32_8(src0, coeff03, coeff47);
+ const __m128i conv1 = convolve_32_8(src1, coeff03, coeff47);
+ const __m128i conv2 = convolve_32_8(src2, coeff03, coeff47);
+ const __m128i conv3 = convolve_32_8(src3, coeff03, coeff47);
+
+ // Now reduce horizontally to get one lane for each result
+ const __m128i conv01 = _mm_hadd_epi32(conv0, conv1);
+ const __m128i conv23 = _mm_hadd_epi32(conv2, conv3);
+ const __m128i conv = _mm_hadd_epi32(conv01, conv23);
+
+ // Divide down by (1 << round_1), rounding to nearest and subtract sub32.
+ const __m128i shifted =
+ _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
+ const __m128i subbed = _mm_sub_epi32(shifted, sub);
+
+ int32_t *dst_x = dst + y * dst_stride + x;
+ const __m128i result =
+ (conv_params->do_average)
+ ? _mm_add_epi32(subbed, _mm_loadu_si128((__m128i *)dst_x))
+ : subbed;
+
+ _mm_storeu_si128((__m128i *)dst_x, result);
+ }
+ for (; x < w; ++x) {
+ const int32_t *src_x = src_y + x * src_stride;
+ CONV_BUF_TYPE sum = 1 << offset_bits;
+ for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_x[k];
+ CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - sub32;
+ if (conv_params->do_average)
+ dst[y * dst_stride + x] += res;
+ else
+ dst[y * dst_stride + x] = res;
+ }
+ }
+}
+
+void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride,
+ CONV_BUF_TYPE *dst, int dst_stride, int w,
+ int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_qn, const int x_step_qn,
+ const int subpel_y_qn, const int y_step_qn,
+ ConvolveParams *conv_params) {
+ int32_t tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+ int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
+ filter_params_y->taps;
+
+ const int xtaps = filter_params_x->taps;
+ const int ytaps = filter_params_y->taps;
+
+ const int fo_vert = ytaps / 2 - 1;
+
+ // horizontal filter
+ if (xtaps == 8)
+ hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h, subpel_x_qn,
+ x_step_qn, filter_params_x, conv_params->round_0);
+ else
+ hfilter(src - fo_vert * src_stride, src_stride, tmp, w, im_h, subpel_x_qn,
+ x_step_qn, filter_params_x, conv_params->round_0);
+
+ // vertical filter (input is transposed)
+ if (ytaps == 8)
+ vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
+ filter_params_y, conv_params, 8);
+ else
+ vfilter(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
+ filter_params_y, conv_params, 8);
+}
+
+#if CONFIG_HIGHBITDEPTH
+// A wrapper to generate the SHUFPD instruction with __m128i types (just
+// writing _mm_shuffle_pd at the call sites gets a bit ugly because of the
+// casts)
+static __m128i mm_shuffle0_si128(__m128i a, __m128i b) {
+ __m128d ad = _mm_castsi128_pd(a);
+ __m128d bd = _mm_castsi128_pd(b);
+ return _mm_castpd_si128(_mm_shuffle_pd(ad, bd, 0));
+}
+
+// The horizontal filter for av1_highbd_convolve_2d_scale_sse4_1. This
+// is the more general version, supporting 10 and 12 tap filters. For
+// 8-tap filters, use hfilter8.
+static void highbd_hfilter(const uint16_t *src, int src_stride, int32_t *dst,
+ int w, int h, int subpel_x_qn, int x_step_qn,
+ const InterpFilterParams *filter_params,
+ unsigned round, int bd) {
+ const int ntaps = filter_params->taps;
+ assert(ntaps == 10 || ntaps == 12);
+
+ src -= ntaps / 2 - 1;
+
+ // Construct a mask with which we'll AND filter coefficients 89ab89ab to zero
+ // out the unneeded entries.
+ const __m128i hicoeff_mask = make_1012_mask(ntaps);
+
+ int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
+ const __m128i round_add = _mm_set1_epi32(round_add32);
+ const __m128i round_shift = extend_32_to_128(round);
+
+ int x_qn = subpel_x_qn;
+ for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
+ const uint16_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
+ const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(filter_idx < SUBPEL_SHIFTS);
+ const int16_t *filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
+
+ // The "lo" coefficients are coefficients 0..7. For a 12-tap filter, the
+ // "hi" coefficients are arranged as 89ab89ab. For a 10-tap filter, they
+ // are masked out with hicoeff_mask.
+ const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
+ const __m128i coeffhi = load_and_128i(filter + 8, hicoeff_mask);
+
+ int y;
+ for (y = 0; y <= h - 4; y += 4) {
+ const uint16_t *const src0 = src_col + y * src_stride;
+ const uint16_t *const src1 = src0 + 1 * src_stride;
+ const uint16_t *const src2 = src0 + 2 * src_stride;
+ const uint16_t *const src3 = src0 + 3 * src_stride;
+
+ // Load up source data. This is 16-bit input data, so each load gets 8
+ // pixels (we need at most 12)
+ const __m128i data0lo = _mm_loadu_si128((__m128i *)src0);
+ const __m128i data1lo = _mm_loadu_si128((__m128i *)src1);
+ const __m128i data2lo = _mm_loadu_si128((__m128i *)src2);
+ const __m128i data3lo = _mm_loadu_si128((__m128i *)src3);
+ const __m128i data0hi = _mm_loadu_si128((__m128i *)(src0 + 8));
+ const __m128i data1hi = _mm_loadu_si128((__m128i *)(src1 + 8));
+ const __m128i data2hi = _mm_loadu_si128((__m128i *)(src2 + 8));
+ const __m128i data3hi = _mm_loadu_si128((__m128i *)(src3 + 8));
+
+ // The "hi" data has rubbish in the top half so interleave pairs together
+ // to minimise the calculation we need to do.
+ const __m128i data01hi = mm_shuffle0_si128(data0hi, data1hi);
+ const __m128i data23hi = mm_shuffle0_si128(data2hi, data3hi);
+
+ // Multiply by coefficients
+ const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
+ const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
+ const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
+ const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
+ const __m128i conv01hi = _mm_madd_epi16(data01hi, coeffhi);
+ const __m128i conv23hi = _mm_madd_epi16(data23hi, coeffhi);
+
+ // Reduce horizontally and add
+ const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
+ const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
+ const __m128i convlo = _mm_hadd_epi32(conv01lo, conv23lo);
+ const __m128i convhi = _mm_hadd_epi32(conv01hi, conv23hi);
+ const __m128i conv = _mm_add_epi32(convlo, convhi);
+
+ // Divide down by (1 << round), rounding to nearest.
+ const __m128i shifted =
+ _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
+
+ // Write transposed to the output
+ _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
+ }
+ for (; y < h; ++y) {
+ const uint16_t *const src_row = src_col + y * src_stride;
+
+ int32_t sum = (1 << (bd + FILTER_BITS - 1));
+ for (int k = 0; k < ntaps; ++k) {
+ sum += filter[k] * src_row[k];
+ }
+
+ dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
+ }
+ }
+}
+
+// A specialised version of hfilter, the horizontal filter for
+// av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap
+// filters.
+static void highbd_hfilter8(const uint16_t *src, int src_stride, int32_t *dst,
+ int w, int h, int subpel_x_qn, int x_step_qn,
+ const InterpFilterParams *filter_params,
+ unsigned round, int bd) {
+ const int ntaps = 8;
+
+ src -= ntaps / 2 - 1;
+
+ int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
+ const __m128i round_add = _mm_set1_epi32(round_add32);
+ const __m128i round_shift = extend_32_to_128(round);
+
+ int x_qn = subpel_x_qn;
+ for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
+ const uint16_t *const src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
+ const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+ assert(filter_idx < SUBPEL_SHIFTS);
+ const int16_t *filter =
+ av1_get_interp_filter_subpel_kernel(*filter_params, filter_idx);
+
+ // Load the filter coefficients
+ const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
+
+ int y;
+ for (y = 0; y <= h - 4; y += 4) {
+ const uint16_t *const src0 = src_col + y * src_stride;
+ const uint16_t *const src1 = src0 + 1 * src_stride;
+ const uint16_t *const src2 = src0 + 2 * src_stride;
+ const uint16_t *const src3 = src0 + 3 * src_stride;
+
+ // Load up source data. This is 16-bit input data, so each load gets the 8
+ // pixels we need.
+ const __m128i data0lo = _mm_loadu_si128((__m128i *)src0);
+ const __m128i data1lo = _mm_loadu_si128((__m128i *)src1);
+ const __m128i data2lo = _mm_loadu_si128((__m128i *)src2);
+ const __m128i data3lo = _mm_loadu_si128((__m128i *)src3);
+
+ // Multiply by coefficients
+ const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
+ const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
+ const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
+ const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
+
+ // Reduce horizontally and add
+ const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
+ const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
+ const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo);
+
+ // Divide down by (1 << round), rounding to nearest.
+ const __m128i shifted =
+ _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
+
+ // Write transposed to the output
+ _mm_storeu_si128((__m128i *)(dst + y + x * h), shifted);
+ }
+ for (; y < h; ++y) {
+ const uint16_t *const src_row = src_col + y * src_stride;
+
+ int32_t sum = (1 << (bd + FILTER_BITS - 1));
+ for (int k = 0; k < ntaps; ++k) {
+ sum += filter[k] * src_row[k];
+ }
+
+ dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
+ }
+ }
+}
+
+void av1_highbd_convolve_2d_scale_sse4_1(
+ const uint16_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride,
+ int w, int h, InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y, const int subpel_x_qn,
+ const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
+ ConvolveParams *conv_params, int bd) {
+ int32_t tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+ int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
+ filter_params_y->taps;
+
+ const int xtaps = filter_params_x->taps;
+ const int ytaps = filter_params_y->taps;
+ const int fo_vert = ytaps / 2 - 1;
+
+ // horizontal filter
+ if (xtaps == 8)
+ highbd_hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h,
+ subpel_x_qn, x_step_qn, filter_params_x,
+ conv_params->round_0, bd);
+ else
+ highbd_hfilter(src - fo_vert * src_stride, src_stride, tmp, w, im_h,
+ subpel_x_qn, x_step_qn, filter_params_x,
+ conv_params->round_0, bd);
+
+ // vertical filter (input is transposed)
+ if (ytaps == 8)
+ vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
+ filter_params_y, conv_params, bd);
+ else
+ vfilter(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
+ filter_params_y, conv_params, bd);
+}
+#endif // CONFIG_HIGHBITDEPTH
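For orientation, a minimal scalar model of the two-pass layout used by
av1_highbd_convolve_2d_scale_sse4_1 above (an illustrative sketch:
hfilter_scalar is a hypothetical name, and the per-column subpel phase
selection, the SSE4.1 vectorisation and vfilter's compound handling are
omitted). The key point is the transposed intermediate store,
dst[y + x * h], which lets the vertical pass read each column of the
intermediate image as a contiguous run:

static void hfilter_scalar(const uint16_t *src, int src_stride, int32_t *dst,
                           int w, int h, const int16_t *filter, int ntaps,
                           unsigned round, int bd) {
  for (int x = 0; x < w; ++x) {
    for (int y = 0; y < h; ++y) {
      const uint16_t *const src_row = src + y * src_stride + x;
      int32_t sum = 1 << (bd + FILTER_BITS - 1);  // keep sums non-negative
      for (int k = 0; k < ntaps; ++k) sum += filter[k] * src_row[k];
      dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);  // transposed store
    }
  }
}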
diff --git a/third_party/aom/av1/common/x86/av1_fwd_txfm2d_sse4.c b/third_party/aom/av1/common/x86/av1_fwd_txfm2d_sse4.c
index f7824b627..58ede028a 100644
--- a/third_party/aom/av1/common/x86/av1_fwd_txfm2d_sse4.c
+++ b/third_party/aom/av1/common/x86/av1_fwd_txfm2d_sse4.c
@@ -74,17 +74,9 @@ static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
}
void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]);
TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32);
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
}
-
-void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
- int stride, int tx_type, int bd) {
- DECLARE_ALIGNED(16, int32_t, txfm_buf[4096]);
- TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x64_cfg(tx_type);
- (void)bd;
- fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
-}
diff --git a/third_party/aom/av1/common/x86/convolve_2d_sse2.c b/third_party/aom/av1/common/x86/convolve_2d_sse2.c
index 46c2674ca..e4d352c0e 100644
--- a/third_party/aom/av1/common/x86/convolve_2d_sse2.c
+++ b/third_party/aom/av1/common/x86/convolve_2d_sse2.c
@@ -31,6 +31,7 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
int i, j;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int do_average = conv_params->do_average;
const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
const __m128i zero = _mm_setzero_si128();
@@ -181,9 +182,15 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
// Accumulate values into the destination buffer
__m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- _mm_storeu_si128(p, _mm_add_epi32(_mm_loadu_si128(p), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ if (do_average) {
+ _mm_storeu_si128(p + 0,
+ _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
+ _mm_storeu_si128(p + 1,
+ _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ } else {
+ _mm_storeu_si128(p + 0, res_lo_round);
+ _mm_storeu_si128(p + 1, res_hi_round);
+ }
}
}
}
@@ -204,6 +211,7 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
int i, j;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int do_average = conv_params->do_average;
const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
const __m128i zero = _mm_setzero_si128();
@@ -357,9 +365,15 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
// Accumulate values into the destination buffer
__m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- _mm_storeu_si128(p, _mm_add_epi32(_mm_loadu_si128(p), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ if (do_average) {
+ _mm_storeu_si128(p + 0,
+ _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
+ _mm_storeu_si128(p + 1,
+ _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ } else {
+ _mm_storeu_si128(p + 0, res_lo_round);
+ _mm_storeu_si128(p + 1, res_hi_round);
+ }
}
}
}
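The do_average branch added above follows the usual compound-prediction
convention; a scalar restatement (a sketch, assuming the 32-bit
CONV_BUF_TYPE accumulation buffer used by these functions, with a
hypothetical helper name):

static void store_or_accumulate(int32_t *dst, int32_t res, int do_average) {
  if (do_average)
    *dst += res;  // second prediction of a compound pair: accumulate
  else
    *dst = res;   // first prediction: overwrite, never read stale data
}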
diff --git a/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c b/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
index ff4a0a0fe..195f0f570 100644
--- a/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
+++ b/third_party/aom/av1/common/x86/highbd_convolve_2d_ssse3.c
@@ -32,6 +32,7 @@ void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
int i, j;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
+ const int do_average = conv_params->do_average;
const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
/* Horizontal filter */
@@ -185,9 +186,15 @@ void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
// Accumulate values into the destination buffer
__m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- _mm_storeu_si128(p, _mm_add_epi32(_mm_loadu_si128(p), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ if (do_average) {
+ _mm_storeu_si128(p + 0,
+ _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
+ _mm_storeu_si128(p + 1,
+ _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ } else {
+ _mm_storeu_si128(p + 0, res_lo_round);
+ _mm_storeu_si128(p + 1, res_hi_round);
+ }
}
}
}
@@ -204,6 +211,7 @@ void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
int im_h = h + filter_params_y->taps - 1;
int im_stride = MAX_SB_SIZE;
int i, j;
+ const int do_average = conv_params->do_average;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
const uint16_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
@@ -362,9 +370,15 @@ void av1_highbd_convolve_2d_ssse3(const uint16_t *src, int src_stride,
// Accumulate values into the destination buffer
__m128i *const p = (__m128i *)&dst[i * dst_stride + j];
- _mm_storeu_si128(p, _mm_add_epi32(_mm_loadu_si128(p), res_lo_round));
- _mm_storeu_si128(p + 1,
- _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ if (do_average) {
+ _mm_storeu_si128(p + 0,
+ _mm_add_epi32(_mm_loadu_si128(p + 0), res_lo_round));
+ _mm_storeu_si128(p + 1,
+ _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi_round));
+ } else {
+ _mm_storeu_si128(p + 0, res_lo_round);
+ _mm_storeu_si128(p + 1, res_hi_round);
+ }
}
}
}
diff --git a/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c b/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c
index dd2a681bc..0e833e6d9 100644
--- a/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c
+++ b/third_party/aom/av1/common/x86/highbd_inv_txfm_avx2.c
@@ -599,7 +599,7 @@ static void idct32_avx2(__m256i *in, __m256i *out, int bit) {
}
void av1_inv_txfm2d_add_32x32_avx2(const int32_t *coeff, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
__m256i in[128], out[128];
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
diff --git a/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c b/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c
index a93699f0b..8613bed86 100644
--- a/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c
+++ b/third_party/aom/av1/common/x86/highbd_inv_txfm_sse4.c
@@ -230,7 +230,7 @@ static void write_buffer_4x4(__m128i *in, uint16_t *output, int stride,
}
void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
__m128i in[4];
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
@@ -706,7 +706,7 @@ static void write_buffer_8x8(__m128i *in, uint16_t *output, int stride,
}
void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
__m128i in[16], out[16];
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
@@ -1316,7 +1316,7 @@ static void round_shift_16x16(__m128i *in, int shift) {
}
void av1_inv_txfm2d_add_16x16_sse4_1(const int32_t *coeff, uint16_t *output,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
__m128i in[64], out[64];
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
diff --git a/third_party/aom/av1/common/x86/highbd_warp_plane_ssse3.c b/third_party/aom/av1/common/x86/highbd_warp_plane_ssse3.c
index 35d637f72..71b0ec7a3 100644
--- a/third_party/aom/av1/common/x86/highbd_warp_plane_ssse3.c
+++ b/third_party/aom/av1/common/x86/highbd_warp_plane_ssse3.c
@@ -28,6 +28,20 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
#error "HORSHEAR_REDUCE_PREC_BITS < 5 not currently supported by SSSE3 filter"
#endif
int i, j, k;
+#if CONFIG_CONVOLVE_ROUND
+ const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
+ const int reduce_bits_horiz =
+ use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
+ const int offset_bits_horiz =
+ use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
+ if (use_conv_params) {
+ conv_params->do_post_rounding = 1;
+ }
+ assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
+#else
+ const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
+ const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
+#endif
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
@@ -43,30 +57,17 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
for (i = 0; i < p_height; i += 8) {
for (j = 0; j < p_width; j += 8) {
- // (x, y) coordinates of the center of this block in the destination
- // image
- const int32_t dst_x = p_col + j + 4;
- const int32_t dst_y = p_row + i + 4;
-
- int32_t x4, y4, ix4, sx4, iy4, sy4;
- if (subsampling_x)
- x4 = (mat[2] * 4 * dst_x + mat[3] * 4 * dst_y + mat[0] * 2 +
- (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- x4 = mat[2] * dst_x + mat[3] * dst_y + mat[0];
-
- if (subsampling_y)
- y4 = (mat[4] * 4 * dst_x + mat[5] * 4 * dst_y + mat[1] * 2 +
- (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- y4 = mat[4] * dst_x + mat[5] * dst_y + mat[1];
-
- ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ const int32_t src_x = (p_col + j + 4) << subsampling_x;
+ const int32_t src_y = (p_row + i + 4) << subsampling_y;
+ const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
+ const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
+ const int32_t x4 = dst_x >> subsampling_x;
+ const int32_t y4 = dst_y >> subsampling_y;
+
+ int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Add in all the constant terms, including rounding and offset
sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
@@ -154,9 +155,8 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
// coeffs 6 7 6 7 6 7 6 7 for pixels 0, 2, 4, 6
const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
- const __m128i round_const =
- _mm_set1_epi32((1 << (bd + WARPEDPIXEL_FILTER_BITS - 1)) +
- ((1 << HORSHEAR_REDUCE_PREC_BITS) >> 1));
+ const __m128i round_const = _mm_set1_epi32(
+ (1 << offset_bits_horiz) + ((1 << reduce_bits_horiz) >> 1));
// Calculate filtered results
const __m128i res_0 = _mm_madd_epi16(src, coeff_0);
@@ -169,8 +169,8 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
__m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
_mm_add_epi32(res_2, res_6));
- res_even = _mm_srai_epi32(_mm_add_epi32(res_even, round_const),
- HORSHEAR_REDUCE_PREC_BITS);
+ res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const),
+ _mm_cvtsi32_si128(reduce_bits_horiz));
// Filter odd-index pixels
const __m128i tmp_1 = _mm_loadu_si128(
@@ -207,8 +207,8 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
__m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
_mm_add_epi32(res_3, res_7));
- res_odd = _mm_srai_epi32(_mm_add_epi32(res_odd, round_const),
- HORSHEAR_REDUCE_PREC_BITS);
+ res_odd = _mm_sra_epi32(_mm_add_epi32(res_odd, round_const),
+ _mm_cvtsi32_si128(reduce_bits_horiz));
// Combine results into one register.
// We store the columns in the order 0, 2, 4, 6, 1, 3, 5, 7
@@ -299,39 +299,65 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
_mm_add_epi32(res_5, res_7));
// Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- // Round and pack into 8 bits
- const __m128i round_const =
- _mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
- ((1 << VERSHEAR_REDUCE_PREC_BITS) >> 1));
-
- const __m128i res_lo_round = _mm_srai_epi32(
- _mm_add_epi32(res_lo, round_const), VERSHEAR_REDUCE_PREC_BITS);
- const __m128i res_hi_round = _mm_srai_epi32(
- _mm_add_epi32(res_hi, round_const), VERSHEAR_REDUCE_PREC_BITS);
-
- __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
- // Clamp res_16bit to the range [0, 2^bd - 1]
- const __m128i max_val = _mm_set1_epi16((1 << bd) - 1);
- const __m128i zero = _mm_setzero_si128();
- res_16bit = _mm_max_epi16(_mm_min_epi16(res_16bit, max_val), zero);
-
- // Store, blending with 'pred' if needed
- __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
-
- // Note: If we're outputting a 4x4 block, we need to be very careful
- // to only output 4 pixels at this point, to avoid encode/decode
- // mismatches when encoding with multiple threads.
- if (p_width == 4) {
- if (comp_avg)
- res_16bit = _mm_avg_epu16(res_16bit, _mm_loadl_epi64(p));
- _mm_storel_epi64(p, res_16bit);
+ __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
+ __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
+
+#if CONFIG_CONVOLVE_ROUND
+ if (use_conv_params) {
+ __m128i *const p =
+ (__m128i *)&conv_params
+ ->dst[(i + k + 4) * conv_params->dst_stride + j];
+ const __m128i round_const = _mm_set1_epi32(
+ -(1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)) +
+ ((1 << (conv_params->round_1)) >> 1));
+ res_lo = _mm_add_epi32(res_lo, round_const);
+ res_lo =
+ _mm_srl_epi16(res_lo, _mm_cvtsi32_si128(conv_params->round_1));
+ if (comp_avg) res_lo = _mm_add_epi32(_mm_loadu_si128(p), res_lo);
+ _mm_storeu_si128(p, res_lo);
+ if (p_width > 4) {
+ res_hi = _mm_add_epi32(res_hi, round_const);
+ res_hi =
+ _mm_srl_epi16(res_hi, _mm_cvtsi32_si128(conv_params->round_1));
+ if (comp_avg)
+ res_hi = _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi);
+ _mm_storeu_si128(p + 1, res_hi);
+ }
} else {
- if (comp_avg)
- res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p));
- _mm_storeu_si128(p, res_16bit);
+#else
+ {
+#endif
+ // Round and pack into 8 bits
+ const __m128i round_const =
+ _mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
+ ((1 << VERSHEAR_REDUCE_PREC_BITS) >> 1));
+
+ const __m128i res_lo_round = _mm_srai_epi32(
+ _mm_add_epi32(res_lo, round_const), VERSHEAR_REDUCE_PREC_BITS);
+ const __m128i res_hi_round = _mm_srai_epi32(
+ _mm_add_epi32(res_hi, round_const), VERSHEAR_REDUCE_PREC_BITS);
+
+ __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
+ // Clamp res_16bit to the range [0, 2^bd - 1]
+ const __m128i max_val = _mm_set1_epi16((1 << bd) - 1);
+ const __m128i zero = _mm_setzero_si128();
+ res_16bit = _mm_max_epi16(_mm_min_epi16(res_16bit, max_val), zero);
+
+ // Store, blending with 'pred' if needed
+ __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
+
+ // Note: If we're outputting a 4x4 block, we need to be very careful
+ // to only output 4 pixels at this point, to avoid encode/decode
+ // mismatches when encoding with multiple threads.
+ if (p_width == 4) {
+ if (comp_avg)
+ res_16bit = _mm_avg_epu16(res_16bit, _mm_loadl_epi64(p));
+ _mm_storel_epi64(p, res_16bit);
+ } else {
+ if (comp_avg)
+ res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p));
+ _mm_storeu_si128(p, res_16bit);
+ }
}
}
}
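The rewritten coordinate setup above replaces the per-axis subsampling
branches with one uniform expression; a scalar sketch of that computation
(hypothetical helper name, same constants as the source): the block centre
is scaled up into full-resolution coordinates, warped through the affine
matrix, scaled back down, and split into an integer pel position plus a
subpel remainder.

static void warp_block_origin(const int32_t *mat, int p_col, int p_row,
                              int i, int j, int subsampling_x,
                              int subsampling_y, int32_t *ix4, int32_t *sx4,
                              int32_t *iy4, int32_t *sy4) {
  const int32_t src_x = (p_col + j + 4) << subsampling_x;
  const int32_t src_y = (p_row + i + 4) << subsampling_y;
  const int32_t x4 = (mat[2] * src_x + mat[3] * src_y + mat[0]) >> subsampling_x;
  const int32_t y4 = (mat[4] * src_x + mat[5] * src_y + mat[1]) >> subsampling_y;
  *ix4 = x4 >> WARPEDMODEL_PREC_BITS;              // integer pel
  *sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);  // subpel fraction
  *iy4 = y4 >> WARPEDMODEL_PREC_BITS;
  *sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
}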
diff --git a/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c b/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c
index 0648b95b3..c440d0f88 100644
--- a/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c
+++ b/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c
@@ -366,7 +366,7 @@ static void iidtx16(__m256i *in) {
void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
__m256i in[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
load_buffer_16x16(input, in);
switch (tx_type) {
diff --git a/third_party/aom/av1/common/x86/idct_intrin_sse2.c b/third_party/aom/av1/common/x86/idct_intrin_sse2.c
index bf12a26d3..541165c8d 100644
--- a/third_party/aom/av1/common/x86/idct_intrin_sse2.c
+++ b/third_party/aom/av1/common/x86/idct_intrin_sse2.c
@@ -63,7 +63,7 @@ void av1_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
__m128i in[2];
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
in[0] = load_input_data(input);
in[1] = load_input_data(input + 8);
@@ -155,7 +155,7 @@ void av1_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
__m128i in[8];
const __m128i zero = _mm_setzero_si128();
const __m128i final_rounding = _mm_set1_epi16(1 << 4);
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
// load input data
in[0] = load_input_data(input);
@@ -257,7 +257,7 @@ void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
__m128i in[32];
__m128i *in0 = &in[0];
__m128i *in1 = &in[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
load_buffer_8x16(input, in0);
input += 8;
@@ -393,7 +393,7 @@ static INLINE void flip_buffer_lr_8x8(__m128i *in) {
void av1_iht8x16_128_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
__m128i in[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
in[0] = load_input_data(input + 0 * 8);
in[1] = load_input_data(input + 1 * 8);
@@ -559,7 +559,7 @@ static INLINE void write_buffer_8x8_round6(uint8_t *dest, __m128i *in,
void av1_iht16x8_128_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
__m128i in[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
// Transpose 16x8 input into in[]
in[0] = load_input_data(input + 0 * 16);
@@ -720,7 +720,7 @@ static INLINE void write_buffer_8x4_round5(uint8_t *dest, __m128i *in,
void av1_iht8x4_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
__m128i in[8];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
in[0] = load_input_data(input + 0 * 8);
in[1] = load_input_data(input + 1 * 8);
@@ -905,7 +905,7 @@ static INLINE void write_buffer_4x8_round5(uint8_t *dest, __m128i *in,
void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
__m128i in[8];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
// Load rows, packed two per element of 'in'.
// We pack into the bottom half of 'in' so that the
@@ -1128,7 +1128,7 @@ static INLINE void write_buffer_16x32_round6(uint8_t *dest, __m128i *intl,
void av1_iht16x32_512_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
__m128i intl[16], intr[16], inbl[16], inbr[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
int i;
for (i = 0; i < 16; ++i) {
@@ -1282,7 +1282,7 @@ static INLINE void write_buffer_32x16_round6(uint8_t *dest, __m128i *in0,
void av1_iht32x16_512_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
__m128i in0[16], in1[16], in2[16], in3[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
int i;
for (i = 0; i < 16; ++i) {
diff --git a/third_party/aom/av1/common/x86/intra_edge_sse4.c b/third_party/aom/av1/common/x86/intra_edge_sse4.c
new file mode 100644
index 000000000..ea4acff33
--- /dev/null
+++ b/third_party/aom/av1/common/x86/intra_edge_sse4.c
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <smmintrin.h>
+
+#include "./aom_config.h"
+#include "./av1_rtcd.h"
+
+void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength) {
+ if (!strength) return;
+
+ DECLARE_ALIGNED(16, static const int8_t, kern[3][16]) = {
+ { 4, 8, 4, 0, 4, 8, 4, 0, 4, 8, 4, 0, 4, 8, 4, 0 }, // strength 1: 4,8,4
+ { 5, 6, 5, 0, 5, 6, 5, 0, 5, 6, 5, 0, 5, 6, 5, 0 }, // strength 2: 5,6,5
+ { 2, 4, 4, 4, 2, 0, 0, 0, 2, 4, 4, 4, 2, 0, 0, 0 } // strength 3: 2,4,4,4,2
+ };
+
+ DECLARE_ALIGNED(16, static const int8_t, v_const[5][16]) = {
+ { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 },
+ { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ };
+
+ // Extend the first and last samples to simplify the loop for the 5-tap case
+ p[-1] = p[0];
+ __m128i last = _mm_set1_epi8(p[sz - 1]);
+ _mm_storeu_si128((__m128i *)&p[sz], last);
+
+ // Adjust input pointer for filter support area
+ uint8_t *in = (strength == 3) ? p - 1 : p;
+
+ // Avoid modifying first/last samples
+ uint8_t *out = p + 1;
+ int len = sz - 2;
+
+ const int use_3tap_filter = (strength < 3);
+
+ if (use_3tap_filter) {
+ __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]);
+ __m128i shuf0 = _mm_lddqu_si128((__m128i const *)v_const[0]);
+ __m128i shuf1 = _mm_lddqu_si128((__m128i const *)v_const[1]);
+ __m128i iden = _mm_lddqu_si128((__m128i *)v_const[3]);
+ __m128i in0 = _mm_lddqu_si128((__m128i *)in);
+ while (len > 0) {
+ int n_out = (len < 8) ? len : 8;
+ __m128i d0 = _mm_shuffle_epi8(in0, shuf0);
+ __m128i d1 = _mm_shuffle_epi8(in0, shuf1);
+ d0 = _mm_maddubs_epi16(d0, coef0);
+ d1 = _mm_maddubs_epi16(d1, coef0);
+ d0 = _mm_hadd_epi16(d0, d1);
+ __m128i eight = _mm_set1_epi16(8);
+ d0 = _mm_add_epi16(d0, eight);
+ d0 = _mm_srai_epi16(d0, 4);
+ d0 = _mm_packus_epi16(d0, d0);
+ __m128i out0 = _mm_lddqu_si128((__m128i *)out);
+ __m128i n0 = _mm_set1_epi8(n_out);
+ __m128i mask = _mm_cmpgt_epi8(n0, iden);
+ out0 = _mm_blendv_epi8(out0, d0, mask);
+ _mm_storel_epi64((__m128i *)out, out0);
+ __m128i in1 = _mm_lddqu_si128((__m128i *)(in + 16));
+ in0 = _mm_alignr_epi8(in1, in0, 8);
+ in += 8;
+ out += 8;
+ len -= n_out;
+ }
+ } else { // 5-tap filter
+ __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]);
+ __m128i two = _mm_set1_epi8(2);
+ __m128i shuf_a = _mm_lddqu_si128((__m128i const *)v_const[2]);
+ __m128i shuf_b = _mm_add_epi8(shuf_a, two);
+ __m128i shuf_c = _mm_add_epi8(shuf_b, two);
+ __m128i shuf_d = _mm_add_epi8(shuf_c, two);
+ __m128i iden = _mm_lddqu_si128((__m128i *)v_const[3]);
+ __m128i in0 = _mm_lddqu_si128((__m128i *)in);
+ while (len > 0) {
+ int n_out = (len < 8) ? len : 8;
+ __m128i d0 = _mm_shuffle_epi8(in0, shuf_a);
+ __m128i d1 = _mm_shuffle_epi8(in0, shuf_b);
+ __m128i d2 = _mm_shuffle_epi8(in0, shuf_c);
+ __m128i d3 = _mm_shuffle_epi8(in0, shuf_d);
+ d0 = _mm_maddubs_epi16(d0, coef0);
+ d1 = _mm_maddubs_epi16(d1, coef0);
+ d2 = _mm_maddubs_epi16(d2, coef0);
+ d3 = _mm_maddubs_epi16(d3, coef0);
+ d0 = _mm_hadd_epi16(d0, d1);
+ d2 = _mm_hadd_epi16(d2, d3);
+ d0 = _mm_hadd_epi16(d0, d2);
+ __m128i eight = _mm_set1_epi16(8);
+ d0 = _mm_add_epi16(d0, eight);
+ d0 = _mm_srai_epi16(d0, 4);
+ d0 = _mm_packus_epi16(d0, d0);
+ __m128i out0 = _mm_lddqu_si128((__m128i *)out);
+ __m128i n0 = _mm_set1_epi8(n_out);
+ __m128i mask = _mm_cmpgt_epi8(n0, iden);
+ out0 = _mm_blendv_epi8(out0, d0, mask);
+ _mm_storel_epi64((__m128i *)out, out0);
+ __m128i in1 = _mm_lddqu_si128((__m128i *)(in + 16));
+ in0 = _mm_alignr_epi8(in1, in0, 8);
+ in += 8;
+ out += 8;
+ len -= n_out;
+ }
+ }
+}
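/* A scalar model of the filter above (an illustrative sketch, not part of
   the patch; filter_edge_scalar is a hypothetical helper and sz <= 129 is
   assumed, as for AV1 intra edges). The three kernels match kern[] and the
   first/last samples are left untouched, exactly as the masked stores do in
   the SIMD loops. */
static void filter_edge_scalar(uint8_t *p, int sz, int strength) {
  static const int kernel[3][5] = { { 0, 4, 8, 4, 0 },
                                    { 0, 5, 6, 5, 0 },
                                    { 2, 4, 4, 4, 2 } };
  if (!strength) return;
  uint8_t in[129 + 3];
  in[0] = p[0];  // extended p[-1]
  for (int i = 0; i < sz; i++) in[i + 1] = p[i];
  in[sz + 1] = in[sz + 2] = p[sz - 1];  // extended tail
  for (int i = 1; i < sz - 1; i++) {    // first/last samples stay put
    int s = 8;  // rounding bias; every kernel sums to 16
    for (int j = 0; j < 5; j++) s += kernel[strength - 1][j] * in[i - 1 + j];
    p[i] = (uint8_t)(s >> 4);
  }
}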
+
+void av1_filter_intra_edge_high_sse4_1(uint16_t *p, int sz, int strength) {
+ if (!strength) return;
+
+ DECLARE_ALIGNED(16, static const int16_t, kern[3][8]) = {
+ { 4, 8, 4, 8, 4, 8, 4, 8 }, // strength 1: 4,8,4
+ { 5, 6, 5, 6, 5, 6, 5, 6 }, // strength 2: 5,6,5
+ { 2, 4, 2, 4, 2, 4, 2, 4 } // strength 3: 2,4,4,4,2
+ };
+
+ DECLARE_ALIGNED(16, static const int16_t,
+ v_const[1][8]) = { { 0, 1, 2, 3, 4, 5, 6, 7 } };
+
+ // Extend the first and last samples to simplify the loop for the 5-tap case
+ p[-1] = p[0];
+ __m128i last = _mm_set1_epi16(p[sz - 1]);
+ _mm_storeu_si128((__m128i *)&p[sz], last);
+
+ // Adjust input pointer for filter support area
+ uint16_t *in = (strength == 3) ? p - 1 : p;
+
+ // Avoid modifying first/last samples
+ uint16_t *out = p + 1;
+ int len = sz - 2;
+
+ const int use_3tap_filter = (strength < 3);
+
+ if (use_3tap_filter) {
+ __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]);
+ __m128i iden = _mm_lddqu_si128((__m128i *)v_const[0]);
+ __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]);
+ __m128i in8 = _mm_lddqu_si128((__m128i *)&in[8]);
+ while (len > 0) {
+ int n_out = (len < 8) ? len : 8;
+ __m128i in1 = _mm_alignr_epi8(in8, in0, 2);
+ __m128i in2 = _mm_alignr_epi8(in8, in0, 4);
+ __m128i in02 = _mm_add_epi16(in0, in2);
+ __m128i d0 = _mm_unpacklo_epi16(in02, in1);
+ __m128i d1 = _mm_unpackhi_epi16(in02, in1);
+ d0 = _mm_mullo_epi16(d0, coef0);
+ d1 = _mm_mullo_epi16(d1, coef0);
+ d0 = _mm_hadd_epi16(d0, d1);
+ __m128i eight = _mm_set1_epi16(8);
+ d0 = _mm_add_epi16(d0, eight);
+ d0 = _mm_srli_epi16(d0, 4);
+ __m128i out0 = _mm_lddqu_si128((__m128i *)out);
+ __m128i n0 = _mm_set1_epi16(n_out);
+ __m128i mask = _mm_cmpgt_epi16(n0, iden);
+ out0 = _mm_blendv_epi8(out0, d0, mask);
+ _mm_storeu_si128((__m128i *)out, out0);
+ in += 8;
+ in0 = in8;
+ in8 = _mm_lddqu_si128((__m128i *)&in[8]);
+ out += 8;
+ len -= n_out;
+ }
+ } else { // 5-tap filter
+ __m128i coef0 = _mm_lddqu_si128((__m128i const *)kern[strength - 1]);
+ __m128i iden = _mm_lddqu_si128((__m128i *)v_const[0]);
+ __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]);
+ __m128i in8 = _mm_lddqu_si128((__m128i *)&in[8]);
+ while (len > 0) {
+ int n_out = (len < 8) ? len : 8;
+ __m128i in1 = _mm_alignr_epi8(in8, in0, 2);
+ __m128i in2 = _mm_alignr_epi8(in8, in0, 4);
+ __m128i in3 = _mm_alignr_epi8(in8, in0, 6);
+ __m128i in4 = _mm_alignr_epi8(in8, in0, 8);
+ __m128i in04 = _mm_add_epi16(in0, in4);
+ __m128i in123 = _mm_add_epi16(in1, in2);
+ in123 = _mm_add_epi16(in123, in3);
+ __m128i d0 = _mm_unpacklo_epi16(in04, in123);
+ __m128i d1 = _mm_unpackhi_epi16(in04, in123);
+ d0 = _mm_mullo_epi16(d0, coef0);
+ d1 = _mm_mullo_epi16(d1, coef0);
+ d0 = _mm_hadd_epi16(d0, d1);
+ __m128i eight = _mm_set1_epi16(8);
+ d0 = _mm_add_epi16(d0, eight);
+ d0 = _mm_srli_epi16(d0, 4);
+ __m128i out0 = _mm_lddqu_si128((__m128i *)out);
+ __m128i n0 = _mm_set1_epi16(n_out);
+ __m128i mask = _mm_cmpgt_epi16(n0, iden);
+ out0 = _mm_blendv_epi8(out0, d0, mask);
+ _mm_storeu_si128((__m128i *)out, out0);
+ in += 8;
+ in0 = in8;
+ in8 = _mm_lddqu_si128((__m128i *)&in[8]);
+ out += 8;
+ len -= n_out;
+ }
+ }
+}
+
+void av1_upsample_intra_edge_sse4_1(uint8_t *p, int sz) {
+ // interpolate half-sample positions
+ assert(sz <= 24);
+
+ DECLARE_ALIGNED(16, static const int8_t, kernel[1][16]) = {
+ { -1, 9, 9, -1, -1, 9, 9, -1, -1, 9, 9, -1, -1, 9, 9, -1 }
+ };
+
+ DECLARE_ALIGNED(16, static const int8_t, v_const[2][16]) = {
+ { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 },
+ { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 }
+ };
+
+ // Extend first/last samples (upper-left p[-1], last p[sz-1])
+ // to support 4-tap filter
+ p[-2] = p[-1];
+ p[sz] = p[sz - 1];
+
+ uint8_t *in = &p[-2];
+ uint8_t *out = &p[-2];
+
+ int n = sz + 1; // Input length including upper-left sample
+
+ __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]);
+ __m128i in16 = _mm_lddqu_si128((__m128i *)&in[16]);
+
+ __m128i coef0 = _mm_lddqu_si128((__m128i *)kernel[0]);
+ __m128i shuf0 = _mm_lddqu_si128((__m128i *)v_const[0]);
+ __m128i shuf1 = _mm_lddqu_si128((__m128i *)v_const[1]);
+
+ while (n > 0) {
+ __m128i in8 = _mm_alignr_epi8(in16, in0, 8);
+ __m128i d0 = _mm_shuffle_epi8(in0, shuf0);
+ __m128i d1 = _mm_shuffle_epi8(in0, shuf1);
+ __m128i d2 = _mm_shuffle_epi8(in8, shuf0);
+ __m128i d3 = _mm_shuffle_epi8(in8, shuf1);
+ d0 = _mm_maddubs_epi16(d0, coef0);
+ d1 = _mm_maddubs_epi16(d1, coef0);
+ d2 = _mm_maddubs_epi16(d2, coef0);
+ d3 = _mm_maddubs_epi16(d3, coef0);
+ d0 = _mm_hadd_epi16(d0, d1);
+ d2 = _mm_hadd_epi16(d2, d3);
+ __m128i eight = _mm_set1_epi16(8);
+ d0 = _mm_add_epi16(d0, eight);
+ d2 = _mm_add_epi16(d2, eight);
+ d0 = _mm_srai_epi16(d0, 4);
+ d2 = _mm_srai_epi16(d2, 4);
+ d0 = _mm_packus_epi16(d0, d2);
+ __m128i in1 = _mm_alignr_epi8(in16, in0, 1);
+ __m128i out0 = _mm_unpacklo_epi8(in1, d0);
+ __m128i out1 = _mm_unpackhi_epi8(in1, d0);
+ _mm_storeu_si128((__m128i *)&out[0], out0);
+ _mm_storeu_si128((__m128i *)&out[16], out1);
+ in0 = in16;
+ in16 = _mm_setzero_si128();
+ out += 32;
+ n -= 16;
+ }
+}
+
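/* A scalar model of the upsampler above (an illustrative sketch, not part
   of the patch; clip_u8 and upsample_edge_scalar are hypothetical names).
   Each half-sample is the 4-tap (-1, 9, 9, -1)/16 interpolation of its
   neighbours, rounded and clamped, and the results are interleaved with the
   original samples -- the job done by the unpacklo/unpackhi pair at the end
   of the SIMD loop. */
static uint8_t clip_u8(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

static void upsample_edge_scalar(uint8_t *p, int sz) {
  uint8_t in[24 + 3];  // sz <= 24, plus the two extended end samples
  for (int i = 0; i < sz + 1; i++) in[i + 1] = p[i - 1];  // copy p[-1..sz-1]
  in[0] = in[1];
  in[sz + 2] = in[sz + 1];
  p[-2] = in[0];
  for (int i = 0; i < sz; i++) {
    const int s = -in[i] + 9 * in[i + 1] + 9 * in[i + 2] - in[i + 3];
    p[2 * i - 1] = in[i + 1];          // original sample
    p[2 * i] = clip_u8((s + 8) >> 4);  // interpolated half-sample
  }
}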
+void av1_upsample_intra_edge_high_sse4_1(uint16_t *p, int sz, int bd) {
+ // interpolate half-sample positions
+ assert(sz <= 24);
+
+ DECLARE_ALIGNED(16, static const int16_t,
+ kernel[1][8]) = { { -1, 9, -1, 9, -1, 9, -1, 9 } };
+
+ // Extend first/last samples (upper-left p[-1], last p[sz-1])
+ // to support 4-tap filter
+ p[-2] = p[-1];
+ p[sz] = p[sz - 1];
+
+ uint16_t *in = &p[-2];
+ uint16_t *out = in;
+ int n = sz + 1;
+
+ __m128i in0 = _mm_lddqu_si128((__m128i *)&in[0]);
+ __m128i in8 = _mm_lddqu_si128((__m128i *)&in[8]);
+ __m128i in16 = _mm_lddqu_si128((__m128i *)&in[16]);
+ __m128i in24 = _mm_lddqu_si128((__m128i *)&in[24]);
+
+ while (n > 0) {
+ __m128i in1 = _mm_alignr_epi8(in8, in0, 2);
+ __m128i in2 = _mm_alignr_epi8(in8, in0, 4);
+ __m128i in3 = _mm_alignr_epi8(in8, in0, 6);
+ __m128i sum0 = _mm_add_epi16(in0, in3);
+ __m128i sum1 = _mm_add_epi16(in1, in2);
+ __m128i d0 = _mm_unpacklo_epi16(sum0, sum1);
+ __m128i d1 = _mm_unpackhi_epi16(sum0, sum1);
+ __m128i coef0 = _mm_lddqu_si128((__m128i *)kernel[0]);
+ d0 = _mm_madd_epi16(d0, coef0);
+ d1 = _mm_madd_epi16(d1, coef0);
+ __m128i eight = _mm_set1_epi32(8);
+ d0 = _mm_add_epi32(d0, eight);
+ d1 = _mm_add_epi32(d1, eight);
+ d0 = _mm_srai_epi32(d0, 4);
+ d1 = _mm_srai_epi32(d1, 4);
+ d0 = _mm_packus_epi32(d0, d1);
+ __m128i max0 = _mm_set1_epi16((1 << bd) - 1);
+ d0 = _mm_min_epi16(d0, max0);
+ __m128i out0 = _mm_unpacklo_epi16(in1, d0);
+ __m128i out1 = _mm_unpackhi_epi16(in1, d0);
+ _mm_storeu_si128((__m128i *)&out[0], out0);
+ _mm_storeu_si128((__m128i *)&out[8], out1);
+ in0 = in8;
+ in8 = in16;
+ in16 = in24;
+ in24 = _mm_setzero_si128();
+ out += 16;
+ n -= 8;
+ }
+}
diff --git a/third_party/aom/av1/common/x86/selfguided_sse4.c b/third_party/aom/av1/common/x86/selfguided_sse4.c
index e2e4f51c3..4006b8518 100644
--- a/third_party/aom/av1/common/x86/selfguided_sse4.c
+++ b/third_party/aom/av1/common/x86/selfguided_sse4.c
@@ -3,6 +3,7 @@
#include "./aom_config.h"
#include "./av1_rtcd.h"
#include "av1/common/restoration.h"
+#include "aom_dsp/x86/synonyms.h"
/* Calculate four consecutive entries of the intermediate A and B arrays
(corresponding to the first loop in the C version of
@@ -71,8 +72,8 @@ static void selfguided_restoration_1_v(uint8_t *src, int width, int height,
__m128i a, b, x, y, x2, y2;
__m128i sum, sum_sq, tmp;
- a = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *)&src[j]));
- b = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *)&src[src_stride + j]));
+ a = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[j]));
+ b = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[src_stride + j]));
sum = _mm_cvtepi16_epi32(_mm_add_epi16(a, b));
tmp = _mm_unpacklo_epi16(a, b);
@@ -81,7 +82,7 @@ static void selfguided_restoration_1_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&B[j], sum);
_mm_store_si128((__m128i *)&A[j], sum_sq);
- x = _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&src[2 * src_stride + j]));
+ x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[2 * src_stride + j]));
sum = _mm_add_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_add_epi32(sum_sq, x2);
@@ -91,9 +92,9 @@ static void selfguided_restoration_1_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
x = _mm_cvtepu8_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 1) * src_stride + j]));
+ xx_loadl_32((__m128i *)&src[(i - 1) * src_stride + j]));
y = _mm_cvtepu8_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i + 2) * src_stride + j]));
+ xx_loadl_32((__m128i *)&src[(i + 2) * src_stride + j]));
sum = _mm_add_epi32(sum, _mm_sub_epi32(y, x));
@@ -106,7 +107,7 @@ static void selfguided_restoration_1_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
x = _mm_cvtepu8_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 1) * src_stride + j]));
+ xx_loadl_32((__m128i *)&src[(i - 1) * src_stride + j]));
sum = _mm_sub_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_sub_epi32(sum_sq, x2);
@@ -242,9 +243,9 @@ static void selfguided_restoration_2_v(uint8_t *src, int width, int height,
__m128i a, b, c, c2, x, y, x2, y2;
__m128i sum, sum_sq, tmp;
- a = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *)&src[j]));
- b = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *)&src[src_stride + j]));
- c = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *)&src[2 * src_stride + j]));
+ a = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[j]));
+ b = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[src_stride + j]));
+ c = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[2 * src_stride + j]));
sum = _mm_cvtepi16_epi32(_mm_add_epi16(_mm_add_epi16(a, b), c));
// Important: Since c may be up to 2^8, the result on squaring may
@@ -256,7 +257,7 @@ static void selfguided_restoration_2_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&B[j], sum);
_mm_store_si128((__m128i *)&A[j], sum_sq);
- x = _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&src[3 * src_stride + j]));
+ x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[3 * src_stride + j]));
sum = _mm_add_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_add_epi32(sum_sq, x2);
@@ -264,7 +265,7 @@ static void selfguided_restoration_2_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&B[buf_stride + j], sum);
_mm_store_si128((__m128i *)&A[buf_stride + j], sum_sq);
- x = _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&src[4 * src_stride + j]));
+ x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[4 * src_stride + j]));
sum = _mm_add_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_add_epi32(sum_sq, x2);
@@ -289,7 +290,7 @@ static void selfguided_restoration_2_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
x = _mm_cvtepu8_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 2) * src_stride + j]));
+ xx_loadl_32((__m128i *)&src[(i - 2) * src_stride + j]));
sum = _mm_sub_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_sub_epi32(sum_sq, x2);
@@ -298,7 +299,7 @@ static void selfguided_restoration_2_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&A[(i + 1) * buf_stride + j], sum_sq);
x = _mm_cvtepu8_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 1) * src_stride + j]));
+ xx_loadl_32((__m128i *)&src[(i - 1) * src_stride + j]));
sum = _mm_sub_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_sub_epi32(sum_sq, x2);
@@ -443,10 +444,10 @@ static void selfguided_restoration_3_v(uint8_t *src, int width, int height,
__m128i a, b, c, d, x, y, x2, y2;
__m128i sum, sum_sq, tmp, tmp2;
- a = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *)&src[j]));
- b = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *)&src[src_stride + j]));
- c = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *)&src[2 * src_stride + j]));
- d = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i *)&src[3 * src_stride + j]));
+ a = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[j]));
+ b = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[src_stride + j]));
+ c = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[2 * src_stride + j]));
+ d = _mm_cvtepu8_epi16(xx_loadl_32((__m128i *)&src[3 * src_stride + j]));
sum = _mm_cvtepi16_epi32(
_mm_add_epi16(_mm_add_epi16(a, b), _mm_add_epi16(c, d)));
@@ -458,7 +459,7 @@ static void selfguided_restoration_3_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&B[j], sum);
_mm_store_si128((__m128i *)&A[j], sum_sq);
- x = _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&src[4 * src_stride + j]));
+ x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[4 * src_stride + j]));
sum = _mm_add_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_add_epi32(sum_sq, x2);
@@ -466,7 +467,7 @@ static void selfguided_restoration_3_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&B[buf_stride + j], sum);
_mm_store_si128((__m128i *)&A[buf_stride + j], sum_sq);
- x = _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&src[5 * src_stride + j]));
+ x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[5 * src_stride + j]));
sum = _mm_add_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_add_epi32(sum_sq, x2);
@@ -474,7 +475,7 @@ static void selfguided_restoration_3_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&B[2 * buf_stride + j], sum);
_mm_store_si128((__m128i *)&A[2 * buf_stride + j], sum_sq);
- x = _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)&src[6 * src_stride + j]));
+ x = _mm_cvtepu8_epi32(xx_loadl_32((__m128i *)&src[6 * src_stride + j]));
sum = _mm_add_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_add_epi32(sum_sq, x2);
@@ -483,10 +484,8 @@ static void selfguided_restoration_3_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&B[i * buf_stride + j], sum);
_mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
- x = _mm_cvtepu8_epi32(
- _mm_cvtsi32_si128(*((int *)&src[(i - 3) * src_stride + j])));
- y = _mm_cvtepu8_epi32(
- _mm_cvtsi32_si128(*((int *)&src[(i + 4) * src_stride + j])));
+ x = _mm_cvtepu8_epi32(xx_loadl_32(&src[(i - 3) * src_stride + j]));
+ y = _mm_cvtepu8_epi32(xx_loadl_32(&src[(i + 4) * src_stride + j]));
sum = _mm_add_epi32(sum, _mm_sub_epi32(y, x));
@@ -499,7 +498,7 @@ static void selfguided_restoration_3_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&A[i * buf_stride + j], sum_sq);
x = _mm_cvtepu8_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 3) * src_stride + j]));
+ xx_loadl_32((__m128i *)&src[(i - 3) * src_stride + j]));
sum = _mm_sub_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_sub_epi32(sum_sq, x2);
@@ -508,7 +507,7 @@ static void selfguided_restoration_3_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&A[(i + 1) * buf_stride + j], sum_sq);
x = _mm_cvtepu8_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 2) * src_stride + j]));
+ xx_loadl_32((__m128i *)&src[(i - 2) * src_stride + j]));
sum = _mm_sub_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_sub_epi32(sum_sq, x2);
@@ -517,7 +516,7 @@ static void selfguided_restoration_3_v(uint8_t *src, int width, int height,
_mm_store_si128((__m128i *)&A[(i + 2) * buf_stride + j], sum_sq);
x = _mm_cvtepu8_epi32(
- _mm_loadl_epi64((__m128i *)&src[(i - 1) * src_stride + j]));
+ xx_loadl_32((__m128i *)&src[(i - 1) * src_stride + j]));
sum = _mm_sub_epi32(sum, x);
x2 = _mm_mullo_epi32(x, x);
sum_sq = _mm_sub_epi32(sum_sq, x2);
@@ -664,38 +663,48 @@ static void selfguided_restoration_3_h(int32_t *A, int32_t *B, int width,
}
void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
- int stride, int32_t *dst, int dst_stride,
- int r, int eps, int32_t *tmpbuf) {
- int32_t *A = tmpbuf;
- int32_t *B = A + SGRPROJ_OUTBUF_SIZE;
+ int dgd_stride, int32_t *dst,
+ int dst_stride, int r, int eps) {
+ const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
+ const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
+ int32_t A_[RESTORATION_PROC_UNIT_PELS];
+ int32_t B_[RESTORATION_PROC_UNIT_PELS];
+ int32_t *A = A_;
+ int32_t *B = B_;
int i, j;
// Adjusting the stride of A and B here appears to avoid bad cache effects,
// leading to a significant speed improvement.
// We also align the stride to a multiple of 16 bytes for efficiency.
- int buf_stride = ((width + 3) & ~3) + 16;
+ int buf_stride = ((width_ext + 3) & ~3) + 16;
// Don't filter tiles with dimensions < 5 on any axis
if ((width < 5) || (height < 5)) return;
+ uint8_t *dgd0 = dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ;
if (r == 1) {
- selfguided_restoration_1_v(dgd, width, height, stride, A, B, buf_stride);
- selfguided_restoration_1_h(A, B, width, height, buf_stride, eps, 8);
+ selfguided_restoration_1_v(dgd0, width_ext, height_ext, dgd_stride, A, B,
+ buf_stride);
+ selfguided_restoration_1_h(A, B, width_ext, height_ext, buf_stride, eps, 8);
} else if (r == 2) {
- selfguided_restoration_2_v(dgd, width, height, stride, A, B, buf_stride);
- selfguided_restoration_2_h(A, B, width, height, buf_stride, eps, 8);
+ selfguided_restoration_2_v(dgd0, width_ext, height_ext, dgd_stride, A, B,
+ buf_stride);
+ selfguided_restoration_2_h(A, B, width_ext, height_ext, buf_stride, eps, 8);
} else if (r == 3) {
- selfguided_restoration_3_v(dgd, width, height, stride, A, B, buf_stride);
- selfguided_restoration_3_h(A, B, width, height, buf_stride, eps, 8);
+ selfguided_restoration_3_v(dgd0, width_ext, height_ext, dgd_stride, A, B,
+ buf_stride);
+ selfguided_restoration_3_h(A, B, width_ext, height_ext, buf_stride, eps, 8);
} else {
assert(0);
}
+ A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
+ B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
{
i = 0;
j = 0;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = 3 * A[k] + 2 * A[k + 1] + 2 * A[k + buf_stride] +
@@ -707,7 +716,7 @@ void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
}
for (j = 1; j < width - 1; ++j) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + buf_stride] +
@@ -720,7 +729,7 @@ void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
j = width - 1;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = 3 * A[k] + 2 * A[k - 1] + 2 * A[k + buf_stride] +
@@ -735,7 +744,7 @@ void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
j = 0;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
@@ -751,7 +760,7 @@ void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
// Vectorize the innermost loop
for (j = 1; j < width - 1; j += 4) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 5;
@@ -804,7 +813,7 @@ void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
// (typically have 2 such pixels, but may have anywhere between 0 and 3)
for (; j < width - 1; ++j) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 5;
const int32_t a =
@@ -826,7 +835,7 @@ void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
j = width - 1;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
@@ -845,7 +854,7 @@ void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
j = 0;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = 3 * A[k] + 2 * A[k + 1] + 2 * A[k - buf_stride] +
@@ -857,7 +866,7 @@ void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
}
for (j = 1; j < width - 1; ++j) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - buf_stride] +
@@ -870,7 +879,7 @@ void av1_selfguided_restoration_sse4_1(uint8_t *dgd, int width, int height,
j = width - 1;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = 3 * A[k] + 2 * A[k - 1] + 2 * A[k - buf_stride] +
@@ -1051,7 +1060,6 @@ void apply_selfguided_restoration_sse4_1(uint8_t *dat, int width, int height,
int xq[2];
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
- int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if USE_HIGHPASS_IN_SGRPROJ
@@ -1059,12 +1067,10 @@ void apply_selfguided_restoration_sse4_1(uint8_t *dat, int width, int height,
sgr_params[eps].corner, sgr_params[eps].edge);
#else
av1_selfguided_restoration_sse4_1(dat, width, height, stride, flt1, width,
- sgr_params[eps].r1, sgr_params[eps].e1,
- tmpbuf2);
+ sgr_params[eps].r1, sgr_params[eps].e1);
#endif // USE_HIGHPASS_IN_SGRPROJ
av1_selfguided_restoration_sse4_1(dat, width, height, stride, flt2, width,
- sgr_params[eps].r2, sgr_params[eps].e2,
- tmpbuf2);
+ sgr_params[eps].r2, sgr_params[eps].e2);
decode_xq(xqd, xq);
__m128i xq0 = _mm_set1_epi32(xq[0]);
@@ -1364,43 +1370,52 @@ static void highbd_selfguided_restoration_3_v(uint16_t *src, int width,
}
void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
- int height, int stride,
+ int height, int dgd_stride,
int32_t *dst, int dst_stride,
- int bit_depth, int r, int eps,
- int32_t *tmpbuf) {
- int32_t *A = tmpbuf;
- int32_t *B = A + SGRPROJ_OUTBUF_SIZE;
+ int bit_depth, int r, int eps) {
+ const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
+ const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
+ int32_t A_[RESTORATION_PROC_UNIT_PELS];
+ int32_t B_[RESTORATION_PROC_UNIT_PELS];
+ int32_t *A = A_;
+ int32_t *B = B_;
int i, j;
// Adjusting the stride of A and B here appears to avoid bad cache effects,
// leading to a significant speed improvement.
// We also align the stride to a multiple of 16 bytes for efficiency.
- int buf_stride = ((width + 3) & ~3) + 16;
+ int buf_stride = ((width_ext + 3) & ~3) + 16;
// Don't filter tiles with dimensions < 5 on any axis
if ((width < 5) || (height < 5)) return;
+ uint16_t *dgd0 = dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ;
if (r == 1) {
- highbd_selfguided_restoration_1_v(dgd, width, height, stride, A, B,
- buf_stride);
- selfguided_restoration_1_h(A, B, width, height, buf_stride, eps, bit_depth);
+ highbd_selfguided_restoration_1_v(dgd0, width_ext, height_ext, dgd_stride,
+ A, B, buf_stride);
+ selfguided_restoration_1_h(A, B, width_ext, height_ext, buf_stride, eps,
+ bit_depth);
} else if (r == 2) {
- highbd_selfguided_restoration_2_v(dgd, width, height, stride, A, B,
- buf_stride);
- selfguided_restoration_2_h(A, B, width, height, buf_stride, eps, bit_depth);
+ highbd_selfguided_restoration_2_v(dgd0, width_ext, height_ext, dgd_stride,
+ A, B, buf_stride);
+ selfguided_restoration_2_h(A, B, width_ext, height_ext, buf_stride, eps,
+ bit_depth);
} else if (r == 3) {
- highbd_selfguided_restoration_3_v(dgd, width, height, stride, A, B,
- buf_stride);
- selfguided_restoration_3_h(A, B, width, height, buf_stride, eps, bit_depth);
+ highbd_selfguided_restoration_3_v(dgd0, width_ext, height_ext, dgd_stride,
+ A, B, buf_stride);
+ selfguided_restoration_3_h(A, B, width_ext, height_ext, buf_stride, eps,
+ bit_depth);
} else {
assert(0);
}
+ A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
+ B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
{
i = 0;
j = 0;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = 3 * A[k] + 2 * A[k + 1] + 2 * A[k + buf_stride] +
@@ -1412,7 +1427,7 @@ void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
}
for (j = 1; j < width - 1; ++j) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + buf_stride] +
@@ -1425,7 +1440,7 @@ void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
j = width - 1;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = 3 * A[k] + 2 * A[k - 1] + 2 * A[k + buf_stride] +
@@ -1440,7 +1455,7 @@ void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
j = 0;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
@@ -1456,7 +1471,7 @@ void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
// Vectorize the innermost loop
for (j = 1; j < width - 1; j += 4) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 5;
@@ -1509,7 +1524,7 @@ void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
// (typically have 2 such pixels, but may have anywhere between 0 and 3)
for (; j < width - 1; ++j) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 5;
const int32_t a =
@@ -1531,7 +1546,7 @@ void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
j = width - 1;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) +
@@ -1550,7 +1565,7 @@ void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
j = 0;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = 3 * A[k] + 2 * A[k + 1] + 2 * A[k - buf_stride] +
@@ -1562,7 +1577,7 @@ void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
}
for (j = 1; j < width - 1; ++j) {
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - buf_stride] +
@@ -1575,7 +1590,7 @@ void av1_selfguided_restoration_highbd_sse4_1(uint16_t *dgd, int width,
j = width - 1;
{
const int k = i * buf_stride + j;
- const int l = i * stride + j;
+ const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 3;
const int32_t a = 3 * A[k] + 2 * A[k - 1] + 2 * A[k - buf_stride] +
@@ -1725,7 +1740,6 @@ void apply_selfguided_restoration_highbd_sse4_1(
int xq[2];
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
- int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if USE_HIGHPASS_IN_SGRPROJ
@@ -1735,11 +1749,11 @@ void apply_selfguided_restoration_highbd_sse4_1(
#else
av1_selfguided_restoration_highbd_sse4_1(dat, width, height, stride, flt1,
width, bit_depth, sgr_params[eps].r1,
- sgr_params[eps].e1, tmpbuf2);
+ sgr_params[eps].e1);
#endif // USE_HIGHPASS_IN_SGRPROJ
av1_selfguided_restoration_highbd_sse4_1(dat, width, height, stride, flt2,
width, bit_depth, sgr_params[eps].r2,
- sgr_params[eps].e2, tmpbuf2);
+ sgr_params[eps].e2);
decode_xq(xqd, xq);
__m128i xq0 = _mm_set1_epi32(xq[0]);
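The buffer arithmetic introduced in this file can be restated in scalar
form (a sketch; the helper name and the worked value are illustrative
only): A and B now cover the unit plus an SGRPROJ_BORDER_HORZ/VERT apron,
the stride is rounded up to whole 4-lane __m128i rows plus 16 entries of
padding, and the pointers are then advanced so that A[0]/B[0] coincide
with the first real output pixel.

static int32_t *first_output_entry(int32_t *buf, int width) {
  const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
  // Round up to a multiple of 4 int32 lanes, then pad; e.g. width_ext == 68
  // gives ((68 + 3) & ~3) + 16 == 84 entries per row.
  const int buf_stride = ((width_ext + 3) & ~3) + 16;
  return buf + SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
}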
diff --git a/third_party/aom/av1/common/x86/warp_plane_sse2.c b/third_party/aom/av1/common/x86/warp_plane_sse2.c
index 5a22d9abf..d30466ae6 100644
--- a/third_party/aom/av1/common/x86/warp_plane_sse2.c
+++ b/third_party/aom/av1/common/x86/warp_plane_sse2.c
@@ -24,6 +24,20 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
__m128i tmp[15];
int i, j, k;
const int bd = 8;
+#if CONFIG_CONVOLVE_ROUND
+ const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
+ const int reduce_bits_horiz =
+ use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
+ const int offset_bits_horiz =
+ use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
+ if (use_conv_params) {
+ conv_params->do_post_rounding = 1;
+ }
+ assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
+#else
+ const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
+ const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
+#endif
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
@@ -39,30 +53,17 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
for (i = 0; i < p_height; i += 8) {
for (j = 0; j < p_width; j += 8) {
- // (x, y) coordinates of the center of this block in the destination
- // image
- const int32_t dst_x = p_col + j + 4;
- const int32_t dst_y = p_row + i + 4;
-
- int32_t x4, y4, ix4, sx4, iy4, sy4;
- if (subsampling_x)
- x4 = (mat[2] * 4 * dst_x + mat[3] * 4 * dst_y + mat[0] * 2 +
- (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- x4 = mat[2] * dst_x + mat[3] * dst_y + mat[0];
-
- if (subsampling_y)
- y4 = (mat[4] * 4 * dst_x + mat[5] * 4 * dst_y + mat[1] * 2 +
- (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- y4 = mat[4] * dst_x + mat[5] * dst_y + mat[1];
-
- ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ const int32_t src_x = (p_col + j + 4) << subsampling_x;
+ const int32_t src_y = (p_row + i + 4) << subsampling_y;
+ const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
+ const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
+ const int32_t x4 = dst_x >> subsampling_x;
+ const int32_t y4 = dst_y >> subsampling_y;
+
+ int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Add in all the constant terms, including rounding and offset
sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
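
The rewritten coordinate block computes the warp in full-resolution units: the sub-sampled plane position is scaled up by the subsampling factors, pushed through the affine model, and scaled back down before being split into the integer pixel position (ix4/iy4) and the WARPEDMODEL_PREC_BITS fractional phase (sx4/sy4). A scalar sketch of the mapping (WARPEDMODEL_PREC_BITS is 16 in libaom; the helper itself is illustrative):

#include <stdint.h>

// Subsampling-aware affine mapping, mirroring the block above: scale the
// plane position up to full resolution, apply the model, scale back down.
static void warp_coords(const int32_t *mat, int pos_col, int pos_row,
                        int subsampling_x, int subsampling_y,
                        int32_t *x4, int32_t *y4) {
  const int32_t src_x = pos_col << subsampling_x;
  const int32_t src_y = pos_row << subsampling_y;
  *x4 = (mat[2] * src_x + mat[3] * src_y + mat[0]) >> subsampling_x;
  *y4 = (mat[4] * src_x + mat[5] * src_y + mat[1]) >> subsampling_y;
}
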
@@ -149,9 +150,8 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
// coeffs 6 7 6 7 6 7 6 7 for pixels 0, 2, 4, 6
const __m128i coeff_6 = _mm_unpackhi_epi64(tmp_12, tmp_14);
- const __m128i round_const =
- _mm_set1_epi32((1 << (bd + WARPEDPIXEL_FILTER_BITS - 1)) +
- ((1 << HORSHEAR_REDUCE_PREC_BITS) >> 1));
+ const __m128i round_const = _mm_set1_epi32(
+ (1 << offset_bits_horiz) + ((1 << reduce_bits_horiz) >> 1));
// Calculate filtered results
const __m128i src_0 = _mm_unpacklo_epi8(src, zero);
@@ -165,8 +165,8 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
__m128i res_even = _mm_add_epi32(_mm_add_epi32(res_0, res_4),
_mm_add_epi32(res_2, res_6));
- res_even = _mm_srai_epi32(_mm_add_epi32(res_even, round_const),
- HORSHEAR_REDUCE_PREC_BITS);
+ res_even = _mm_sra_epi32(_mm_add_epi32(res_even, round_const),
+ _mm_cvtsi32_si128(reduce_bits_horiz));
// Filter odd-index pixels
const __m128i tmp_1 = _mm_loadu_si128(
@@ -203,8 +203,8 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
__m128i res_odd = _mm_add_epi32(_mm_add_epi32(res_1, res_5),
_mm_add_epi32(res_3, res_7));
- res_odd = _mm_srai_epi32(_mm_add_epi32(res_odd, round_const),
- HORSHEAR_REDUCE_PREC_BITS);
+ res_odd = _mm_sra_epi32(_mm_add_epi32(res_odd, round_const),
+ _mm_cvtsi32_si128(reduce_bits_horiz));
// Combine results into one register.
// We store the columns in the order 0, 2, 4, 6, 1, 3, 5, 7
@@ -295,37 +295,63 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
_mm_add_epi32(res_5, res_7));
// Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- // Round and pack into 8 bits
- const __m128i round_const =
- _mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
- ((1 << VERSHEAR_REDUCE_PREC_BITS) >> 1));
-
- const __m128i res_lo_round = _mm_srai_epi32(
- _mm_add_epi32(res_lo, round_const), VERSHEAR_REDUCE_PREC_BITS);
- const __m128i res_hi_round = _mm_srai_epi32(
- _mm_add_epi32(res_hi, round_const), VERSHEAR_REDUCE_PREC_BITS);
-
- const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
- __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit);
-
- // Store, blending with 'pred' if needed
- __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
-
- // Note: If we're outputting a 4x4 block, we need to be very careful
- // to only output 4 pixels at this point, to avoid encode/decode
- // mismatches when encoding with multiple threads.
- if (p_width == 4) {
- if (comp_avg) {
- const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
- res_8bit = _mm_avg_epu8(res_8bit, orig);
+ __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
+ __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
+
+#if CONFIG_CONVOLVE_ROUND
+ if (use_conv_params) {
+ __m128i *const p =
+ (__m128i *)&conv_params
+ ->dst[(i + k + 4) * conv_params->dst_stride + j];
+ const __m128i round_const = _mm_set1_epi32(
+ -(1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)) +
+ ((1 << (conv_params->round_1)) >> 1));
+ res_lo = _mm_add_epi32(res_lo, round_const);
+ res_lo =
+ _mm_srl_epi16(res_lo, _mm_cvtsi32_si128(conv_params->round_1));
+ if (comp_avg) res_lo = _mm_add_epi32(_mm_loadu_si128(p), res_lo);
+ _mm_storeu_si128(p, res_lo);
+ if (p_width > 4) {
+ res_hi = _mm_add_epi32(res_hi, round_const);
+ res_hi =
+ _mm_srl_epi16(res_hi, _mm_cvtsi32_si128(conv_params->round_1));
+ if (comp_avg)
+ res_hi = _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi);
+ _mm_storeu_si128(p + 1, res_hi);
}
- *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
} else {
- if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
- _mm_storel_epi64(p, res_8bit);
+#else
+ {
+#endif
+ // Round and pack into 8 bits
+ const __m128i round_const =
+ _mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
+ ((1 << VERSHEAR_REDUCE_PREC_BITS) >> 1));
+
+ const __m128i res_lo_round = _mm_srai_epi32(
+ _mm_add_epi32(res_lo, round_const), VERSHEAR_REDUCE_PREC_BITS);
+ const __m128i res_hi_round = _mm_srai_epi32(
+ _mm_add_epi32(res_hi, round_const), VERSHEAR_REDUCE_PREC_BITS);
+
+ const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
+ __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit);
+
+ // Store, blending with 'pred' if needed
+ __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
+
+ // Note: If we're outputting a 4x4 block, we need to be very careful
+ // to only output 4 pixels at this point, to avoid encode/decode
+ // mismatches when encoding with multiple threads.
+ if (p_width == 4) {
+ if (comp_avg) {
+ const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
+ res_8bit = _mm_avg_epu8(res_8bit, orig);
+ }
+ *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
+ } else {
+ if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
+ _mm_storel_epi64(p, res_8bit);
+ }
}
}
}
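
The vertical pass now has two output paths: under CONFIG_CONVOLVE_ROUND with no-round convolution, partially rounded 32-bit sums go to conv_params->dst (and compound prediction adds rather than averages, with do_post_rounding deferring the final rounding); otherwise the result is rounded, packed and clamped to 8 bits as before. A scalar sketch of the legacy 8-bit path, with the shift amount as a parameter:

#include <stdint.h>

// Legacy output: round the vertical-pass sum, clamp to 8 bits, and for
// compound prediction average with the existing pixel the way
// _mm_avg_epu8 does, i.e. (a + b + 1) >> 1.
static uint8_t warp_output_8bit(int32_t sum, int bd, int reduce_bits,
                                int comp_avg, uint8_t prev) {
  const int32_t round_const =
      -(1 << (bd + reduce_bits - 1)) + ((1 << reduce_bits) >> 1);
  int32_t v = (sum + round_const) >> reduce_bits;
  if (v < 0) v = 0;
  if (v > 255) v = 255;
  if (comp_avg) v = (v + prev + 1) >> 1;
  return (uint8_t)v;
}
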
diff --git a/third_party/aom/av1/common/x86/warp_plane_ssse3.c b/third_party/aom/av1/common/x86/warp_plane_ssse3.c
index f8e6f62ba..3986ad389 100644
--- a/third_party/aom/av1/common/x86/warp_plane_ssse3.c
+++ b/third_party/aom/av1/common/x86/warp_plane_ssse3.c
@@ -211,6 +211,20 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
__m128i tmp[15];
int i, j, k;
const int bd = 8;
+#if CONFIG_CONVOLVE_ROUND
+ const int use_conv_params = conv_params->round == CONVOLVE_OPT_NO_ROUND;
+ const int reduce_bits_horiz =
+ use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
+ const int offset_bits_horiz =
+ use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
+ if (use_conv_params) {
+ conv_params->do_post_rounding = 1;
+ }
+ assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
+#else
+ const int reduce_bits_horiz = HORSHEAR_REDUCE_PREC_BITS;
+ const int offset_bits_horiz = bd + WARPEDPIXEL_FILTER_BITS - 1;
+#endif
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
@@ -226,30 +240,17 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
for (i = 0; i < p_height; i += 8) {
for (j = 0; j < p_width; j += 8) {
- // (x, y) coordinates of the center of this block in the destination
- // image
- const int32_t dst_x = p_col + j + 4;
- const int32_t dst_y = p_row + i + 4;
-
- int32_t x4, y4, ix4, sx4, iy4, sy4;
- if (subsampling_x)
- x4 = (mat[2] * 4 * dst_x + mat[3] * 4 * dst_y + mat[0] * 2 +
- (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- x4 = mat[2] * dst_x + mat[3] * dst_y + mat[0];
-
- if (subsampling_y)
- y4 = (mat[4] * 4 * dst_x + mat[5] * 4 * dst_y + mat[1] * 2 +
- (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
- 4;
- else
- y4 = mat[4] * dst_x + mat[5] * dst_y + mat[1];
-
- ix4 = x4 >> WARPEDMODEL_PREC_BITS;
- sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
- iy4 = y4 >> WARPEDMODEL_PREC_BITS;
- sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ const int32_t src_x = (p_col + j + 4) << subsampling_x;
+ const int32_t src_y = (p_row + i + 4) << subsampling_y;
+ const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
+ const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
+ const int32_t x4 = dst_x >> subsampling_x;
+ const int32_t y4 = dst_y >> subsampling_y;
+
+ int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+ int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
+ int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Add in all the constant terms, including rounding and offset
sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
@@ -369,9 +370,8 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
_mm_srli_si128(src_odd, 4), _mm_srli_si128(src_even, 6));
const __m128i res_57 = _mm_maddubs_epi16(src_57, coeff_57);
- const __m128i round_const =
- _mm_set1_epi16((1 << (bd + WARPEDPIXEL_FILTER_BITS - 1)) +
- ((1 << HORSHEAR_REDUCE_PREC_BITS) >> 1));
+ const __m128i round_const = _mm_set1_epi16(
+ (1 << offset_bits_horiz) + ((1 << reduce_bits_horiz) >> 1));
// Note: The values res_02 + res_46 and res_13 + res_57 both
// fit into int16s at this point, but their sum may be too wide to fit
@@ -385,7 +385,7 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
const __m128i res_odd = _mm_add_epi16(res_13, res_57);
const __m128i res =
_mm_add_epi16(_mm_add_epi16(res_even, res_odd), round_const);
- tmp[k + 7] = _mm_srli_epi16(res, HORSHEAR_REDUCE_PREC_BITS);
+ tmp[k + 7] = _mm_srl_epi16(res, _mm_cvtsi32_si128(reduce_bits_horiz));
}
}
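
Because reduce_bits_horiz is now a run-time value, the immediate-count shift _mm_srli_epi16 is replaced by _mm_srl_epi16, which reads its count from the low 64 bits of an XMM register; _mm_cvtsi32_si128 puts the count there with the upper bits zeroed. A minimal illustration:

#include <emmintrin.h>

// Shift eight 16-bit lanes right by a run-time count. _mm_srli_epi16 needs
// a compile-time immediate; _mm_srl_epi16 takes the count from the low
// 64 bits of its second operand.
static __m128i srl_epi16_var(__m128i v, int bits) {
  return _mm_srl_epi16(v, _mm_cvtsi32_si128(bits));
}
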
@@ -471,37 +471,63 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
_mm_add_epi32(res_5, res_7));
// Rearrange pixels back into the order 0 ... 7
- const __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
- const __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
-
- // Round and pack into 8 bits
- const __m128i round_const =
- _mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
- ((1 << VERSHEAR_REDUCE_PREC_BITS) >> 1));
-
- const __m128i res_lo_round = _mm_srai_epi32(
- _mm_add_epi32(res_lo, round_const), VERSHEAR_REDUCE_PREC_BITS);
- const __m128i res_hi_round = _mm_srai_epi32(
- _mm_add_epi32(res_hi, round_const), VERSHEAR_REDUCE_PREC_BITS);
-
- const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
- __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit);
-
- // Store, blending with 'pred' if needed
- __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
-
- // Note: If we're outputting a 4x4 block, we need to be very careful
- // to only output 4 pixels at this point, to avoid encode/decode
- // mismatches when encoding with multiple threads.
- if (p_width == 4) {
- if (comp_avg) {
- const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
- res_8bit = _mm_avg_epu8(res_8bit, orig);
+ __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
+ __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
+
+#if CONFIG_CONVOLVE_ROUND
+ if (use_conv_params) {
+ __m128i *const p =
+ (__m128i *)&conv_params
+ ->dst[(i + k + 4) * conv_params->dst_stride + j];
+ const __m128i round_const = _mm_set1_epi32(
+ -(1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)) +
+ ((1 << (conv_params->round_1)) >> 1));
+ res_lo = _mm_add_epi32(res_lo, round_const);
+ res_lo =
+ _mm_srl_epi16(res_lo, _mm_cvtsi32_si128(conv_params->round_1));
+ if (comp_avg) res_lo = _mm_add_epi32(_mm_loadu_si128(p), res_lo);
+ _mm_storeu_si128(p, res_lo);
+ if (p_width > 4) {
+ res_hi = _mm_add_epi32(res_hi, round_const);
+ res_hi =
+ _mm_srl_epi16(res_hi, _mm_cvtsi32_si128(conv_params->round_1));
+ if (comp_avg)
+ res_hi = _mm_add_epi32(_mm_loadu_si128(p + 1), res_hi);
+ _mm_storeu_si128(p + 1, res_hi);
}
- *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
} else {
- if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
- _mm_storel_epi64(p, res_8bit);
+#else
+ {
+#endif
+ // Round and pack into 8 bits
+ const __m128i round_const =
+ _mm_set1_epi32(-(1 << (bd + VERSHEAR_REDUCE_PREC_BITS - 1)) +
+ ((1 << VERSHEAR_REDUCE_PREC_BITS) >> 1));
+
+ const __m128i res_lo_round = _mm_srai_epi32(
+ _mm_add_epi32(res_lo, round_const), VERSHEAR_REDUCE_PREC_BITS);
+ const __m128i res_hi_round = _mm_srai_epi32(
+ _mm_add_epi32(res_hi, round_const), VERSHEAR_REDUCE_PREC_BITS);
+
+ const __m128i res_16bit = _mm_packs_epi32(res_lo_round, res_hi_round);
+ __m128i res_8bit = _mm_packus_epi16(res_16bit, res_16bit);
+
+ // Store, blending with 'pred' if needed
+ __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
+
+ // Note: If we're outputting a 4x4 block, we need to be very careful
+ // to only output 4 pixels at this point, to avoid encode/decode
+ // mismatches when encoding with multiple threads.
+ if (p_width == 4) {
+ if (comp_avg) {
+ const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
+ res_8bit = _mm_avg_epu8(res_8bit, orig);
+ }
+ *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
+ } else {
+ if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
+ _mm_storel_epi64(p, res_8bit);
+ }
}
}
}
diff --git a/third_party/aom/av1/decoder/decodeframe.c b/third_party/aom/av1/decoder/decodeframe.c
index 247e60e04..9ec3b60eb 100644
--- a/third_party/aom/av1/decoder/decodeframe.c
+++ b/third_party/aom/av1/decoder/decodeframe.c
@@ -19,9 +19,9 @@
#include "aom/aom_codec.h"
#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/binary_codes_reader.h"
#include "aom_dsp/bitreader.h"
#include "aom_dsp/bitreader_buffer.h"
-#include "aom_dsp/binary_codes_reader.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
#include "aom_ports/mem_ops.h"
@@ -44,6 +44,7 @@
#include "av1/common/entropymode.h"
#include "av1/common/entropymv.h"
#include "av1/common/idct.h"
+#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
@@ -63,6 +64,7 @@
#endif
#include "av1/decoder/detokenize.h"
#include "av1/decoder/dsubexp.h"
+#include "av1/decoder/symbolrate.h"
#if CONFIG_WARPED_MOTION || CONFIG_GLOBAL_MOTION
#include "av1/common/warped_motion.h"
@@ -85,6 +87,17 @@
#include "av1/common/cfl.h"
#endif
+#if CONFIG_STRIPED_LOOP_RESTORATION && !CONFIG_LOOP_RESTORATION
+#error "striped_loop_restoration requires loop_restoration"
+#endif
+
+#if CONFIG_LOOP_RESTORATION
+static void loop_restoration_read_sb_coeffs(const AV1_COMMON *const cm,
+ MACROBLOCKD *xd,
+ aom_reader *const r, int plane,
+ int rtile_idx);
+#endif
+
static struct aom_read_bit_buffer *init_read_bit_buffer(
AV1Decoder *pbi, struct aom_read_bit_buffer *rb, const uint8_t *data,
const uint8_t *data_end, uint8_t clear_data[MAX_AV1_HEADER_SIZE]);
@@ -94,7 +107,7 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
struct aom_read_bit_buffer *rb);
static int is_compound_reference_allowed(const AV1_COMMON *cm) {
-#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS // Normative in decoder
+#if CONFIG_ONE_SIDED_COMPOUND // Normative in decoder
return !frame_is_intra_only(cm);
#else
int i;
@@ -103,7 +116,7 @@ static int is_compound_reference_allowed(const AV1_COMMON *cm) {
if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) return 1;
return 0;
-#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS
+#endif // CONFIG_ONE_SIDED_COMPOUND
}
static void setup_compound_reference_mode(AV1_COMMON *cm) {
@@ -114,12 +127,8 @@ static void setup_compound_reference_mode(AV1_COMMON *cm) {
cm->comp_fwd_ref[3] = GOLDEN_FRAME;
cm->comp_bwd_ref[0] = BWDREF_FRAME;
-#if CONFIG_ALTREF2
cm->comp_bwd_ref[1] = ALTREF2_FRAME;
cm->comp_bwd_ref[2] = ALTREF_FRAME;
-#else // !CONFIG_ALTREF2
- cm->comp_bwd_ref[1] = ALTREF_FRAME;
-#endif // CONFIG_ALTREF2
#else // !CONFIG_EXT_REFS
if (cm->ref_frame_sign_bias[LAST_FRAME] ==
cm->ref_frame_sign_bias[GOLDEN_FRAME]) {
@@ -167,7 +176,7 @@ static TX_MODE read_tx_mode(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
#endif // CONFIG_VAR_TX_NO_TX_MODE
}
-#if !CONFIG_NEW_MULTISYMBOL
+#if !CONFIG_RESTRICT_COMPRESSED_HDR
static void read_inter_mode_probs(FRAME_CONTEXT *fc, aom_reader *r) {
int i;
for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
@@ -196,15 +205,11 @@ static REFERENCE_MODE read_frame_reference_mode(
}
}
+#if !CONFIG_RESTRICT_COMPRESSED_HDR
static void read_frame_reference_mode_probs(AV1_COMMON *cm, aom_reader *r) {
-#if CONFIG_NEW_MULTISYMBOL && !CONFIG_EXT_COMP_REFS
- (void)r;
-#else
FRAME_CONTEXT *const fc = cm->fc;
int i;
-#endif
-#if !CONFIG_NEW_MULTISYMBOL
if (cm->reference_mode == REFERENCE_MODE_SELECT)
for (i = 0; i < COMP_INTER_CONTEXTS; ++i)
av1_diff_update_prob(r, &fc->comp_inter_prob[i], ACCT_STR);
@@ -217,7 +222,6 @@ static void read_frame_reference_mode_probs(AV1_COMMON *cm, aom_reader *r) {
}
}
}
-#endif
if (cm->reference_mode != SINGLE_REFERENCE) {
#if CONFIG_EXT_COMP_REFS
@@ -231,7 +235,6 @@ static void read_frame_reference_mode_probs(AV1_COMMON *cm, aom_reader *r) {
}
#endif // CONFIG_EXT_COMP_REFS
-#if !CONFIG_NEW_MULTISYMBOL
for (i = 0; i < REF_CONTEXTS; ++i) {
int j;
#if CONFIG_EXT_REFS
@@ -244,11 +247,9 @@ static void read_frame_reference_mode_probs(AV1_COMMON *cm, aom_reader *r) {
av1_diff_update_prob(r, &fc->comp_ref_prob[i][j], ACCT_STR);
#endif // CONFIG_EXT_REFS
}
-#endif // CONFIG_NEW_MULTISYMBOL
}
}
-#if !CONFIG_NEW_MULTISYMBOL
static void update_mv_probs(aom_prob *p, int n, aom_reader *r) {
int i;
for (i = 0; i < n; ++i) av1_diff_update_prob(r, &p[i], ACCT_STR);
@@ -267,7 +268,7 @@ static void read_mv_probs(nmv_context *ctx, int allow_hp, aom_reader *r) {
#endif
static void inverse_transform_block(MACROBLOCKD *xd, int plane,
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
PREDICTION_MODE mode,
#endif
const TX_TYPE tx_type,
@@ -276,9 +277,12 @@ static void inverse_transform_block(MACROBLOCKD *xd, int plane,
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *const dqcoeff = pd->dqcoeff;
av1_inverse_transform_block(xd, dqcoeff,
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ xd->mrc_mask,
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
tx_type, tx_size, dst, stride, eob);
memset(dqcoeff, 0, (scan_line + 1) * sizeof(dqcoeff[0]));
}
@@ -337,10 +341,11 @@ static int av1_pvq_decode_helper(MACROBLOCKD *xd, tran_low_t *ref_coeff,
pvq_dc_quant = 1;
else {
if (use_activity_masking)
- pvq_dc_quant = OD_MAXI(
- 1, (quant[0] << (OD_COEFF_SHIFT - 3) >> hbd_downshift) *
- dec->state.pvq_qm_q4[pli][od_qm_get_index(bs, 0)] >>
- 4);
+ pvq_dc_quant =
+ OD_MAXI(1,
+ (quant[0] << (OD_COEFF_SHIFT - 3) >> hbd_downshift) *
+ dec->state.pvq_qm_q4[pli][od_qm_get_index(bs, 0)] >>
+ 4);
else
pvq_dc_quant =
OD_MAXI(1, quant[0] << (OD_COEFF_SHIFT - 3) >> hbd_downshift);
@@ -471,133 +476,6 @@ static int av1_pvq_decode_helper2(AV1_COMMON *cm, MACROBLOCKD *const xd,
}
#endif
-#if CONFIG_DPCM_INTRA
-static void process_block_dpcm_vert(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
- const tran_low_t *dqcoeff, uint8_t *dst,
- int dst_stride) {
- const int tx1d_width = tx_size_wide[tx_size];
- const int tx1d_height = tx_size_high[tx_size];
- dpcm_inv_txfm_add_func inverse_tx =
- av1_get_dpcm_inv_txfm_add_func(tx1d_width);
- for (int r = 0; r < tx1d_height; ++r) {
- if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
- inverse_tx(dqcoeff, 1, tx_type_1d, dst);
- dqcoeff += tx1d_width;
- dst += dst_stride;
- }
-}
-
-static void process_block_dpcm_horz(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
- const tran_low_t *dqcoeff, uint8_t *dst,
- int dst_stride) {
- const int tx1d_width = tx_size_wide[tx_size];
- const int tx1d_height = tx_size_high[tx_size];
- dpcm_inv_txfm_add_func inverse_tx =
- av1_get_dpcm_inv_txfm_add_func(tx1d_height);
- tran_low_t tx_buff[64];
- for (int c = 0; c < tx1d_width; ++c, ++dqcoeff, ++dst) {
- for (int r = 0; r < tx1d_height; ++r) {
- if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1];
- tx_buff[r] = dqcoeff[r * tx1d_width];
- }
- inverse_tx(tx_buff, dst_stride, tx_type_1d, dst);
- }
-}
-
-#if CONFIG_HIGHBITDEPTH
-static void hbd_process_block_dpcm_vert(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
- int bd, const tran_low_t *dqcoeff,
- uint8_t *dst8, int dst_stride) {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- const int tx1d_width = tx_size_wide[tx_size];
- const int tx1d_height = tx_size_high[tx_size];
- hbd_dpcm_inv_txfm_add_func inverse_tx =
- av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_width);
- for (int r = 0; r < tx1d_height; ++r) {
- if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
- inverse_tx(dqcoeff, 1, tx_type_1d, bd, dst, 1);
- dqcoeff += tx1d_width;
- dst += dst_stride;
- }
-}
-
-static void hbd_process_block_dpcm_horz(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
- int bd, const tran_low_t *dqcoeff,
- uint8_t *dst8, int dst_stride) {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- const int tx1d_width = tx_size_wide[tx_size];
- const int tx1d_height = tx_size_high[tx_size];
- hbd_dpcm_inv_txfm_add_func inverse_tx =
- av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_height);
- tran_low_t tx_buff[64];
- switch (tx1d_height) {
- case 4: inverse_tx = av1_hbd_dpcm_inv_txfm_add_4_c; break;
- case 8: inverse_tx = av1_hbd_dpcm_inv_txfm_add_8_c; break;
- case 16: inverse_tx = av1_hbd_dpcm_inv_txfm_add_16_c; break;
- case 32: inverse_tx = av1_hbd_dpcm_inv_txfm_add_32_c; break;
- default: assert(0);
- }
-
- for (int c = 0; c < tx1d_width; ++c, ++dqcoeff, ++dst) {
- for (int r = 0; r < tx1d_height; ++r) {
- if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1];
- tx_buff[r] = dqcoeff[r * tx1d_width];
- }
- inverse_tx(tx_buff, dst_stride, tx_type_1d, bd, dst, 0);
- }
-}
-#endif // CONFIG_HIGHBITDEPTH
-
-static void inverse_transform_block_dpcm(MACROBLOCKD *xd, int plane,
- PREDICTION_MODE mode, TX_SIZE tx_size,
- TX_TYPE tx_type, uint8_t *dst,
- int dst_stride, int16_t scan_line) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- tran_low_t *const dqcoeff = pd->dqcoeff;
- TX_TYPE_1D tx_type_1d = DCT_1D;
- switch (tx_type) {
- case IDTX: tx_type_1d = IDTX_1D; break;
- case V_DCT:
- assert(mode == H_PRED);
- tx_type_1d = DCT_1D;
- break;
- case H_DCT:
- assert(mode == V_PRED);
- tx_type_1d = DCT_1D;
- break;
- default: assert(0);
- }
- switch (mode) {
- case V_PRED:
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- hbd_process_block_dpcm_vert(tx_size, tx_type_1d, xd->bd, dqcoeff, dst,
- dst_stride);
- } else {
-#endif // CONFIG_HIGHBITDEPTH
- process_block_dpcm_vert(tx_size, tx_type_1d, dqcoeff, dst, dst_stride);
-#if CONFIG_HIGHBITDEPTH
- }
-#endif // CONFIG_HIGHBITDEPTH
- break;
- case H_PRED:
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- hbd_process_block_dpcm_horz(tx_size, tx_type_1d, xd->bd, dqcoeff, dst,
- dst_stride);
- } else {
-#endif // CONFIG_HIGHBITDEPTH
- process_block_dpcm_horz(tx_size, tx_type_1d, dqcoeff, dst, dst_stride);
-#if CONFIG_HIGHBITDEPTH
- }
-#endif // CONFIG_HIGHBITDEPTH
- break;
- default: assert(0);
- }
- memset(dqcoeff, 0, (scan_line + 1) * sizeof(dqcoeff[0]));
-}
-#endif // CONFIG_DPCM_INTRA
-
static void predict_and_reconstruct_intra_block(
AV1_COMMON *cm, MACROBLOCKD *const xd, aom_reader *const r,
MB_MODE_INFO *const mbmi, int plane, int row, int col, TX_SIZE tx_size) {
@@ -606,7 +484,7 @@ static void predict_and_reconstruct_intra_block(
#if CONFIG_PVQ
(void)r;
#endif
- av1_predict_intra_block_facade(xd, plane, block_idx, col, row, tx_size);
+ av1_predict_intra_block_facade(cm, xd, plane, block_idx, col, row, tx_size);
if (!mbmi->skip) {
#if !CONFIG_PVQ
@@ -631,25 +509,12 @@ static void predict_and_reconstruct_intra_block(
if (eob) {
uint8_t *dst =
&pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
-#if CONFIG_DPCM_INTRA || CONFIG_LGT
- const PREDICTION_MODE mode =
- get_prediction_mode(xd->mi[0], plane, tx_size, block_idx);
-#if CONFIG_DPCM_INTRA
- if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
- inverse_transform_block_dpcm(xd, plane, mode, tx_size, tx_type, dst,
- pd->dst.stride, max_scan_line);
- } else {
-#endif // CONFIG_DPCM_INTRA
-#endif // CONFIG_DPCM_INTRA || CONFIG_LGT
- inverse_transform_block(xd, plane,
-#if CONFIG_LGT
- mode,
-#endif
- tx_type, tx_size, dst, pd->dst.stride,
- max_scan_line, eob);
-#if CONFIG_DPCM_INTRA
- }
-#endif // CONFIG_DPCM_INTRA
+ inverse_transform_block(xd, plane,
+#if CONFIG_LGT_FROM_PRED
+ mbmi->mode,
+#endif
+ tx_type, tx_size, dst, pd->dst.stride,
+ max_scan_line, eob);
}
#else // !CONFIG_PVQ
const TX_TYPE tx_type =
@@ -658,21 +523,10 @@ static void predict_and_reconstruct_intra_block(
#endif // !CONFIG_PVQ
}
#if CONFIG_CFL
- if (plane == AOM_PLANE_Y) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
-#if CONFIG_CHROMA_SUB8X8
- const BLOCK_SIZE plane_bsize =
- AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd));
-#else
- const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
-#endif
- uint8_t *dst =
- &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
- // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
- // intra predicted.
- cfl_store(xd->cfl, dst, pd->dst.stride, row, col, tx_size, plane_bsize);
+ if (plane == AOM_PLANE_Y && xd->cfl->store_y) {
+ cfl_store_tx(xd, row, col, tx_size, mbmi->sb_type);
}
-#endif
+#endif // CONFIG_CFL
}
#if CONFIG_VAR_TX && !CONFIG_COEF_INTERLEAVE
@@ -714,7 +568,7 @@ static void decode_reconstruct_tx(AV1_COMMON *cm, MACROBLOCKD *const xd,
&max_scan_line, r, mbmi->segment_id);
#endif // CONFIG_LV_MAP
inverse_transform_block(xd, plane,
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
mbmi->mode,
#endif
tx_type, plane_tx_size,
@@ -729,7 +583,8 @@ static void decode_reconstruct_tx(AV1_COMMON *cm, MACROBLOCKD *const xd,
if (is_qttx) assert(blk_row == 0 && blk_col == 0 && block == 0);
#else
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- assert(sub_txs < tx_size);
+ assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
+ assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
#endif
const int bsl = tx_size_wide_unit[sub_txs];
int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
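
The tightened assertions use the IMPLIES helper (defined in aom_dsp/aom_dsp_common.h along the lines below) to state the recursion invariant: at TX_4X4 and below the sub-transform size is a fixed point, and above TX_4X4 it strictly shrinks, so the split recursion terminates.

// Logical 'a implies b', as in aom_dsp/aom_dsp_common.h:
#define IMPLIES(a, b) (!(a) || (b))
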
@@ -801,7 +656,7 @@ static int reconstruct_inter_block(AV1_COMMON *cm, MACROBLOCKD *const xd,
&pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
if (eob)
inverse_transform_block(xd, plane,
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
xd->mi[0]->mbmi.mode,
#endif
tx_type, tx_size, dst, pd->dst.stride,
@@ -961,13 +816,13 @@ static void set_param_topblock(AV1_COMMON *const cm, MACROBLOCKD *const xd,
static void set_ref(AV1_COMMON *const cm, MACROBLOCKD *const xd, int idx,
int mi_row, int mi_col) {
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
RefBuffer *ref_buffer =
has_second_ref(mbmi) ? &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME]
: &cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME];
#else
RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
xd->block_refs[idx] = ref_buffer;
if (!av1_is_valid_scale(&ref_buffer->sf))
aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
@@ -1006,9 +861,9 @@ static void dec_predict_b_extend(
mi_row_ori, mi_col_ori);
set_ref(cm, xd, 0, mi_row_pred, mi_col_pred);
if (has_second_ref(&xd->mi[0]->mbmi)
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
|| is_inter_singleref_comp_mode(xd->mi[0]->mbmi.mode)
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
)
set_ref(cm, xd, 1, mi_row_pred, mi_col_pred);
if (!bextend) mbmi->tx_size = max_txsize_lookup[bsize_top];
@@ -1019,19 +874,13 @@ static void dec_predict_b_extend(
(c >> xd->plane[plane].subsampling_x);
if (!b_sub8x8)
- av1_build_inter_predictor_sb_extend(&pbi->common, xd,
-#if CONFIG_EXT_INTER
- mi_row_ori, mi_col_ori,
-#endif // CONFIG_EXT_INTER
- mi_row_pred, mi_col_pred, plane,
- bsize_pred);
+ av1_build_inter_predictor_sb_extend(&pbi->common, xd, mi_row_ori,
+ mi_col_ori, mi_row_pred, mi_col_pred,
+ plane, bsize_pred);
else
- av1_build_inter_predictor_sb_sub8x8_extend(&pbi->common, xd,
-#if CONFIG_EXT_INTER
- mi_row_ori, mi_col_ori,
-#endif // CONFIG_EXT_INTER
- mi_row_pred, mi_col_pred, plane,
- bsize_pred, block);
+ av1_build_inter_predictor_sb_sub8x8_extend(
+ &pbi->common, xd, mi_row_ori, mi_col_ori, mi_row_pred, mi_col_pred,
+ plane, bsize_pred, block);
}
static void dec_extend_dir(AV1Decoder *const pbi, MACROBLOCKD *const xd,
@@ -1556,6 +1405,9 @@ static void dec_predict_sb_complex(AV1Decoder *const pbi, MACROBLOCKD *const xd,
}
break;
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+#error HORZ/VERT_A/B partitions not yet updated in superres code
+#endif
case PARTITION_HORZ_A:
dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
mi_row_top, mi_col_top, dst_buf, dst_stride,
@@ -1786,7 +1638,6 @@ static void decode_mbmi_block(AV1Decoder *const pbi, MACROBLOCKD *const xd,
#endif
av1_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
#endif // CONFIG_SUPERTX
-
if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
const BLOCK_SIZE uv_subsize =
ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y];
@@ -1803,6 +1654,94 @@ static void decode_mbmi_block(AV1Decoder *const pbi, MACROBLOCKD *const xd,
aom_merge_corrupted_flag(&xd->corrupted, reader_corrupted_flag);
}
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+static void set_mode_info_offsets(AV1_COMMON *const cm, MACROBLOCKD *const xd,
+ int mi_row, int mi_col) {
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = &cm->mi[offset];
+}
+
+static void get_ncobmc_recon(AV1_COMMON *const cm, MACROBLOCKD *xd, int mi_row,
+ int mi_col, int bsize, int mode) {
+ uint8_t *pred_buf[4][MAX_MB_PLANE];
+ int pred_stride[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+  // top-left of the target block, in pixel units
+ int pxl_row = mi_row << MI_SIZE_LOG2;
+ int pxl_col = mi_col << MI_SIZE_LOG2;
+
+ int plane;
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t);
+ ASSIGN_ALIGNED_PTRS_HBD(pred_buf[0], cm->ncobmcaw_buf[0], MAX_SB_SQUARE,
+ len);
+ ASSIGN_ALIGNED_PTRS_HBD(pred_buf[1], cm->ncobmcaw_buf[1], MAX_SB_SQUARE,
+ len);
+ ASSIGN_ALIGNED_PTRS_HBD(pred_buf[2], cm->ncobmcaw_buf[2], MAX_SB_SQUARE,
+ len);
+ ASSIGN_ALIGNED_PTRS_HBD(pred_buf[3], cm->ncobmcaw_buf[3], MAX_SB_SQUARE,
+ len);
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ ASSIGN_ALIGNED_PTRS(pred_buf[0], cm->ncobmcaw_buf[0], MAX_SB_SQUARE);
+ ASSIGN_ALIGNED_PTRS(pred_buf[1], cm->ncobmcaw_buf[1], MAX_SB_SQUARE);
+ ASSIGN_ALIGNED_PTRS(pred_buf[2], cm->ncobmcaw_buf[2], MAX_SB_SQUARE);
+ ASSIGN_ALIGNED_PTRS(pred_buf[3], cm->ncobmcaw_buf[3], MAX_SB_SQUARE);
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif
+ av1_get_ext_blk_preds(cm, xd, bsize, mi_row, mi_col, pred_buf, pred_stride);
+ av1_get_ori_blk_pred(cm, xd, bsize, mi_row, mi_col, pred_buf[3], pred_stride);
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ build_ncobmc_intrpl_pred(cm, xd, plane, pxl_row, pxl_col, bsize, pred_buf,
+ pred_stride, mode);
+ }
+}
+
+static void av1_get_ncobmc_recon(AV1_COMMON *const cm, MACROBLOCKD *const xd,
+ int bsize, const int mi_row, const int mi_col,
+ const NCOBMC_MODE modes) {
+ const int mi_width = mi_size_wide[bsize];
+ const int mi_height = mi_size_high[bsize];
+
+ assert(bsize >= BLOCK_8X8);
+
+ reset_xd_boundary(xd, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
+ cm->mi_cols);
+ get_ncobmc_recon(cm, xd, mi_row, mi_col, bsize, modes);
+}
+
+static void recon_ncobmc_intrpl_pred(AV1_COMMON *const cm,
+ MACROBLOCKD *const xd, int mi_row,
+ int mi_col, BLOCK_SIZE bsize) {
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int mi_width = mi_size_wide[bsize];
+ const int mi_height = mi_size_high[bsize];
+ const int hbs = AOMMAX(mi_size_wide[bsize] / 2, mi_size_high[bsize] / 2);
+ const BLOCK_SIZE sqr_blk = bsize_2_sqr_bsize[bsize];
+ if (mi_width > mi_height) {
+ // horizontal partition
+ av1_get_ncobmc_recon(cm, xd, sqr_blk, mi_row, mi_col, mbmi->ncobmc_mode[0]);
+ xd->mi += hbs;
+ av1_get_ncobmc_recon(cm, xd, sqr_blk, mi_row, mi_col + hbs,
+ mbmi->ncobmc_mode[1]);
+ } else if (mi_height > mi_width) {
+ // vertical partition
+ av1_get_ncobmc_recon(cm, xd, sqr_blk, mi_row, mi_col, mbmi->ncobmc_mode[0]);
+ xd->mi += hbs * xd->mi_stride;
+ av1_get_ncobmc_recon(cm, xd, sqr_blk, mi_row + hbs, mi_col,
+ mbmi->ncobmc_mode[1]);
+ } else {
+ av1_get_ncobmc_recon(cm, xd, sqr_blk, mi_row, mi_col, mbmi->ncobmc_mode[0]);
+ }
+ set_mode_info_offsets(cm, xd, mi_row, mi_col);
+ // restore dst buffer and mode info
+ av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
+ mi_col);
+}
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
+
static void decode_token_and_recon_block(AV1Decoder *const pbi,
MACROBLOCKD *const xd, int mi_row,
int mi_col, aom_reader *r,
@@ -1815,46 +1754,33 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis);
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8
+ CFL_CTX *const cfl = xd->cfl;
+ cfl->is_chroma_reference = is_chroma_reference(
+ mi_row, mi_col, bsize, cfl->subsampling_x, cfl->subsampling_y);
+#endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8
-#if CONFIG_DELTA_Q
if (cm->delta_q_present_flag) {
int i;
for (i = 0; i < MAX_SEGMENTS; i++) {
#if CONFIG_EXT_DELTA_Q
- xd->plane[0].seg_dequant[i][0] =
- av1_dc_quant(av1_get_qindex(&cm->seg, i, xd->current_qindex),
- cm->y_dc_delta_q, cm->bit_depth);
- xd->plane[0].seg_dequant[i][1] = av1_ac_quant(
- av1_get_qindex(&cm->seg, i, xd->current_qindex), 0, cm->bit_depth);
- xd->plane[1].seg_dequant[i][0] =
- av1_dc_quant(av1_get_qindex(&cm->seg, i, xd->current_qindex),
- cm->uv_dc_delta_q, cm->bit_depth);
- xd->plane[1].seg_dequant[i][1] =
- av1_ac_quant(av1_get_qindex(&cm->seg, i, xd->current_qindex),
- cm->uv_ac_delta_q, cm->bit_depth);
- xd->plane[2].seg_dequant[i][0] =
- av1_dc_quant(av1_get_qindex(&cm->seg, i, xd->current_qindex),
- cm->uv_dc_delta_q, cm->bit_depth);
- xd->plane[2].seg_dequant[i][1] =
- av1_ac_quant(av1_get_qindex(&cm->seg, i, xd->current_qindex),
- cm->uv_ac_delta_q, cm->bit_depth);
+ const int current_qindex =
+ av1_get_qindex(&cm->seg, i, xd->current_qindex);
#else
- xd->plane[0].seg_dequant[i][0] =
- av1_dc_quant(xd->current_qindex, cm->y_dc_delta_q, cm->bit_depth);
- xd->plane[0].seg_dequant[i][1] =
- av1_ac_quant(xd->current_qindex, 0, cm->bit_depth);
- xd->plane[1].seg_dequant[i][0] =
- av1_dc_quant(xd->current_qindex, cm->uv_dc_delta_q, cm->bit_depth);
- xd->plane[1].seg_dequant[i][1] =
- av1_ac_quant(xd->current_qindex, cm->uv_ac_delta_q, cm->bit_depth);
- xd->plane[2].seg_dequant[i][0] =
- av1_dc_quant(xd->current_qindex, cm->uv_dc_delta_q, cm->bit_depth);
- xd->plane[2].seg_dequant[i][1] =
- av1_ac_quant(xd->current_qindex, cm->uv_ac_delta_q, cm->bit_depth);
-#endif
+ const int current_qindex = xd->current_qindex;
+#endif // CONFIG_EXT_DELTA_Q
+ int j;
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const int dc_delta_q = j == 0 ? cm->y_dc_delta_q : cm->uv_dc_delta_q;
+ const int ac_delta_q = j == 0 ? 0 : cm->uv_ac_delta_q;
+
+ xd->plane[j].seg_dequant[i][0] =
+ av1_dc_quant(current_qindex, dc_delta_q, cm->bit_depth);
+ xd->plane[j].seg_dequant[i][1] =
+ av1_ac_quant(current_qindex, ac_delta_q, cm->bit_depth);
+ }
}
}
-#endif
#if CONFIG_CB4X4
if (mbmi->skip) av1_reset_skip_context(xd, mi_row, mi_col, bsize);
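
The consolidated loop derives, for every segment and plane, the DC and AC dequantizers from the segment qindex; plane 0 (luma) uses y_dc_delta_q and a zero AC delta, while both chroma planes share the uv deltas. A sketch of just the per-plane selection (hypothetical helper, same convention as the loop):

// Plane 0 is luma; planes 1 and 2 share the chroma delta-q values.
static void get_plane_deltas(const AV1_COMMON *cm, int plane,
                             int *dc_delta_q, int *ac_delta_q) {
  *dc_delta_q = (plane == 0) ? cm->y_dc_delta_q : cm->uv_dc_delta_q;
  *ac_delta_q = (plane == 0) ? 0 : cm->uv_ac_delta_q;
}
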
@@ -1898,12 +1824,13 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
int row_y, col_y, row_c, col_c;
int plane;
-#if CONFIG_PALETTE
+// TODO(anybody): remove this flag when PVQ supports the palette coding tool
+#if !CONFIG_PVQ
for (plane = 0; plane <= 1; ++plane) {
if (mbmi->palette_mode_info.palette_size[plane])
av1_decode_palette_tokens(xd, plane, r);
}
-#endif
+#endif // !CONFIG_PVQ
for (row_y = 0; row_y < tu_num_h_y; row_y++) {
for (col_y = 0; col_y < tu_num_w_y; col_y++) {
@@ -1983,12 +1910,15 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
#else // CONFIG_COEF_INTERLEAVE
if (!is_inter_block(mbmi)) {
int plane;
-#if CONFIG_PALETTE
+
+// TODO(anybody): remove this flag when PVQ supports the palette coding tool
+#if !CONFIG_PVQ
for (plane = 0; plane <= 1; ++plane) {
if (mbmi->palette_mode_info.palette_size[plane])
av1_decode_palette_tokens(xd, plane, r);
}
-#endif // CONFIG_PALETTE
+#endif  // !CONFIG_PVQ
+
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
@@ -2035,14 +1965,18 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
} else {
int ref;
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + is_inter_anyref_comp_mode(mbmi->mode); ++ref) {
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + is_inter_anyref_comp_mode(mbmi->mode); ++ref)
+#else
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
const MV_REFERENCE_FRAME frame =
+#if CONFIG_COMPOUND_SINGLEREF
has_second_ref(mbmi) ? mbmi->ref_frame[ref] : mbmi->ref_frame[0];
#else
- for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ mbmi->ref_frame[ref];
+#endif // CONFIG_COMPOUND_SINGLEREF
if (frame < LAST_FRAME) {
#if CONFIG_INTRABC
assert(is_intrabc_block(mbmi));
@@ -2079,7 +2013,15 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
#endif
}
#endif // CONFIG_MOTION_VAR
-
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ if (mbmi->motion_mode == NCOBMC_ADAPT_WEIGHT) {
+ int plane;
+ recon_ncobmc_intrpl_pred(cm, xd, mi_row, mi_col, bsize);
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ get_pred_from_intrpl_buf(xd, mi_row, mi_col, bsize, plane);
+ }
+ }
+#endif
// Reconstruction
if (!mbmi->skip) {
int eobtotal = 0;
@@ -2093,8 +2035,8 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
#elif CONFIG_CB4X4
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
#else
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
#endif
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
@@ -2116,7 +2058,8 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide);
mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high);
- const TX_SIZE max_tx_size = get_vartx_max_txsize(mbmi, plane_bsize);
+ const TX_SIZE max_tx_size = get_vartx_max_txsize(
+ mbmi, plane_bsize, pd->subsampling_x || pd->subsampling_y);
const int bh_var_tx = tx_size_high_unit[max_tx_size];
const int bw_var_tx = tx_size_wide_unit[max_tx_size];
int block = 0;
@@ -2152,13 +2095,25 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
}
}
}
+#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8
+ if (mbmi->uv_mode != UV_CFL_PRED) {
+#if CONFIG_DEBUG
+ if (cfl->is_chroma_reference) {
+ cfl_clear_sub8x8_val(cfl);
+ }
+#endif
+ if (!cfl->is_chroma_reference && is_inter_block(mbmi)) {
+ cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
+ }
+ }
+#endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8
#endif // CONFIG_COEF_INTERLEAVE
int reader_corrupted_flag = aom_reader_has_error(r);
aom_merge_corrupted_flag(&xd->corrupted, reader_corrupted_flag);
}
-#if (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT) && CONFIG_MOTION_VAR
+#if NC_MODE_INFO && CONFIG_MOTION_VAR
static void detoken_and_recon_sb(AV1Decoder *const pbi, MACROBLOCKD *const xd,
int mi_row, int mi_col, aom_reader *r,
BLOCK_SIZE bsize) {
@@ -2210,6 +2165,9 @@ static void detoken_and_recon_sb(AV1Decoder *const pbi, MACROBLOCKD *const xd,
detoken_and_recon_sb(pbi, xd, mi_row + hbs, mi_col + hbs, r, subsize);
break;
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+#error NC_MODE_INFO+MOTION_VAR not yet supported for new HORZ/VERT_AB partitions
+#endif
case PARTITION_HORZ_A:
decode_token_and_recon_block(pbi, xd, mi_row, mi_col, r, bsize2);
decode_token_and_recon_block(pbi, xd, mi_row, mi_col + hbs, r, bsize2);
@@ -2258,7 +2216,7 @@ static void decode_block(AV1Decoder *const pbi, MACROBLOCKD *const xd,
#endif
bsize);
-#if !(CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT))
+#if !(CONFIG_MOTION_VAR && NC_MODE_INFO)
#if CONFIG_SUPERTX
if (!supertx_enabled)
#endif // CONFIG_SUPERTX
@@ -2273,13 +2231,8 @@ static PARTITION_TYPE read_partition(AV1_COMMON *cm, MACROBLOCKD *xd,
#if CONFIG_UNPOISON_PARTITION_CTX
const int ctx =
partition_plane_context(xd, mi_row, mi_col, has_rows, has_cols, bsize);
- const aom_prob *const probs =
- ctx < PARTITION_CONTEXTS ? cm->fc->partition_prob[ctx] : NULL;
- FRAME_COUNTS *const counts = ctx < PARTITION_CONTEXTS ? xd->counts : NULL;
#else
const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
- const aom_prob *const probs = cm->fc->partition_prob[ctx];
- FRAME_COUNTS *const counts = xd->counts;
#endif
PARTITION_TYPE p;
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
@@ -2287,26 +2240,33 @@ static PARTITION_TYPE read_partition(AV1_COMMON *cm, MACROBLOCKD *xd,
aom_cdf_prob *partition_cdf = (ctx >= 0) ? ec_ctx->partition_cdf[ctx] : NULL;
- if (has_rows && has_cols)
+ if (has_rows && has_cols) {
#if CONFIG_EXT_PARTITION_TYPES
- if (bsize <= BLOCK_8X8)
- p = (PARTITION_TYPE)aom_read_symbol(r, partition_cdf, PARTITION_TYPES,
- ACCT_STR);
- else
- p = (PARTITION_TYPE)aom_read_symbol(r, partition_cdf, EXT_PARTITION_TYPES,
- ACCT_STR);
+ const int num_partition_types =
+ (mi_width_log2_lookup[bsize] > mi_width_log2_lookup[BLOCK_8X8])
+ ? EXT_PARTITION_TYPES
+ : PARTITION_TYPES;
#else
- p = (PARTITION_TYPE)aom_read_symbol(r, partition_cdf, PARTITION_TYPES,
- ACCT_STR);
+ const int num_partition_types = PARTITION_TYPES;
#endif // CONFIG_EXT_PARTITION_TYPES
- else if (!has_rows && has_cols)
- p = aom_read(r, probs[1], ACCT_STR) ? PARTITION_SPLIT : PARTITION_HORZ;
- else if (has_rows && !has_cols)
- p = aom_read(r, probs[2], ACCT_STR) ? PARTITION_SPLIT : PARTITION_VERT;
- else
+ p = (PARTITION_TYPE)aom_read_symbol(r, partition_cdf, num_partition_types,
+ ACCT_STR);
+ } else if (!has_rows && has_cols) {
+ assert(bsize > BLOCK_8X8);
+ aom_cdf_prob cdf[2];
+ partition_gather_vert_alike(cdf, partition_cdf);
+ assert(cdf[1] == AOM_ICDF(CDF_PROB_TOP));
+ p = aom_read_cdf(r, cdf, 2, ACCT_STR) ? PARTITION_SPLIT : PARTITION_HORZ;
+    // bottom frame edge: only a HORZ-like or SPLIT outcome can be coded
+ } else if (has_rows && !has_cols) {
+ assert(bsize > BLOCK_8X8);
+ aom_cdf_prob cdf[2];
+ partition_gather_horz_alike(cdf, partition_cdf);
+ assert(cdf[1] == AOM_ICDF(CDF_PROB_TOP));
+ p = aom_read_cdf(r, cdf, 2, ACCT_STR) ? PARTITION_SPLIT : PARTITION_VERT;
+ } else {
p = PARTITION_SPLIT;
-
- if (counts) ++counts->partition[ctx][p];
+ }
return p;
}
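
When the superblock straddles a frame edge only two partition outcomes are codable, so read_partition now folds the full partition CDF into a two-symbol CDF and makes a single binary read; the assert that cdf[1] == AOM_ICDF(CDF_PROB_TOP) checks that the fold produced a complete distribution. Condensed from the bottom-edge branch above (calls as they appear in this hunk):

// Bottom edge (no rows below): the stream can only say "split further" or
// "stop with a horizontal half"; partition_gather_vert_alike() folds every
// other partition's probability into those two buckets.
aom_cdf_prob cdf[2];
partition_gather_vert_alike(cdf, partition_cdf);
assert(cdf[1] == AOM_ICDF(CDF_PROB_TOP));
const PARTITION_TYPE p =
    aom_read_cdf(r, cdf, 2, ACCT_STR) ? PARTITION_SPLIT : PARTITION_HORZ;
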
@@ -2341,6 +2301,9 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
AV1_COMMON *const cm = &pbi->common;
const int num_8x8_wh = mi_size_wide[bsize];
const int hbs = num_8x8_wh >> 1;
+#if CONFIG_EXT_PARTITION_TYPES && CONFIG_EXT_PARTITION_TYPES_AB
+ const int qbs = num_8x8_wh >> 2;
+#endif
#if CONFIG_CB4X4
const int unify_bsize = 1;
#else
@@ -2349,9 +2312,11 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
#if CONFIG_EXT_PARTITION_TYPES
- BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
const int quarter_step = num_8x8_wh / 4;
int i;
+#if !CONFIG_EXT_PARTITION_TYPES_AB
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
#endif
const int has_rows = (mi_row + hbs) < cm->mi_rows;
const int has_cols = (mi_col + hbs) < cm->mi_cols;
@@ -2370,6 +2335,15 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
has_rows, has_cols, bsize);
subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition);
+  // Check that the bitstream is conformant: if the chroma planes are
+  // subsampled, subsize must subsample to a valid block size.
+ const struct macroblockd_plane *const pd_u = &xd->plane[1];
+ if (get_plane_block_size(subsize, pd_u) == BLOCK_INVALID) {
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Block size %dx%d invalid with this subsampling mode",
+ block_size_wide[subsize], block_size_high[subsize]);
+ }
+
#if CONFIG_PVQ
assert(partition < PARTITION_TYPES);
assert(subsize < BLOCK_SIZES_ALL);
@@ -2387,187 +2361,105 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
#endif
}
#endif // CONFIG_SUPERTX
+
+#if CONFIG_SUPERTX
+#define DEC_BLOCK_STX_ARG supertx_enabled,
+#else
+#define DEC_BLOCK_STX_ARG
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+#define DEC_BLOCK_EPT_ARG partition,
+#else
+#define DEC_BLOCK_EPT_ARG
+#endif
+#define DEC_BLOCK(db_r, db_c, db_subsize) \
+ decode_block(pbi, xd, DEC_BLOCK_STX_ARG(db_r), (db_c), r, \
+ DEC_BLOCK_EPT_ARG(db_subsize))
+#define DEC_PARTITION(db_r, db_c, db_subsize) \
+ decode_partition(pbi, xd, DEC_BLOCK_STX_ARG(db_r), (db_c), r, (db_subsize))
+
if (!hbs && !unify_bsize) {
// calculate bmode block dimensions (log 2)
xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT);
xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ);
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col, r,
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
-#endif // CONFIG_EXT_PARTITION_TYPES
- subsize);
+ DEC_BLOCK(mi_row, mi_col, subsize);
} else {
switch (partition) {
- case PARTITION_NONE:
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col, r,
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
-#endif // CONFIG_EXT_PARTITION_TYPES
- subsize);
- break;
+ case PARTITION_NONE: DEC_BLOCK(mi_row, mi_col, subsize); break;
case PARTITION_HORZ:
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col, r,
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
-#endif // CONFIG_EXT_PARTITION_TYPES
- subsize);
- if (has_rows)
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row + hbs, mi_col, r,
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
-#endif // CONFIG_EXT_PARTITION_TYPES
- subsize);
+ DEC_BLOCK(mi_row, mi_col, subsize);
+ if (has_rows) DEC_BLOCK(mi_row + hbs, mi_col, subsize);
break;
case PARTITION_VERT:
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col, r,
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
-#endif // CONFIG_EXT_PARTITION_TYPES
- subsize);
- if (has_cols)
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col + hbs, r,
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
-#endif // CONFIG_EXT_PARTITION_TYPES
- subsize);
+ DEC_BLOCK(mi_row, mi_col, subsize);
+ if (has_cols) DEC_BLOCK(mi_row, mi_col + hbs, subsize);
break;
case PARTITION_SPLIT:
- decode_partition(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col, r, subsize);
- decode_partition(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row, mi_col + hbs, r, subsize);
- decode_partition(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row + hbs, mi_col, r, subsize);
- decode_partition(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif // CONFIG_SUPERTX
- mi_row + hbs, mi_col + hbs, r, subsize);
+ DEC_PARTITION(mi_row, mi_col, subsize);
+ DEC_PARTITION(mi_row, mi_col + hbs, subsize);
+ DEC_PARTITION(mi_row + hbs, mi_col, subsize);
+ DEC_PARTITION(mi_row + hbs, mi_col + hbs, subsize);
break;
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
case PARTITION_HORZ_A:
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row, mi_col, r, partition, bsize2);
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row, mi_col + hbs, r, partition, bsize2);
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row + hbs, mi_col, r, partition, subsize);
+ DEC_BLOCK(mi_row, mi_col, get_subsize(bsize, PARTITION_HORZ_4));
+ DEC_BLOCK(mi_row + qbs, mi_col, get_subsize(bsize, PARTITION_HORZ_4));
+ DEC_BLOCK(mi_row + hbs, mi_col, subsize);
break;
case PARTITION_HORZ_B:
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row, mi_col, r, partition, subsize);
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row + hbs, mi_col, r, partition, bsize2);
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row + hbs, mi_col + hbs, r, partition, bsize2);
+ DEC_BLOCK(mi_row, mi_col, subsize);
+ DEC_BLOCK(mi_row + hbs, mi_col, get_subsize(bsize, PARTITION_HORZ_4));
+ if (mi_row + 3 * qbs < cm->mi_rows)
+ DEC_BLOCK(mi_row + 3 * qbs, mi_col,
+ get_subsize(bsize, PARTITION_HORZ_4));
break;
case PARTITION_VERT_A:
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row, mi_col, r, partition, bsize2);
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row + hbs, mi_col, r, partition, bsize2);
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row, mi_col + hbs, r, partition, subsize);
+ DEC_BLOCK(mi_row, mi_col, get_subsize(bsize, PARTITION_VERT_4));
+ DEC_BLOCK(mi_row, mi_col + qbs, get_subsize(bsize, PARTITION_VERT_4));
+ DEC_BLOCK(mi_row, mi_col + hbs, subsize);
break;
case PARTITION_VERT_B:
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row, mi_col, r, partition, subsize);
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row, mi_col + hbs, r, partition, bsize2);
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row + hbs, mi_col + hbs, r, partition, bsize2);
+ DEC_BLOCK(mi_row, mi_col, subsize);
+ DEC_BLOCK(mi_row, mi_col + hbs, get_subsize(bsize, PARTITION_VERT_4));
+ if (mi_col + 3 * qbs < cm->mi_cols)
+ DEC_BLOCK(mi_row, mi_col + 3 * qbs,
+ get_subsize(bsize, PARTITION_VERT_4));
break;
+#else
+ case PARTITION_HORZ_A:
+ DEC_BLOCK(mi_row, mi_col, bsize2);
+ DEC_BLOCK(mi_row, mi_col + hbs, bsize2);
+ DEC_BLOCK(mi_row + hbs, mi_col, subsize);
+ break;
+ case PARTITION_HORZ_B:
+ DEC_BLOCK(mi_row, mi_col, subsize);
+ DEC_BLOCK(mi_row + hbs, mi_col, bsize2);
+ DEC_BLOCK(mi_row + hbs, mi_col + hbs, bsize2);
+ break;
+ case PARTITION_VERT_A:
+ DEC_BLOCK(mi_row, mi_col, bsize2);
+ DEC_BLOCK(mi_row + hbs, mi_col, bsize2);
+ DEC_BLOCK(mi_row, mi_col + hbs, subsize);
+ break;
+ case PARTITION_VERT_B:
+ DEC_BLOCK(mi_row, mi_col, subsize);
+ DEC_BLOCK(mi_row, mi_col + hbs, bsize2);
+ DEC_BLOCK(mi_row + hbs, mi_col + hbs, bsize2);
+ break;
+#endif
case PARTITION_HORZ_4:
for (i = 0; i < 4; ++i) {
int this_mi_row = mi_row + i * quarter_step;
if (i > 0 && this_mi_row >= cm->mi_rows) break;
-
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- this_mi_row, mi_col, r, partition, subsize);
+ DEC_BLOCK(this_mi_row, mi_col, subsize);
}
break;
case PARTITION_VERT_4:
for (i = 0; i < 4; ++i) {
int this_mi_col = mi_col + i * quarter_step;
if (i > 0 && this_mi_col >= cm->mi_cols) break;
-
- decode_block(pbi, xd,
-#if CONFIG_SUPERTX
- supertx_enabled,
-#endif
- mi_row, this_mi_col, r, partition, subsize);
+ DEC_BLOCK(mi_row, this_mi_col, subsize);
}
break;
#endif // CONFIG_EXT_PARTITION_TYPES
@@ -2575,6 +2467,11 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
}
}
+#undef DEC_PARTITION
+#undef DEC_BLOCK
+#undef DEC_BLOCK_EPT_ARG
+#undef DEC_BLOCK_STX_ARG
+
#if CONFIG_SUPERTX
if (supertx_enabled && read_token) {
uint8_t *dst_buf[3];
@@ -2583,24 +2480,20 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
set_segment_id_supertx(cm, mi_row, mi_col, bsize);
-#if CONFIG_DELTA_Q
if (cm->delta_q_present_flag) {
for (i = 0; i < MAX_SEGMENTS; i++) {
- xd->plane[0].seg_dequant[i][0] =
- av1_dc_quant(xd->current_qindex, cm->y_dc_delta_q, cm->bit_depth);
- xd->plane[0].seg_dequant[i][1] =
- av1_ac_quant(xd->current_qindex, 0, cm->bit_depth);
- xd->plane[1].seg_dequant[i][0] =
- av1_dc_quant(xd->current_qindex, cm->uv_dc_delta_q, cm->bit_depth);
- xd->plane[1].seg_dequant[i][1] =
- av1_ac_quant(xd->current_qindex, cm->uv_ac_delta_q, cm->bit_depth);
- xd->plane[2].seg_dequant[i][0] =
- av1_dc_quant(xd->current_qindex, cm->uv_dc_delta_q, cm->bit_depth);
- xd->plane[2].seg_dequant[i][1] =
- av1_ac_quant(xd->current_qindex, cm->uv_ac_delta_q, cm->bit_depth);
+ int j;
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const int dc_delta_q = j == 0 ? cm->y_dc_delta_q : cm->uv_dc_delta_q;
+ const int ac_delta_q = j == 0 ? 0 : cm->uv_ac_delta_q;
+
+ xd->plane[j].seg_dequant[i][0] =
+ av1_dc_quant(xd->current_qindex, dc_delta_q, cm->bit_depth);
+ xd->plane[j].seg_dequant[i][1] =
+ av1_ac_quant(xd->current_qindex, ac_delta_q, cm->bit_depth);
+ }
}
}
-#endif
xd->mi = cm->mi_grid_visible + offset;
xd->mi[0] = cm->mi + offset;
@@ -2622,18 +2515,24 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
const int eset =
get_ext_tx_set(supertx_size, bsize, 1, cm->reduced_tx_set_used);
if (eset > 0) {
+ const TxSetType tx_set_type = get_ext_tx_set_type(
+ supertx_size, bsize, 1, cm->reduced_tx_set_used);
const int packed_sym =
aom_read_symbol(r, ec_ctx->inter_ext_tx_cdf[eset][supertx_size],
- ext_tx_cnt_inter[eset], ACCT_STR);
- txfm = av1_ext_tx_inter_inv[eset][packed_sym];
+ av1_num_ext_tx_set[tx_set_type], ACCT_STR);
+ txfm = av1_ext_tx_inv[tx_set_type][packed_sym];
+#if CONFIG_ENTROPY_STATS
if (xd->counts) ++xd->counts->inter_ext_tx[eset][supertx_size][txfm];
+#endif // CONFIG_ENTROPY_STATS
}
}
#else
if (supertx_size < TX_32X32) {
txfm = aom_read_symbol(r, ec_ctx->inter_ext_tx_cdf[supertx_size],
TX_TYPES, ACCT_STR);
+#if CONFIG_ENTROPY_STATS
if (xd->counts) ++xd->counts->inter_ext_tx[supertx_size][txfm];
+#endif // CONFIG_ENTROPY_STATS
}
#endif // CONFIG_EXT_TX
}
@@ -2684,6 +2583,63 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
#endif // CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_LPF_SB
+ if (bsize == cm->sb_size) {
+ int filt_lvl;
+ if (mi_row == 0 && mi_col == 0) {
+ filt_lvl = aom_read_literal(r, 6, ACCT_STR);
+ cm->mi_grid_visible[0]->mbmi.reuse_sb_lvl = 0;
+ cm->mi_grid_visible[0]->mbmi.delta = 0;
+ cm->mi_grid_visible[0]->mbmi.sign = 0;
+ } else {
+ int prev_mi_row, prev_mi_col;
+ if (mi_col - MAX_MIB_SIZE < 0) {
+ prev_mi_row = mi_row - MAX_MIB_SIZE;
+ prev_mi_col = mi_col;
+ } else {
+ prev_mi_row = mi_row;
+ prev_mi_col = mi_col - MAX_MIB_SIZE;
+ }
+
+ MB_MODE_INFO *curr_mbmi =
+ &cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi;
+ MB_MODE_INFO *prev_mbmi =
+ &cm->mi_grid_visible[prev_mi_row * cm->mi_stride + prev_mi_col]->mbmi;
+ const uint8_t prev_lvl = prev_mbmi->filt_lvl;
+
+ const int reuse_ctx = prev_mbmi->reuse_sb_lvl;
+ const int reuse_prev_lvl = aom_read_symbol(
+ r, xd->tile_ctx->lpf_reuse_cdf[reuse_ctx], 2, ACCT_STR);
+ curr_mbmi->reuse_sb_lvl = reuse_prev_lvl;
+
+ if (reuse_prev_lvl) {
+ filt_lvl = prev_lvl;
+ curr_mbmi->delta = 0;
+ curr_mbmi->sign = 0;
+ } else {
+ const int delta_ctx = prev_mbmi->delta;
+ unsigned int delta = aom_read_symbol(
+ r, xd->tile_ctx->lpf_delta_cdf[delta_ctx], DELTA_RANGE, ACCT_STR);
+ curr_mbmi->delta = delta;
+ delta *= LPF_STEP;
+
+ if (delta) {
+ const int sign_ctx = prev_mbmi->sign;
+ const int sign = aom_read_symbol(
+ r, xd->tile_ctx->lpf_sign_cdf[reuse_ctx][sign_ctx], 2, ACCT_STR);
+ curr_mbmi->sign = sign;
+ filt_lvl = sign ? prev_lvl + delta : prev_lvl - delta;
+ } else {
+ filt_lvl = prev_lvl;
+ curr_mbmi->sign = 0;
+ }
+ }
+ }
+
+ av1_loop_filter_sb_level_init(cm, mi_row, mi_col, filt_lvl);
+ }
+#endif
+
#if CONFIG_CDEF
if (bsize == cm->sb_size) {
int width_step = mi_size_wide[BLOCK_64X64];
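
Under CONFIG_LPF_SB the loop-filter level is delta-coded per superblock against a neighbour (the superblock above in the first column, otherwise the one to the left): a reuse flag, then a magnitude in LPF_STEP units, then a sign. A scalar sketch of the reconstruction (LPF_STEP is the experiment's quantization step):

// Rebuild a superblock's filter level from the neighbour level and the
// decoded (reuse, delta, sign) triple, as in the block above.
static int reconstruct_filt_lvl(int prev_lvl, int reuse_prev_lvl,
                                unsigned int delta, int sign) {
  if (reuse_prev_lvl) return prev_lvl;
  const int d = (int)(delta * LPF_STEP);
  if (d == 0) return prev_lvl;
  return sign ? prev_lvl + d : prev_lvl - d;
}
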
@@ -2704,6 +2660,21 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
}
}
#endif // CONFIG_CDEF
+#if CONFIG_LOOP_RESTORATION
+ for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ int rcol0, rcol1, rrow0, rrow1, nhtiles;
+ if (av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
+ &rcol0, &rcol1, &rrow0, &rrow1,
+ &nhtiles)) {
+ for (int rrow = rrow0; rrow < rrow1; ++rrow) {
+ for (int rcol = rcol0; rcol < rcol1; ++rcol) {
+ int rtile_idx = rcol + rrow * nhtiles;
+ loop_restoration_read_sb_coeffs(cm, xd, r, plane, rtile_idx);
+ }
+ }
+ }
+ }
+#endif
}
static void setup_bool_decoder(const uint8_t *data, const uint8_t *data_end,
@@ -2736,6 +2707,7 @@ static void setup_segmentation(AV1_COMMON *const cm,
seg->update_map = 0;
seg->update_data = 0;
+ seg->temporal_update = 0;
seg->enabled = aom_rb_read_bit(rb);
if (!seg->enabled) return;
@@ -2820,16 +2792,26 @@ static void decode_restoration_mode(AV1_COMMON *cm,
cm->rst_info[1].restoration_tilesize = cm->rst_info[0].restoration_tilesize;
}
cm->rst_info[2].restoration_tilesize = cm->rst_info[1].restoration_tilesize;
+
+ cm->rst_info[0].procunit_width = cm->rst_info[0].procunit_height =
+ RESTORATION_PROC_UNIT_SIZE;
+ cm->rst_info[1].procunit_width = cm->rst_info[2].procunit_width =
+ RESTORATION_PROC_UNIT_SIZE >> cm->subsampling_x;
+ cm->rst_info[1].procunit_height = cm->rst_info[2].procunit_height =
+ RESTORATION_PROC_UNIT_SIZE >> cm->subsampling_y;
}
-static void read_wiener_filter(WienerInfo *wiener_info,
+static void read_wiener_filter(int wiener_win, WienerInfo *wiener_info,
WienerInfo *ref_wiener_info, aom_reader *rb) {
- wiener_info->vfilter[0] = wiener_info->vfilter[WIENER_WIN - 1] =
- aom_read_primitive_refsubexpfin(
- rb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
- WIENER_FILT_TAP0_SUBEXP_K,
- ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV, ACCT_STR) +
- WIENER_FILT_TAP0_MINV;
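+  // The outermost taps are only coded for the full (luma) Wiener window;
+  // with the reduced chroma window they are implicitly zero.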
+ if (wiener_win == WIENER_WIN)
+ wiener_info->vfilter[0] = wiener_info->vfilter[WIENER_WIN - 1] =
+ aom_read_primitive_refsubexpfin(
+ rb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
+ WIENER_FILT_TAP0_SUBEXP_K,
+ ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV, ACCT_STR) +
+ WIENER_FILT_TAP0_MINV;
+ else
+ wiener_info->vfilter[0] = wiener_info->vfilter[WIENER_WIN - 1] = 0;
wiener_info->vfilter[1] = wiener_info->vfilter[WIENER_WIN - 2] =
aom_read_primitive_refsubexpfin(
rb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
@@ -2847,12 +2829,15 @@ static void read_wiener_filter(WienerInfo *wiener_info,
-2 * (wiener_info->vfilter[0] + wiener_info->vfilter[1] +
wiener_info->vfilter[2]);
- wiener_info->hfilter[0] = wiener_info->hfilter[WIENER_WIN - 1] =
- aom_read_primitive_refsubexpfin(
- rb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
- WIENER_FILT_TAP0_SUBEXP_K,
- ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV, ACCT_STR) +
- WIENER_FILT_TAP0_MINV;
+ if (wiener_win == WIENER_WIN)
+ wiener_info->hfilter[0] = wiener_info->hfilter[WIENER_WIN - 1] =
+ aom_read_primitive_refsubexpfin(
+ rb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
+ WIENER_FILT_TAP0_SUBEXP_K,
+ ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV, ACCT_STR) +
+ WIENER_FILT_TAP0_MINV;
+ else
+ wiener_info->hfilter[0] = wiener_info->hfilter[WIENER_WIN - 1] = 0;
wiener_info->hfilter[1] = wiener_info->hfilter[WIENER_WIN - 2] =
aom_read_primitive_refsubexpfin(
rb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
@@ -2888,90 +2873,43 @@ static void read_sgrproj_filter(SgrprojInfo *sgrproj_info,
memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
}
-static void decode_restoration(AV1_COMMON *cm, aom_reader *rb) {
- int i, p;
-#if CONFIG_FRAME_SUPERRES
- const int width = cm->superres_upscaled_width;
- const int height = cm->superres_upscaled_height;
-#else
- const int width = cm->width;
- const int height = cm->height;
-#endif // CONFIG_FRAME_SUPERRES
- SgrprojInfo ref_sgrproj_info;
- WienerInfo ref_wiener_info;
- set_default_wiener(&ref_wiener_info);
- set_default_sgrproj(&ref_sgrproj_info);
- const int ntiles =
- av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize,
- NULL, NULL, NULL, NULL);
- const int ntiles_uv = av1_get_rest_ntiles(
- ROUND_POWER_OF_TWO(width, cm->subsampling_x),
- ROUND_POWER_OF_TWO(height, cm->subsampling_y),
- cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL, NULL);
- RestorationInfo *rsi = &cm->rst_info[0];
- if (rsi->frame_restoration_type != RESTORE_NONE) {
- if (rsi->frame_restoration_type == RESTORE_SWITCHABLE) {
- for (i = 0; i < ntiles; ++i) {
- rsi->restoration_type[i] =
- aom_read_tree(rb, av1_switchable_restore_tree,
- cm->fc->switchable_restore_prob, ACCT_STR);
- if (rsi->restoration_type[i] == RESTORE_WIENER) {
- read_wiener_filter(&rsi->wiener_info[i], &ref_wiener_info, rb);
- } else if (rsi->restoration_type[i] == RESTORE_SGRPROJ) {
- read_sgrproj_filter(&rsi->sgrproj_info[i], &ref_sgrproj_info, rb);
- }
- }
- } else if (rsi->frame_restoration_type == RESTORE_WIENER) {
- for (i = 0; i < ntiles; ++i) {
- if (aom_read(rb, RESTORE_NONE_WIENER_PROB, ACCT_STR)) {
- rsi->restoration_type[i] = RESTORE_WIENER;
- read_wiener_filter(&rsi->wiener_info[i], &ref_wiener_info, rb);
- } else {
- rsi->restoration_type[i] = RESTORE_NONE;
- }
- }
- } else if (rsi->frame_restoration_type == RESTORE_SGRPROJ) {
- for (i = 0; i < ntiles; ++i) {
- if (aom_read(rb, RESTORE_NONE_SGRPROJ_PROB, ACCT_STR)) {
- rsi->restoration_type[i] = RESTORE_SGRPROJ;
- read_sgrproj_filter(&rsi->sgrproj_info[i], &ref_sgrproj_info, rb);
- } else {
- rsi->restoration_type[i] = RESTORE_NONE;
- }
- }
+static void loop_restoration_read_sb_coeffs(const AV1_COMMON *const cm,
+ MACROBLOCKD *xd,
+ aom_reader *const r, int plane,
+ int rtile_idx) {
+ const RestorationInfo *rsi = cm->rst_info + plane;
+ if (rsi->frame_restoration_type == RESTORE_NONE) return;
+
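+  // Chroma planes use the smaller Wiener window. xd->wiener_info and
+  // xd->sgrproj_info hold the per-plane reference filters used to
+  // predict the coded coefficients.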
+ const int wiener_win = (plane > 0) ? WIENER_WIN_CHROMA : WIENER_WIN;
+ WienerInfo *wiener_info = xd->wiener_info + plane;
+ SgrprojInfo *sgrproj_info = xd->sgrproj_info + plane;
+
+ if (rsi->frame_restoration_type == RESTORE_SWITCHABLE) {
+ assert(plane == 0);
+ rsi->restoration_type[rtile_idx] =
+ aom_read_tree(r, av1_switchable_restore_tree,
+ cm->fc->switchable_restore_prob, ACCT_STR);
+
+ if (rsi->restoration_type[rtile_idx] == RESTORE_WIENER) {
+ read_wiener_filter(wiener_win, &rsi->wiener_info[rtile_idx], wiener_info,
+ r);
+ } else if (rsi->restoration_type[rtile_idx] == RESTORE_SGRPROJ) {
+ read_sgrproj_filter(&rsi->sgrproj_info[rtile_idx], sgrproj_info, r);
}
- }
- for (p = 1; p < MAX_MB_PLANE; ++p) {
- set_default_wiener(&ref_wiener_info);
- set_default_sgrproj(&ref_sgrproj_info);
- rsi = &cm->rst_info[p];
- if (rsi->frame_restoration_type == RESTORE_WIENER) {
- for (i = 0; i < ntiles_uv; ++i) {
- if (ntiles_uv > 1)
- rsi->restoration_type[i] =
- aom_read(rb, RESTORE_NONE_WIENER_PROB, ACCT_STR) ? RESTORE_WIENER
- : RESTORE_NONE;
- else
- rsi->restoration_type[i] = RESTORE_WIENER;
- if (rsi->restoration_type[i] == RESTORE_WIENER) {
- read_wiener_filter(&rsi->wiener_info[i], &ref_wiener_info, rb);
- }
- }
- } else if (rsi->frame_restoration_type == RESTORE_SGRPROJ) {
- for (i = 0; i < ntiles_uv; ++i) {
- if (ntiles_uv > 1)
- rsi->restoration_type[i] =
- aom_read(rb, RESTORE_NONE_SGRPROJ_PROB, ACCT_STR)
- ? RESTORE_SGRPROJ
- : RESTORE_NONE;
- else
- rsi->restoration_type[i] = RESTORE_SGRPROJ;
- if (rsi->restoration_type[i] == RESTORE_SGRPROJ) {
- read_sgrproj_filter(&rsi->sgrproj_info[i], &ref_sgrproj_info, rb);
- }
- }
- } else if (rsi->frame_restoration_type != RESTORE_NONE) {
- assert(0);
+ } else if (rsi->frame_restoration_type == RESTORE_WIENER) {
+ if (aom_read(r, RESTORE_NONE_WIENER_PROB, ACCT_STR)) {
+ rsi->restoration_type[rtile_idx] = RESTORE_WIENER;
+ read_wiener_filter(wiener_win, &rsi->wiener_info[rtile_idx], wiener_info,
+ r);
+ } else {
+ rsi->restoration_type[rtile_idx] = RESTORE_NONE;
+ }
+ } else if (rsi->frame_restoration_type == RESTORE_SGRPROJ) {
+ if (aom_read(r, RESTORE_NONE_SGRPROJ_PROB, ACCT_STR)) {
+ rsi->restoration_type[rtile_idx] = RESTORE_SGRPROJ;
+ read_sgrproj_filter(&rsi->sgrproj_info[rtile_idx], sgrproj_info, r);
+ } else {
+ rsi->restoration_type[rtile_idx] = RESTORE_NONE;
}
}
}
@@ -2979,13 +2917,18 @@ static void decode_restoration(AV1_COMMON *cm, aom_reader *rb) {
static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
struct loopfilter *lf = &cm->lf;
- lf->filter_level = aom_rb_read_literal(rb, 6);
-#if CONFIG_UV_LVL
- if (lf->filter_level > 0) {
+#if !CONFIG_LPF_SB
+#if CONFIG_LOOPFILTER_LEVEL
+ lf->filter_level[0] = aom_rb_read_literal(rb, 6);
+ lf->filter_level[1] = aom_rb_read_literal(rb, 6);
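+  // Chroma filter levels are only coded when at least one luma level is
+  // non-zero.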
+ if (lf->filter_level[0] || lf->filter_level[1]) {
lf->filter_level_u = aom_rb_read_literal(rb, 6);
lf->filter_level_v = aom_rb_read_literal(rb, 6);
}
+#else
+ lf->filter_level = aom_rb_read_literal(rb, 6);
#endif
+#endif // CONFIG_LPF_SB
lf->sharpness_level = aom_rb_read_literal(rb, 3);
// Read in loop filter deltas applied at the MB level based on mode or ref
@@ -3012,13 +2955,19 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
#if CONFIG_CDEF
static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
int i;
- cm->cdef_dering_damping = aom_rb_read_literal(rb, 1) + 5;
- cm->cdef_clpf_damping = aom_rb_read_literal(rb, 2) + 3;
+#if CONFIG_CDEF_SINGLEPASS
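+  // Single-pass CDEF codes one shared damping value in the range [3, 6].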
+ cm->cdef_pri_damping = cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
+#else
+ cm->cdef_pri_damping = aom_rb_read_literal(rb, 1) + 5;
+ cm->cdef_sec_damping = aom_rb_read_literal(rb, 2) + 3;
+#endif
cm->cdef_bits = aom_rb_read_literal(rb, 2);
cm->nb_cdef_strengths = 1 << cm->cdef_bits;
for (i = 0; i < cm->nb_cdef_strengths; i++) {
cm->cdef_strengths[i] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS);
- cm->cdef_uv_strengths[i] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS);
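+    // Chroma strengths are only coded when the two chroma subsampling
+    // factors match; otherwise they are forced to zero.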
+ cm->cdef_uv_strengths[i] = cm->subsampling_x == cm->subsampling_y
+ ? aom_rb_read_literal(rb, CDEF_STRENGTH_BITS)
+ : 0;
}
}
#endif // CONFIG_CDEF
@@ -3116,28 +3065,20 @@ static void setup_superres(AV1_COMMON *const cm, struct aom_read_bit_buffer *rb,
cm->superres_upscaled_width = *width;
cm->superres_upscaled_height = *height;
if (aom_rb_read_bit(rb)) {
- cm->superres_scale_numerator =
+ cm->superres_scale_denominator =
(uint8_t)aom_rb_read_literal(rb, SUPERRES_SCALE_BITS);
- cm->superres_scale_numerator += SUPERRES_SCALE_NUMERATOR_MIN;
+ cm->superres_scale_denominator += SUPERRES_SCALE_DENOMINATOR_MIN;
// Don't edit cm->width or cm->height directly, or the buffers won't get
// resized correctly
- av1_calculate_scaled_size(width, height, cm->superres_scale_numerator);
+ av1_calculate_scaled_superres_size(width, height,
+ cm->superres_scale_denominator);
} else {
// 1:1 scaling, i.e. no scaling; scale not provided
- cm->superres_scale_numerator = SCALE_DENOMINATOR;
+ cm->superres_scale_denominator = SCALE_NUMERATOR;
}
}
#endif // CONFIG_FRAME_SUPERRES
-static void resize_mv_buffer(AV1_COMMON *cm) {
- aom_free(cm->cur_frame->mvs);
- cm->cur_frame->mi_rows = cm->mi_rows;
- cm->cur_frame->mi_cols = cm->mi_cols;
- CHECK_MEM_ERROR(cm, cm->cur_frame->mvs,
- (MV_REF *)aom_calloc(cm->mi_rows * cm->mi_cols,
- sizeof(*cm->cur_frame->mvs)));
-}
-
static void resize_context_buffers(AV1_COMMON *cm, int width, int height) {
#if CONFIG_SIZE_LIMIT
if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT)
@@ -3164,10 +3105,10 @@ static void resize_context_buffers(AV1_COMMON *cm, int width, int height) {
cm->width = width;
cm->height = height;
}
- if (cm->cur_frame->mvs == NULL || cm->mi_rows > cm->cur_frame->mi_rows ||
- cm->mi_cols > cm->cur_frame->mi_cols) {
- resize_mv_buffer(cm);
- }
+
+ ensure_mv_buffer(cm->cur_frame, cm);
+ cm->cur_frame->width = cm->width;
+ cm->cur_frame->height = cm->height;
}
static void setup_frame_size(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
@@ -3211,6 +3152,15 @@ static void setup_frame_size(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
}
+static void setup_sb_size(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
+ (void)rb;
+#if CONFIG_EXT_PARTITION
+ set_sb_size(cm, aom_rb_read_bit(rb) ? BLOCK_128X128 : BLOCK_64X64);
+#else
+ set_sb_size(cm, BLOCK_64X64);
+#endif // CONFIG_EXT_PARTITION
+}
+
static INLINE int valid_ref_frame_img_fmt(aom_bit_depth_t ref_bit_depth,
int ref_xss, int ref_yss,
aom_bit_depth_t this_bit_depth,
@@ -3306,6 +3256,89 @@ static void setup_frame_size_with_refs(AV1_COMMON *cm,
pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
}
+static void read_tile_group_range(AV1Decoder *pbi,
+ struct aom_read_bit_buffer *const rb) {
+ AV1_COMMON *const cm = &pbi->common;
+ const int num_bits = cm->log2_tile_rows + cm->log2_tile_cols;
+ const int num_tiles =
+ cm->tile_rows * cm->tile_cols; // Note: May be < (1<<num_bits)
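+  // tg_size is coded minus one, so a tile group always contains at
+  // least one tile.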
+ pbi->tg_start = aom_rb_read_literal(rb, num_bits);
+ pbi->tg_size = 1 + aom_rb_read_literal(rb, num_bits);
+ if (pbi->tg_start + pbi->tg_size > num_tiles)
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Tile group extends past last tile in frame");
+}
+
+#if CONFIG_MAX_TILE
+
+// Same as av1_read_uniform, but reads from the uncompressed header's bit
+// buffer rb.
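+// Decodes a value in [0, n) with a quasi-uniform code: the first
+// m = 2^l - n values take l - 1 bits and the rest take l bits, where
+// l = floor(log2(n)) + 1.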
+static int rb_read_uniform(struct aom_read_bit_buffer *const rb, int n) {
+ const int l = get_unsigned_bits(n);
+ const int m = (1 << l) - n;
+  assert(l != 0);
+  const int v = aom_rb_read_literal(rb, l - 1);
+ if (v < m)
+ return v;
+ else
+ return (v << 1) - m + aom_rb_read_literal(rb, 1);
+}
+
+static void read_tile_info_max_tile(AV1_COMMON *const cm,
+ struct aom_read_bit_buffer *const rb) {
+ int width_mi = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+ int height_mi = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+ int width_sb = width_mi >> MAX_MIB_SIZE_LOG2;
+ int height_sb = height_mi >> MAX_MIB_SIZE_LOG2;
+ int start_sb, size_sb, i;
+
+ av1_get_tile_limits(cm);
+ cm->uniform_tile_spacing_flag = aom_rb_read_bit(rb);
+
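+  // With uniform spacing the tile-column count is coded as a unary
+  // increment of log2_tile_cols; otherwise each tile column codes its
+  // width in superblocks explicitly. Tile rows follow the same pattern.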
+ // Read tile columns
+ if (cm->uniform_tile_spacing_flag) {
+ cm->log2_tile_cols = cm->min_log2_tile_cols;
+ while (cm->log2_tile_cols < cm->max_log2_tile_cols) {
+ if (!aom_rb_read_bit(rb)) {
+ break;
+ }
+ cm->log2_tile_cols++;
+ }
+ } else {
+ for (i = 0, start_sb = 0; width_sb > 0 && i < MAX_TILE_COLS; i++) {
+ size_sb = 1 + rb_read_uniform(rb, AOMMIN(width_sb, MAX_TILE_WIDTH_SB));
+ cm->tile_col_start_sb[i] = start_sb;
+ start_sb += size_sb;
+ width_sb -= size_sb;
+ }
+ cm->tile_cols = i;
+ cm->tile_col_start_sb[i] = start_sb + width_sb;
+ }
+ av1_calculate_tile_cols(cm);
+
+ // Read tile rows
+ if (cm->uniform_tile_spacing_flag) {
+ cm->log2_tile_rows = cm->min_log2_tile_rows;
+ while (cm->log2_tile_rows < cm->max_log2_tile_rows) {
+ if (!aom_rb_read_bit(rb)) {
+ break;
+ }
+ cm->log2_tile_rows++;
+ }
+ } else {
+ for (i = 0, start_sb = 0; height_sb > 0 && i < MAX_TILE_ROWS; i++) {
+ size_sb =
+ 1 + rb_read_uniform(rb, AOMMIN(height_sb, cm->max_tile_height_sb));
+ cm->tile_row_start_sb[i] = start_sb;
+ start_sb += size_sb;
+ height_sb -= size_sb;
+ }
+ cm->tile_rows = i;
+ cm->tile_row_start_sb[i] = start_sb + height_sb;
+ }
+ av1_calculate_tile_rows(cm);
+}
+#endif
+
static void read_tile_info(AV1Decoder *const pbi,
struct aom_read_bit_buffer *const rb) {
AV1_COMMON *const cm = &pbi->common;
@@ -3357,23 +3390,34 @@ static void read_tile_info(AV1Decoder *const pbi,
#endif
} else {
#endif // CONFIG_EXT_TILE
- int min_log2_tile_cols, max_log2_tile_cols, max_ones;
- av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
- // columns
- max_ones = max_log2_tile_cols - min_log2_tile_cols;
- cm->log2_tile_cols = min_log2_tile_cols;
- while (max_ones-- && aom_rb_read_bit(rb)) cm->log2_tile_cols++;
+#if CONFIG_MAX_TILE
+ read_tile_info_max_tile(cm, rb);
+#else
+ int min_log2_tile_cols, max_log2_tile_cols, max_ones;
+ av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+
+ // columns
+ max_ones = max_log2_tile_cols - min_log2_tile_cols;
+ cm->log2_tile_cols = min_log2_tile_cols;
+ while (max_ones-- && aom_rb_read_bit(rb)) cm->log2_tile_cols++;
- if (cm->log2_tile_cols > 6)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Invalid number of tile columns");
+ if (cm->log2_tile_cols > 6)
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Invalid number of tile columns");
- // rows
- cm->log2_tile_rows = aom_rb_read_bit(rb);
- if (cm->log2_tile_rows) cm->log2_tile_rows += aom_rb_read_bit(rb);
+ // rows
+ cm->log2_tile_rows = aom_rb_read_bit(rb);
+ if (cm->log2_tile_rows) cm->log2_tile_rows += aom_rb_read_bit(rb);
+
+ cm->tile_width =
+ get_tile_size(cm->mi_cols, cm->log2_tile_cols, &cm->tile_cols);
+ cm->tile_height =
+ get_tile_size(cm->mi_rows, cm->log2_tile_rows, &cm->tile_rows);
+
+#endif // CONFIG_MAX_TILE
#if CONFIG_DEPENDENT_HORZTILES
- if (cm->log2_tile_rows != 0)
+ if (cm->tile_rows > 1)
cm->dependent_horz_tiles = aom_rb_read_bit(rb);
else
cm->dependent_horz_tiles = 0;
@@ -3382,33 +3426,18 @@ static void read_tile_info(AV1Decoder *const pbi,
cm->loop_filter_across_tiles_enabled = aom_rb_read_bit(rb);
#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
- cm->tile_cols = 1 << cm->log2_tile_cols;
- cm->tile_rows = 1 << cm->log2_tile_rows;
-
- cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
- cm->tile_width >>= cm->log2_tile_cols;
- cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
- cm->tile_height >>= cm->log2_tile_rows;
-
- // round to integer multiples of superblock size
- cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2);
- cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2);
-
// tile size magnitude
pbi->tile_size_bytes = aom_rb_read_literal(rb, 2) + 1;
#if CONFIG_EXT_TILE
}
#endif // CONFIG_EXT_TILE
+// Each tile group header is carried in its own tile group OBU.
+#if !CONFIG_OBU
// Store an index to the location of the tile group information
pbi->tg_size_bit_offset = rb->bit_offset;
- pbi->tg_size = 1 << (cm->log2_tile_rows + cm->log2_tile_cols);
- if (cm->log2_tile_rows + cm->log2_tile_cols > 0) {
- pbi->tg_start =
- aom_rb_read_literal(rb, cm->log2_tile_rows + cm->log2_tile_cols);
- pbi->tg_size =
- 1 + aom_rb_read_literal(rb, cm->log2_tile_rows + cm->log2_tile_cols);
- }
+ read_tile_group_range(pbi, rb);
+#endif
}
static int mem_get_varsize(const uint8_t *src, int sz) {
@@ -3605,9 +3634,10 @@ static void get_tile_buffer(const uint8_t *const data_end,
*data += size;
}
-static void get_tile_buffers(
- AV1Decoder *pbi, const uint8_t *data, const uint8_t *data_end,
- TileBufferDec (*const tile_buffers)[MAX_TILE_COLS]) {
+static void get_tile_buffers(AV1Decoder *pbi, const uint8_t *data,
+ const uint8_t *data_end,
+ TileBufferDec (*const tile_buffers)[MAX_TILE_COLS],
+ int startTile, int endTile) {
AV1_COMMON *const cm = &pbi->common;
int r, c;
const int tile_cols = cm->tile_cols;
@@ -3616,10 +3646,13 @@ static void get_tile_buffers(
int first_tile_in_tg = 0;
struct aom_read_bit_buffer rb_tg_hdr;
uint8_t clear_data[MAX_AV1_HEADER_SIZE];
- const int num_tiles = tile_rows * tile_cols;
- const int num_bits = OD_ILOG(num_tiles) - 1;
+#if !CONFIG_OBU
const size_t hdr_size = pbi->uncomp_hdr_size + pbi->first_partition_size;
const int tg_size_bit_offset = pbi->tg_size_bit_offset;
+#else
+ const int tg_size_bit_offset = 0;
+#endif
+
#if CONFIG_DEPENDENT_HORZTILES
int tile_group_start_col = 0;
int tile_group_start_row = 0;
@@ -3628,21 +3661,28 @@ static void get_tile_buffers(
for (r = 0; r < tile_rows; ++r) {
for (c = 0; c < tile_cols; ++c, ++tc) {
TileBufferDec *const buf = &tile_buffers[r][c];
+#if CONFIG_OBU
+ const int is_last = (tc == endTile);
+ const size_t hdr_offset = 0;
+#else
const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1);
const size_t hdr_offset = (tc && tc == first_tile_in_tg) ? hdr_size : 0;
+#endif
+
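+      // Tiles outside [startTile, endTile] belong to another tile group
+      // and are skipped without consuming any data.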
+ if (tc < startTile || tc > endTile) continue;
+ if (data + hdr_offset >= data_end)
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Data ended before all tiles were read.");
buf->col = c;
if (hdr_offset) {
init_read_bit_buffer(pbi, &rb_tg_hdr, data, data_end, clear_data);
rb_tg_hdr.bit_offset = tg_size_bit_offset;
- if (num_tiles) {
- pbi->tg_start = aom_rb_read_literal(&rb_tg_hdr, num_bits);
- pbi->tg_size = 1 + aom_rb_read_literal(&rb_tg_hdr, num_bits);
+ read_tile_group_range(pbi, &rb_tg_hdr);
#if CONFIG_DEPENDENT_HORZTILES
- tile_group_start_row = r;
- tile_group_start_col = c;
+ tile_group_start_row = r;
+ tile_group_start_col = c;
#endif
- }
}
first_tile_in_tg += tc == first_tile_in_tg ? pbi->tg_size : 0;
data += hdr_offset;
@@ -3665,10 +3705,6 @@ static void daala_dec_init(AV1_COMMON *const cm, daala_dec_ctx *daala_dec,
// TODO(yushin): activity masking info needs to be signaled in the bitstream
daala_dec->use_activity_masking = AV1_PVQ_ENABLE_ACTIVITY_MASKING;
-#if !CONFIG_DAALA_DIST
- daala_dec->use_activity_masking = 0;
-#endif
-
if (daala_dec->use_activity_masking)
daala_dec->qm = OD_HVS_QM;
else
@@ -3707,8 +3743,22 @@ static void daala_dec_init(AV1_COMMON *const cm, daala_dec_ctx *daala_dec,
}
#endif // #if CONFIG_PVQ
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+static void dec_setup_across_tile_boundary_info(
+ const AV1_COMMON *const cm, const TileInfo *const tile_info) {
+ if (tile_info->mi_row_start >= tile_info->mi_row_end ||
+ tile_info->mi_col_start >= tile_info->mi_col_end)
+ return;
+
+ if (!cm->loop_filter_across_tiles_enabled) {
+ av1_setup_across_tile_boundary_info(cm, tile_info);
+ }
+}
+#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
+
static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
- const uint8_t *data_end) {
+ const uint8_t *data_end, int startTile,
+ int endTile) {
AV1_COMMON *const cm = &pbi->common;
const AVxWorkerInterface *const winterface = aom_get_worker_interface();
const int tile_cols = cm->tile_cols;
@@ -3776,7 +3826,7 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
get_ls_tile_buffers(pbi, data, data_end, tile_buffers);
else
#endif // CONFIG_EXT_TILE
- get_tile_buffers(pbi, data, data_end, tile_buffers);
+ get_tile_buffers(pbi, data, data_end, tile_buffers, startTile, endTile);
if (pbi->tile_data == NULL || n_tiles != pbi->allocated_tiles) {
aom_free(pbi->tile_data);
@@ -3795,6 +3845,10 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
const TileBufferDec *const buf = &tile_buffers[tile_row][tile_col];
TileData *const td = pbi->tile_data + tile_cols * tile_row + tile_col;
+ if (tile_row * cm->tile_cols + tile_col < startTile ||
+ tile_row * cm->tile_cols + tile_col > endTile)
+ continue;
+
td->cm = cm;
td->xd = pbi->mb;
td->xd.corrupted = 0;
@@ -3838,10 +3892,11 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
td->xd.daala_dec.state.adapt = &td->tctx.pvq_context;
#endif
-#if CONFIG_PALETTE
td->xd.plane[0].color_index_map = td->color_index_map[0];
td->xd.plane[1].color_index_map = td->color_index_map[1];
-#endif // CONFIG_PALETTE
+#if CONFIG_MRC_TX
+ td->xd.mrc_mask = td->mrc_mask;
+#endif // CONFIG_MRC_TX
}
}
@@ -3855,6 +3910,11 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
for (tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) {
const int col = inv_col_order ? tile_cols - 1 - tile_col : tile_col;
TileData *const td = pbi->tile_data + tile_cols * row + col;
+
+ if (tile_row * cm->tile_cols + tile_col < startTile ||
+ tile_row * cm->tile_cols + tile_col > endTile)
+ continue;
+
#if CONFIG_ACCOUNTING
if (pbi->acct_enabled) {
td->bit_reader.accounting->last_tell_frac =
@@ -3874,8 +3934,16 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
#else
av1_zero_above_context(cm, tile_info.mi_col_start, tile_info.mi_col_end);
#endif
+#if CONFIG_LOOP_RESTORATION
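+      // Reset the running Wiener/self-guided reference filters at the
+      // start of each tile so tiles decode independently.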
+ for (int p = 0; p < MAX_MB_PLANE; ++p) {
+ set_default_wiener(td->xd.wiener_info + p);
+ set_default_sgrproj(td->xd.sgrproj_info + p);
+ }
+#endif // CONFIG_LOOP_RESTORATION
- av1_setup_across_tile_boundary_info(cm, &tile_info);
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+ dec_setup_across_tile_boundary_info(cm, &tile_info);
+#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
mi_row += cm->mib_size) {
@@ -3885,15 +3953,22 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
mi_col += cm->mib_size) {
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ alloc_ncobmc_pred_buffer(&td->xd);
+ set_sb_mi_boundaries(cm, &td->xd, mi_row, mi_col);
+#endif
decode_partition(pbi, &td->xd,
#if CONFIG_SUPERTX
0,
#endif // CONFIG_SUPERTX
mi_row, mi_col, &td->bit_reader, cm->sb_size);
-#if (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT) && CONFIG_MOTION_VAR
+#if NC_MODE_INFO && CONFIG_MOTION_VAR
detoken_and_recon_sb(pbi, &td->xd, mi_row, mi_col, &td->bit_reader,
cm->sb_size);
#endif
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ free_ncobmc_pred_buffer(&td->xd);
+#endif
}
aom_merge_corrupted_flag(&pbi->mb.corrupted, td->xd.corrupted);
if (pbi->mb.corrupted)
@@ -3902,7 +3977,9 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
}
}
+#if !CONFIG_OBU
assert(mi_row > 0);
+#endif
// When parallel deblocking is enabled, deblocking should not
// be interleaved with decoding. Instead, deblocking should be done
@@ -3942,19 +4019,27 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
#if CONFIG_VAR_TX || CONFIG_CB4X4
// Loopfilter the whole frame.
-#if CONFIG_UV_LVL
- if (cm->lf.filter_level > 0) {
+#if CONFIG_LPF_SB
+ av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
+ cm->lf.filter_level, 0, 0, 0, 0);
+#else
+#if CONFIG_LOOPFILTER_LEVEL
+ if (cm->lf.filter_level[0] || cm->lf.filter_level[1]) {
av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
- cm->lf.filter_level, 0, 0);
+ cm->lf.filter_level[0], cm->lf.filter_level[1], 0, 0);
av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
- cm->lf.filter_level_u, 1, 0);
+ cm->lf.filter_level_u, cm->lf.filter_level_u, 1, 0);
av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
- cm->lf.filter_level_v, 2, 0);
+ cm->lf.filter_level_v, cm->lf.filter_level_v, 2, 0);
}
#else
- av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
- cm->lf.filter_level, 0, 0);
-#endif // CONFIG_UV_LVL
+#if CONFIG_OBU
+ if (endTile == cm->tile_rows * cm->tile_cols - 1)
+#endif
+ av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
+ cm->lf.filter_level, 0, 0);
+#endif // CONFIG_LOOPFILTER_LEVEL
+#endif // CONFIG_LPF_SB
#else
#if CONFIG_PARALLEL_DEBLOCKING
// Loopfilter all rows in the frame.
@@ -3997,11 +4082,16 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
#if CONFIG_ANS
return data_end;
#else
+#if !CONFIG_OBU
{
// Get last tile data.
TileData *const td = pbi->tile_data + tile_cols * tile_rows - 1;
return aom_reader_find_end(&td->bit_reader);
}
+#else
+ TileData *const td = pbi->tile_data + endTile;
+ return aom_reader_find_end(&td->bit_reader);
+#endif
#endif // CONFIG_ANS
#if CONFIG_EXT_TILE
}
@@ -4041,7 +4131,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data,
0,
#endif
mi_row, mi_col, &tile_data->bit_reader, cm->sb_size);
-#if (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT) && CONFIG_MOTION_VAR
+#if NC_MODE_INFO && CONFIG_MOTION_VAR
detoken_and_recon_sb(pbi, &tile_data->xd, mi_row, mi_col,
&tile_data->bit_reader, cm->sb_size);
#endif
@@ -4152,7 +4242,8 @@ static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data,
get_ls_tile_buffers(pbi, data, data_end, tile_buffers);
else
#endif // CONFIG_EXT_TILE
- get_tile_buffers(pbi, data, data_end, tile_buffers);
+ get_tile_buffers(pbi, data, data_end, tile_buffers, 0,
+ cm->tile_rows * cm->tile_cols - 1);
for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) {
// Sort the buffers in this tile row based on size in descending order.
@@ -4197,7 +4288,9 @@ static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data,
av1_tile_init(tile_info, cm, tile_row, buf->col);
av1_tile_init(&twd->xd.tile, cm, tile_row, buf->col);
- av1_setup_across_tile_boundary_info(cm, tile_info);
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+ dec_setup_across_tile_boundary_info(cm, tile_info);
+#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
&twd->bit_reader,
@@ -4220,10 +4313,8 @@ static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data,
// Initialise the tile context from the frame context
twd->tctx = *cm->fc;
twd->xd.tile_ctx = &twd->tctx;
-#if CONFIG_PALETTE
twd->xd.plane[0].color_index_map = twd->color_index_map[0];
twd->xd.plane[1].color_index_map = twd->color_index_map[1];
-#endif // CONFIG_PALETTE
worker->had_error = 0;
if (i == num_workers - 1 || tile_col == tile_cols_end - 1) {
@@ -4341,15 +4432,17 @@ static void read_bitdepth_colorspace_sampling(AV1_COMMON *cm,
}
#if CONFIG_REFERENCE_BUFFER
-void read_sequence_header(SequenceHeader *seq_params) {
+void read_sequence_header(SequenceHeader *seq_params,
+ struct aom_read_bit_buffer *rb) {
/* Placeholder for actually reading from the bitstream */
- seq_params->frame_id_numbers_present_flag = FRAME_ID_NUMBERS_PRESENT_FLAG;
- seq_params->frame_id_length_minus7 = FRAME_ID_LENGTH_MINUS7;
- seq_params->delta_frame_id_length_minus2 = DELTA_FRAME_ID_LENGTH_MINUS2;
+ seq_params->frame_id_numbers_present_flag = aom_rb_read_bit(rb);
+ if (seq_params->frame_id_numbers_present_flag) {
+ seq_params->frame_id_length_minus7 = aom_rb_read_literal(rb, 4);
+ seq_params->delta_frame_id_length_minus2 = aom_rb_read_literal(rb, 4);
+ }
}
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
-#if CONFIG_EXT_INTER
static void read_compound_tools(AV1_COMMON *cm,
struct aom_read_bit_buffer *rb) {
(void)cm;
@@ -4373,7 +4466,6 @@ static void read_compound_tools(AV1_COMMON *cm,
}
#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
}
-#endif // CONFIG_EXT_INTER
#if CONFIG_VAR_REFS
static void check_valid_ref_frames(AV1_COMMON *cm) {
@@ -4407,6 +4499,142 @@ static void check_valid_ref_frames(AV1_COMMON *cm) {
}
#endif // CONFIG_VAR_REFS
+#if CONFIG_GLOBAL_MOTION
+static int read_global_motion_params(WarpedMotionParams *params,
+ const WarpedMotionParams *ref_params,
+ struct aom_read_bit_buffer *rb,
+ int allow_hp) {
+ TransformationType type = aom_rb_read_bit(rb);
+ if (type != IDENTITY) {
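+    // With the reduced type set, one bit selects ROTZOOM and a second
+    // bit distinguishes TRANSLATION from AFFINE; larger type sets code
+    // the type as a literal.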
+#if GLOBAL_TRANS_TYPES > 4
+ type += aom_rb_read_literal(rb, GLOBAL_TYPE_BITS);
+#else
+ if (aom_rb_read_bit(rb))
+ type = ROTZOOM;
+ else
+ type = aom_rb_read_bit(rb) ? TRANSLATION : AFFINE;
+#endif // GLOBAL_TRANS_TYPES > 4
+ }
+
+ int trans_bits;
+ int trans_dec_factor;
+ int trans_prec_diff;
+ *params = default_warp_params;
+ params->wmtype = type;
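+  // The cases below intentionally fall through: each more general model
+  // also codes the parameters of the simpler models beneath it.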
+ switch (type) {
+ case HOMOGRAPHY:
+ case HORTRAPEZOID:
+ case VERTRAPEZOID:
+ if (type != HORTRAPEZOID)
+ params->wmmat[6] =
+ aom_rb_read_signed_primitive_refsubexpfin(
+ rb, GM_ROW3HOMO_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[6] >> GM_ROW3HOMO_PREC_DIFF)) *
+ GM_ROW3HOMO_DECODE_FACTOR;
+ if (type != VERTRAPEZOID)
+ params->wmmat[7] =
+ aom_rb_read_signed_primitive_refsubexpfin(
+ rb, GM_ROW3HOMO_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[7] >> GM_ROW3HOMO_PREC_DIFF)) *
+ GM_ROW3HOMO_DECODE_FACTOR;
+ case AFFINE:
+ case ROTZOOM:
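+      // Diagonal terms are coded as an offset from unity (in warped-model
+      // precision); off-diagonal terms are coded around zero.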
+ params->wmmat[2] = aom_rb_read_signed_primitive_refsubexpfin(
+ rb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[2] >> GM_ALPHA_PREC_DIFF) -
+ (1 << GM_ALPHA_PREC_BITS)) *
+ GM_ALPHA_DECODE_FACTOR +
+ (1 << WARPEDMODEL_PREC_BITS);
+ if (type != VERTRAPEZOID)
+ params->wmmat[3] = aom_rb_read_signed_primitive_refsubexpfin(
+ rb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[3] >> GM_ALPHA_PREC_DIFF)) *
+ GM_ALPHA_DECODE_FACTOR;
+ if (type >= AFFINE) {
+ if (type != HORTRAPEZOID)
+ params->wmmat[4] = aom_rb_read_signed_primitive_refsubexpfin(
+ rb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[4] >> GM_ALPHA_PREC_DIFF)) *
+ GM_ALPHA_DECODE_FACTOR;
+ params->wmmat[5] = aom_rb_read_signed_primitive_refsubexpfin(
+ rb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
+ (1 << GM_ALPHA_PREC_BITS)) *
+ GM_ALPHA_DECODE_FACTOR +
+ (1 << WARPEDMODEL_PREC_BITS);
+ } else {
+ params->wmmat[4] = -params->wmmat[3];
+ params->wmmat[5] = params->wmmat[2];
+ }
+ // fallthrough intended
+ case TRANSLATION:
+ trans_bits = (type == TRANSLATION) ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
+ : GM_ABS_TRANS_BITS;
+ trans_dec_factor = (type == TRANSLATION)
+ ? GM_TRANS_ONLY_DECODE_FACTOR * (1 << !allow_hp)
+ : GM_TRANS_DECODE_FACTOR;
+ trans_prec_diff = (type == TRANSLATION)
+ ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
+ : GM_TRANS_PREC_DIFF;
+ params->wmmat[0] = aom_rb_read_signed_primitive_refsubexpfin(
+ rb, (1 << trans_bits) + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[0] >> trans_prec_diff)) *
+ trans_dec_factor;
+ params->wmmat[1] = aom_rb_read_signed_primitive_refsubexpfin(
+ rb, (1 << trans_bits) + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[1] >> trans_prec_diff)) *
+ trans_dec_factor;
+ case IDENTITY: break;
+ default: assert(0);
+ }
+ if (params->wmtype <= AFFINE) {
+ int good_shear_params = get_shear_params(params);
+ if (!good_shear_params) return 0;
+ }
+
+ return 1;
+}
+
+static void read_global_motion(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
+ int frame;
+ for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
+ const WarpedMotionParams *ref_params =
+ cm->error_resilient_mode ? &default_warp_params
+ : &cm->prev_frame->global_motion[frame];
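+    // In error-resilient mode the previous frame's parameters cannot
+    // serve as a predictor, so prediction falls back to the defaults.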
+ int good_params = read_global_motion_params(
+ &cm->global_motion[frame], ref_params, rb, cm->allow_high_precision_mv);
+ if (!good_params)
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Invalid shear parameters for global motion.");
+
+    // TODO(sarahparker, debargha): The logic in the commented-out code below
+    // does not currently work and causes mismatches when resize is on. Fix it
+    // before turning the optimization back on.
+ /*
+ YV12_BUFFER_CONFIG *ref_buf = get_ref_frame(cm, frame);
+ if (cm->width == ref_buf->y_crop_width &&
+ cm->height == ref_buf->y_crop_height) {
+ read_global_motion_params(&cm->global_motion[frame],
+ &cm->prev_frame->global_motion[frame], rb,
+ cm->allow_high_precision_mv);
+ } else {
+ cm->global_motion[frame] = default_warp_params;
+ }
+ */
+ /*
+ printf("Dec Ref %d [%d/%d]: %d %d %d %d\n",
+ frame, cm->current_video_frame, cm->show_frame,
+ cm->global_motion[frame].wmmat[0],
+ cm->global_motion[frame].wmmat[1],
+ cm->global_motion[frame].wmmat[2],
+ cm->global_motion[frame].wmmat[3]);
+ */
+ }
+ memcpy(cm->cur_frame->global_motion, cm->global_motion,
+ TOTAL_REFS_PER_FRAME * sizeof(WarpedMotionParams));
+}
+#endif // CONFIG_GLOBAL_MOTION
+
static size_t read_uncompressed_header(AV1Decoder *pbi,
struct aom_read_bit_buffer *rb) {
AV1_COMMON *const cm = &pbi->common;
@@ -4416,11 +4644,6 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
int i, mask, ref_index = 0;
size_t sz;
-#if CONFIG_REFERENCE_BUFFER
- /* TODO: Move outside frame loop or inside key-frame branch */
- read_sequence_header(&pbi->seq_params);
-#endif
-
cm->last_frame_type = cm->frame_type;
cm->last_intra_only = cm->intra_only;
@@ -4429,6 +4652,7 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
cm->is_reference_frame = 1;
#endif // CONFIG_EXT_REFS
+#if !CONFIG_OBU
if (aom_rb_read_literal(rb, 2) != AOM_FRAME_MARKER)
aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Invalid frame marker");
@@ -4441,11 +4665,12 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
if (cm->profile >= MAX_SUPPORTED_PROFILE)
aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Unsupported bitstream profile");
+#endif
#if CONFIG_EXT_TILE
cm->large_scale_tile = aom_rb_read_literal(rb, 1);
#if CONFIG_REFERENCE_BUFFER
- if (cm->large_scale_tile) pbi->seq_params.frame_id_numbers_present_flag = 0;
+ if (cm->large_scale_tile) cm->seq_params.frame_id_numbers_present_flag = 0;
#endif // CONFIG_REFERENCE_BUFFER
#endif // CONFIG_EXT_TILE
@@ -4456,11 +4681,11 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
const int existing_frame_idx = aom_rb_read_literal(rb, 3);
const int frame_to_show = cm->ref_frame_map[existing_frame_idx];
#if CONFIG_REFERENCE_BUFFER
- if (pbi->seq_params.frame_id_numbers_present_flag) {
- int frame_id_length = pbi->seq_params.frame_id_length_minus7 + 7;
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ int frame_id_length = cm->seq_params.frame_id_length_minus7 + 7;
int display_frame_id = aom_rb_read_literal(rb, frame_id_length);
/* Compare display_frame_id with ref_frame_id and check valid for
- * referencing */
+ * referencing */
if (display_frame_id != cm->ref_frame_id[existing_frame_idx] ||
cm->valid_for_referencing[existing_frame_idx] == 0)
aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
@@ -4477,7 +4702,12 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
unlock_buffer_pool(pool);
+#if CONFIG_LOOPFILTER_LEVEL
+ cm->lf.filter_level[0] = 0;
+ cm->lf.filter_level[1] = 0;
+#else
cm->lf.filter_level = 0;
+#endif
cm->show_frame = 1;
pbi->refresh_frame_flags = 0;
@@ -4489,13 +4719,24 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
return 0;
}
+#if !CONFIG_OBU
cm->frame_type = (FRAME_TYPE)aom_rb_read_bit(rb);
cm->show_frame = aom_rb_read_bit(rb);
+ if (cm->frame_type != KEY_FRAME)
+ cm->intra_only = cm->show_frame ? 0 : aom_rb_read_bit(rb);
+#else
+ cm->frame_type = (FRAME_TYPE)aom_rb_read_literal(rb, 2); // 2 bits
+ cm->show_frame = aom_rb_read_bit(rb);
+ cm->intra_only = cm->frame_type == INTRA_ONLY_FRAME;
+#endif
cm->error_resilient_mode = aom_rb_read_bit(rb);
#if CONFIG_REFERENCE_BUFFER
- if (pbi->seq_params.frame_id_numbers_present_flag) {
- int frame_id_length = pbi->seq_params.frame_id_length_minus7 + 7;
- int diff_len = pbi->seq_params.delta_frame_id_length_minus2 + 2;
+#if !CONFIG_OBU
+ if (frame_is_intra_only(cm)) read_sequence_header(&cm->seq_params, rb);
+#endif // !CONFIG_OBU
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ int frame_id_length = cm->seq_params.frame_id_length_minus7 + 7;
+ int diff_len = cm->seq_params.delta_frame_id_length_minus2 + 2;
int prev_frame_id = 0;
if (cm->frame_type != KEY_FRAME) {
prev_frame_id = cm->current_frame_id;
@@ -4533,13 +4774,11 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
}
}
}
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
if (cm->frame_type == KEY_FRAME) {
- if (!av1_read_sync_code(rb))
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Invalid frame sync code");
-
+#if !CONFIG_OBU
read_bitdepth_colorspace_sampling(cm, rb, pbi->allow_lowbitdepth);
+#endif
pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1;
for (i = 0; i < INTER_REFS_PER_FRAME; ++i) {
@@ -4551,6 +4790,8 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
}
setup_frame_size(cm, rb);
+ setup_sb_size(cm, rb);
+
if (pbi->need_resync) {
memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
pbi->need_resync = 0;
@@ -4558,20 +4799,30 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
#if CONFIG_ANS && ANS_MAX_SYMBOLS
cm->ans_window_size_log2 = aom_rb_read_literal(rb, 4) + 8;
#endif // CONFIG_ANS && ANS_MAX_SYMBOLS
-#if CONFIG_PALETTE || CONFIG_INTRABC
cm->allow_screen_content_tools = aom_rb_read_bit(rb);
-#endif // CONFIG_PALETTE || CONFIG_INTRABC
+#if CONFIG_AMVR
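+    // seq_mv_precision_level == 2 defers the MV-precision choice to each
+    // frame; 0 or 1 fixes it for the whole sequence.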
+ if (cm->allow_screen_content_tools) {
+ if (aom_rb_read_bit(rb)) {
+ cm->seq_mv_precision_level = 2;
+ } else {
+ cm->seq_mv_precision_level = aom_rb_read_bit(rb) ? 0 : 1;
+ }
+ } else {
+ cm->seq_mv_precision_level = 0;
+ }
+#endif
#if CONFIG_TEMPMV_SIGNALING
cm->use_prev_frame_mvs = 0;
#endif
} else {
- cm->intra_only = cm->show_frame ? 0 : aom_rb_read_bit(rb);
-#if CONFIG_PALETTE || CONFIG_INTRABC
if (cm->intra_only) cm->allow_screen_content_tools = aom_rb_read_bit(rb);
-#endif // CONFIG_PALETTE || CONFIG_INTRABC
#if CONFIG_TEMPMV_SIGNALING
if (cm->intra_only || cm->error_resilient_mode) cm->use_prev_frame_mvs = 0;
#endif
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+// The only way to reset all frame contexts to their default values is with a
+// keyframe.
+#else
if (cm->error_resilient_mode) {
cm->reset_frame_context = RESET_FRAME_CONTEXT_ALL;
} else {
@@ -4589,16 +4840,16 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
: RESET_FRAME_CONTEXT_CURRENT;
}
}
+#endif
if (cm->intra_only) {
- if (!av1_read_sync_code(rb))
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
- "Invalid frame sync code");
-
+#if !CONFIG_OBU
read_bitdepth_colorspace_sampling(cm, rb, pbi->allow_lowbitdepth);
+#endif
pbi->refresh_frame_flags = aom_rb_read_literal(rb, REF_FRAMES);
setup_frame_size(cm, rb);
+ setup_sb_size(cm, rb);
if (pbi->need_resync) {
memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
pbi->need_resync = 0;
@@ -4607,7 +4858,13 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
cm->ans_window_size_log2 = aom_rb_read_literal(rb, 4) + 8;
#endif
} else if (pbi->need_resync != 1) { /* Skip if need resync */
+#if CONFIG_OBU
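+    // An S_FRAME (switch frame) refreshes every reference slot, so no
+    // refresh bitmask is coded for it.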
+ pbi->refresh_frame_flags = (cm->frame_type == S_FRAME)
+ ? ~(1 << REF_FRAMES)
+ : aom_rb_read_literal(rb, REF_FRAMES);
+#else
pbi->refresh_frame_flags = aom_rb_read_literal(rb, REF_FRAMES);
+#endif
#if CONFIG_EXT_REFS
if (!pbi->refresh_frame_flags) {
@@ -4620,27 +4877,51 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
for (i = 0; i < INTER_REFS_PER_FRAME; ++i) {
const int ref = aom_rb_read_literal(rb, REF_FRAMES_LOG2);
const int idx = cm->ref_frame_map[ref];
+
+ // Most of the time, streams start with a keyframe. In that case,
+ // ref_frame_map will have been filled in at that point and will not
+ // contain any -1's. However, streams are explicitly allowed to start
+ // with an intra-only frame, so long as they don't then signal a
+ // reference to a slot that hasn't been set yet. That's what we are
+ // checking here.
+ if (idx == -1)
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Inter frame requests nonexistent reference");
+
RefBuffer *const ref_frame = &cm->frame_refs[i];
ref_frame->idx = idx;
ref_frame->buf = &frame_bufs[idx].buf;
+#if CONFIG_FRAME_SIGN_BIAS
+#if CONFIG_OBU
+ // NOTE: For the scenario of (cm->frame_type != S_FRAME),
+ // ref_frame_sign_bias will be reset based on frame offsets.
+ cm->ref_frame_sign_bias[LAST_FRAME + i] = 0;
+#endif // CONFIG_OBU
+#else // !CONFIG_FRAME_SIGN_BIAS
+#if CONFIG_OBU
+ cm->ref_frame_sign_bias[LAST_FRAME + i] =
+ (cm->frame_type == S_FRAME) ? 0 : aom_rb_read_bit(rb);
+#else // !CONFIG_OBU
cm->ref_frame_sign_bias[LAST_FRAME + i] = aom_rb_read_bit(rb);
+#endif // CONFIG_OBU
+#endif // CONFIG_FRAME_SIGN_BIAS
#if CONFIG_REFERENCE_BUFFER
- if (pbi->seq_params.frame_id_numbers_present_flag) {
- int frame_id_length = pbi->seq_params.frame_id_length_minus7 + 7;
- int diff_len = pbi->seq_params.delta_frame_id_length_minus2 + 2;
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ int frame_id_length = cm->seq_params.frame_id_length_minus7 + 7;
+ int diff_len = cm->seq_params.delta_frame_id_length_minus2 + 2;
int delta_frame_id_minus1 = aom_rb_read_literal(rb, diff_len);
int ref_frame_id =
((cm->current_frame_id - (delta_frame_id_minus1 + 1) +
(1 << frame_id_length)) %
(1 << frame_id_length));
/* Compare values derived from delta_frame_id_minus1 and
- * refresh_frame_flags. Also, check valid for referencing */
+ * refresh_frame_flags. Also, check valid for referencing */
if (ref_frame_id != cm->ref_frame_id[ref] ||
cm->valid_for_referencing[ref] == 0)
aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
"Reference buffer frame ID mismatch");
}
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
}
#if CONFIG_VAR_REFS
@@ -4657,12 +4938,20 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
setup_frame_size_with_refs(cm, rb);
#endif
+#if CONFIG_AMVR
+ if (cm->seq_mv_precision_level == 2) {
+ cm->cur_frame_mv_precision_level = aom_rb_read_bit(rb) ? 0 : 1;
+ } else {
+ cm->cur_frame_mv_precision_level = cm->seq_mv_precision_level;
+ }
+#endif
cm->allow_high_precision_mv = aom_rb_read_bit(rb);
cm->interp_filter = read_frame_interp_filter(rb);
#if CONFIG_TEMPMV_SIGNALING
- if (!cm->error_resilient_mode) {
+ if (frame_might_use_prev_frame_mvs(cm))
cm->use_prev_frame_mvs = aom_rb_read_bit(rb);
- }
+ else
+ cm->use_prev_frame_mvs = 0;
#endif
for (i = 0; i < INTER_REFS_PER_FRAME; ++i) {
RefBuffer *const ref_buf = &cm->frame_refs[i];
@@ -4679,14 +4968,45 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
}
}
}
+
+#if CONFIG_FRAME_MARKER
+ if (cm->show_frame == 0) {
+ cm->frame_offset = cm->current_video_frame + aom_rb_read_literal(rb, 4);
+ } else {
+ cm->frame_offset = cm->current_video_frame;
+ }
+ av1_setup_frame_buf_refs(cm);
+
+#if CONFIG_FRAME_SIGN_BIAS
+#if CONFIG_OBU
+ if (cm->frame_type != S_FRAME)
+#endif // CONFIG_OBU
+ av1_setup_frame_sign_bias(cm);
+#define FRAME_SIGN_BIAS_DEBUG 0
+#if FRAME_SIGN_BIAS_DEBUG
+ {
+ printf("\n\nDECODER: Frame=%d, show_frame=%d:", cm->current_video_frame,
+ cm->show_frame);
+ MV_REFERENCE_FRAME ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ printf(" sign_bias[%d]=%d", ref_frame,
+ cm->ref_frame_sign_bias[ref_frame]);
+ }
+ printf("\n");
+ }
+#endif // FRAME_SIGN_BIAS_DEBUG
+#undef FRAME_SIGN_BIAS_DEBUG
+#endif // CONFIG_FRAME_SIGN_BIAS
+#endif // CONFIG_FRAME_MARKER
+
#if CONFIG_TEMPMV_SIGNALING
cm->cur_frame->intra_only = cm->frame_type == KEY_FRAME || cm->intra_only;
#endif
#if CONFIG_REFERENCE_BUFFER
- if (pbi->seq_params.frame_id_numbers_present_flag) {
+ if (cm->seq_params.frame_id_numbers_present_flag) {
/* If bitmask is set, update reference frame id values and
- mark frames as valid for reference */
+ mark frames as valid for reference */
int refresh_frame_flags =
cm->frame_type == KEY_FRAME ? 0xFF : pbi->refresh_frame_flags;
for (i = 0; i < REF_FRAMES; i++) {
@@ -4696,7 +5016,7 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
}
}
}
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
get_frame_new_buffer(cm)->bit_depth = cm->bit_depth;
get_frame_new_buffer(cm)->color_space = cm->color_space;
@@ -4721,10 +5041,11 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
} else {
cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_FORWARD;
}
-
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
// This flag will be overridden by the call to av1_setup_past_independence
// below, forcing the use of context 0 for those frame types.
cm->frame_context_idx = aom_rb_read_literal(rb, FRAME_CONTEXTS_LOG2);
+#endif
// Generate next_ref_frame_map.
lock_buffer_pool(pool);
@@ -4754,12 +5075,6 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
if (frame_is_intra_only(cm) || cm->error_resilient_mode)
av1_setup_past_independence(cm);
-#if CONFIG_EXT_PARTITION
- set_sb_size(cm, aom_rb_read_bit(rb) ? BLOCK_128X128 : BLOCK_64X64);
-#else
- set_sb_size(cm, BLOCK_64X64);
-#endif // CONFIG_EXT_PARTITION
-
setup_loopfilter(cm, rb);
setup_quantization(cm, rb);
xd->bd = (int)cm->bit_depth;
@@ -4770,13 +5085,18 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL) {
for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
} else if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT) {
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ if (cm->frame_refs[0].idx <= 0) {
+ cm->frame_contexts[cm->frame_refs[0].idx] = *cm->fc;
+ }
+#else
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
}
#endif // CONFIG_Q_ADAPT_PROBS
setup_segmentation(cm, rb);
-#if CONFIG_DELTA_Q
{
struct segmentation *const seg = &cm->seg;
int segment_quantizer_active = 0;
@@ -4789,6 +5109,10 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
cm->delta_q_res = 1;
#if CONFIG_EXT_DELTA_Q
cm->delta_lf_res = 1;
+ cm->delta_lf_present_flag = 0;
+#if CONFIG_LOOPFILTER_LEVEL
+ cm->delta_lf_multi = 0;
+#endif // CONFIG_LOOPFILTER_LEVEL
#endif
if (segment_quantizer_active == 0 && cm->base_qindex > 0) {
cm->delta_q_present_flag = aom_rb_read_bit(rb);
@@ -4804,10 +5128,17 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
if (cm->delta_lf_present_flag) {
xd->prev_delta_lf_from_base = 0;
cm->delta_lf_res = 1 << aom_rb_read_literal(rb, 2);
+#if CONFIG_LOOPFILTER_LEVEL
+ cm->delta_lf_multi = aom_rb_read_bit(rb);
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id)
+ xd->prev_delta_lf[lf_id] = 0;
+#endif // CONFIG_LOOPFILTER_LEVEL
}
#endif // CONFIG_EXT_DELTA_Q
}
}
+#if CONFIG_AMVR
+ xd->cur_frame_mv_precision_level = cm->cur_frame_mv_precision_level;
#endif
for (i = 0; i < MAX_SEGMENTS; ++i) {
@@ -4830,20 +5161,72 @@ static size_t read_uncompressed_header(AV1Decoder *pbi,
#endif // CONFIG_LOOP_RESTORATION
cm->tx_mode = read_tx_mode(cm, rb);
cm->reference_mode = read_frame_reference_mode(cm, rb);
-#if CONFIG_EXT_INTER
+ if (cm->reference_mode != SINGLE_REFERENCE) setup_compound_reference_mode(cm);
read_compound_tools(cm, rb);
-#endif // CONFIG_EXT_INTER
#if CONFIG_EXT_TX
cm->reduced_tx_set_used = aom_rb_read_bit(rb);
#endif // CONFIG_EXT_TX
- read_tile_info(pbi, rb);
- sz = aom_rb_read_literal(rb, 16);
+#if CONFIG_ADAPT_SCAN
+ cm->use_adapt_scan = aom_rb_read_bit(rb);
+ // TODO(angiebird): call av1_init_scan_order only when use_adapt_scan
+ // switches from 1 to 0
+ if (cm->use_adapt_scan == 0) av1_init_scan_order(cm);
+#endif // CONFIG_ADAPT_SCAN
+
+#if CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING
+  // NOTE(zoeliu): Since cm->prev_frame can be neither a frame with
+  //              show_existing_frame=1 nor a frame that is not used as a
+  //              reference, the frame buffer it originally pointed to may,
+  //              by the time it is referred to, have already expired and
+  //              been reassigned to the newly coded current frame. Hence,
+  //              we need to check whether this is the case, and if so, we
+  //              have two choices:
+  //              (1) simply disable the use of previous frame MVs; or
+  //              (2) have cm->prev_frame point to one reference frame
+  //                  buffer, e.g. LAST_FRAME.
+ if (!dec_is_ref_frame_buf(pbi, cm->prev_frame)) {
+ // Reassign the LAST_FRAME buffer to cm->prev_frame.
+ cm->prev_frame =
+ cm->frame_refs[LAST_FRAME - LAST_FRAME].idx != INVALID_IDX
+ ? &cm->buffer_pool
+ ->frame_bufs[cm->frame_refs[LAST_FRAME - LAST_FRAME].idx]
+ : NULL;
+ }
+#endif // CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING
- if (sz == 0)
+#if CONFIG_TEMPMV_SIGNALING
+ if (cm->use_prev_frame_mvs && !frame_can_use_prev_frame_mvs(cm)) {
aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Invalid header size");
+ "Frame wrongly requests previous frame MVs");
+ }
+#else
+ cm->use_prev_frame_mvs = !cm->error_resilient_mode && cm->prev_frame &&
+#if CONFIG_FRAME_SUPERRES
+ cm->width == cm->last_width &&
+ cm->height == cm->last_height &&
+#else
+ cm->width == cm->prev_frame->buf.y_crop_width &&
+ cm->height == cm->prev_frame->buf.y_crop_height &&
+#endif // CONFIG_FRAME_SUPERRES
+ !cm->last_intra_only && cm->last_show_frame &&
+ (cm->last_frame_type != KEY_FRAME);
+#endif // CONFIG_TEMPMV_SIGNALING
+
+#if CONFIG_GLOBAL_MOTION
+ if (!frame_is_intra_only(cm)) read_global_motion(cm, rb);
+#endif
+
+ read_tile_info(pbi, rb);
+ if (use_compressed_header(cm)) {
+ sz = aom_rb_read_literal(rb, 16);
+ if (sz == 0)
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Invalid header size");
+ } else {
+ sz = 0;
+ }
return sz;
}
@@ -4860,122 +5243,14 @@ static void read_supertx_probs(FRAME_CONTEXT *fc, aom_reader *r) {
}
#endif // CONFIG_SUPERTX
-#if CONFIG_GLOBAL_MOTION
-static void read_global_motion_params(WarpedMotionParams *params,
- WarpedMotionParams *ref_params,
- aom_reader *r, int allow_hp) {
- TransformationType type = aom_read_bit(r, ACCT_STR);
- if (type != IDENTITY) type += aom_read_literal(r, GLOBAL_TYPE_BITS, ACCT_STR);
- int trans_bits;
- int trans_dec_factor;
- int trans_prec_diff;
- set_default_warp_params(params);
- params->wmtype = type;
- switch (type) {
- case HOMOGRAPHY:
- case HORTRAPEZOID:
- case VERTRAPEZOID:
- if (type != HORTRAPEZOID)
- params->wmmat[6] =
- aom_read_signed_primitive_refsubexpfin(
- r, GM_ROW3HOMO_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[6] >> GM_ROW3HOMO_PREC_DIFF), ACCT_STR) *
- GM_ROW3HOMO_DECODE_FACTOR;
- if (type != VERTRAPEZOID)
- params->wmmat[7] =
- aom_read_signed_primitive_refsubexpfin(
- r, GM_ROW3HOMO_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[7] >> GM_ROW3HOMO_PREC_DIFF), ACCT_STR) *
- GM_ROW3HOMO_DECODE_FACTOR;
- case AFFINE:
- case ROTZOOM:
- params->wmmat[2] = aom_read_signed_primitive_refsubexpfin(
- r, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[2] >> GM_ALPHA_PREC_DIFF) -
- (1 << GM_ALPHA_PREC_BITS),
- ACCT_STR) *
- GM_ALPHA_DECODE_FACTOR +
- (1 << WARPEDMODEL_PREC_BITS);
- if (type != VERTRAPEZOID)
- params->wmmat[3] =
- aom_read_signed_primitive_refsubexpfin(
- r, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[3] >> GM_ALPHA_PREC_DIFF), ACCT_STR) *
- GM_ALPHA_DECODE_FACTOR;
- if (type >= AFFINE) {
- if (type != HORTRAPEZOID)
- params->wmmat[4] =
- aom_read_signed_primitive_refsubexpfin(
- r, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[4] >> GM_ALPHA_PREC_DIFF), ACCT_STR) *
- GM_ALPHA_DECODE_FACTOR;
- params->wmmat[5] = aom_read_signed_primitive_refsubexpfin(
- r, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
- (1 << GM_ALPHA_PREC_BITS),
- ACCT_STR) *
- GM_ALPHA_DECODE_FACTOR +
- (1 << WARPEDMODEL_PREC_BITS);
- } else {
- params->wmmat[4] = -params->wmmat[3];
- params->wmmat[5] = params->wmmat[2];
- }
- // fallthrough intended
- case TRANSLATION:
- trans_bits = (type == TRANSLATION) ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
- : GM_ABS_TRANS_BITS;
- trans_dec_factor = (type == TRANSLATION)
- ? GM_TRANS_ONLY_DECODE_FACTOR * (1 << !allow_hp)
- : GM_TRANS_DECODE_FACTOR;
- trans_prec_diff = (type == TRANSLATION)
- ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
- : GM_TRANS_PREC_DIFF;
- params->wmmat[0] =
- aom_read_signed_primitive_refsubexpfin(
- r, (1 << trans_bits) + 1, SUBEXPFIN_K,
- (ref_params->wmmat[0] >> trans_prec_diff), ACCT_STR) *
- trans_dec_factor;
- params->wmmat[1] =
- aom_read_signed_primitive_refsubexpfin(
- r, (1 << trans_bits) + 1, SUBEXPFIN_K,
- (ref_params->wmmat[1] >> trans_prec_diff), ACCT_STR) *
- trans_dec_factor;
- case IDENTITY: break;
- default: assert(0);
- }
- if (params->wmtype <= AFFINE)
- if (!get_shear_params(params)) assert(0);
-}
-
-static void read_global_motion(AV1_COMMON *cm, aom_reader *r) {
- int frame;
- YV12_BUFFER_CONFIG *ref_buf;
- for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
- ref_buf = get_ref_frame(cm, frame);
- if (cm->width == ref_buf->y_crop_width &&
- cm->height == ref_buf->y_crop_height) {
- read_global_motion_params(&cm->global_motion[frame],
- &cm->prev_frame->global_motion[frame], r,
- cm->allow_high_precision_mv);
- } else {
- set_default_warp_params(&cm->global_motion[frame]);
- }
- /*
- printf("Dec Ref %d [%d/%d]: %d %d %d %d\n",
- frame, cm->current_video_frame, cm->show_frame,
- cm->global_motion[frame].wmmat[0],
- cm->global_motion[frame].wmmat[1],
- cm->global_motion[frame].wmmat[2],
- cm->global_motion[frame].wmmat[3]);
- */
- }
- memcpy(cm->cur_frame->global_motion, cm->global_motion,
- TOTAL_REFS_PER_FRAME * sizeof(WarpedMotionParams));
-}
-#endif // CONFIG_GLOBAL_MOTION
-
static int read_compressed_header(AV1Decoder *pbi, const uint8_t *data,
size_t partition_size) {
+#if CONFIG_RESTRICT_COMPRESSED_HDR
+ (void)pbi;
+ (void)data;
+ (void)partition_size;
+ return 0;
+#else
AV1_COMMON *const cm = &pbi->common;
#if CONFIG_SUPERTX
MACROBLOCKD *const xd = &pbi->mb;
@@ -4994,46 +5269,30 @@ static int read_compressed_header(AV1Decoder *pbi, const uint8_t *data,
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
-#if CONFIG_LOOP_RESTORATION
- if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
- cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
- cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
- av1_alloc_restoration_buffers(cm);
- decode_restoration(cm, &r);
- }
-#endif
-
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if (cm->tx_mode == TX_MODE_SELECT)
av1_diff_update_prob(&r, &fc->quarter_tx_size_prob, ACCT_STR);
#endif
-#if CONFIG_LV_MAP
- av1_read_txb_probs(fc, cm->tx_mode, &r);
-#endif // CONFIG_LV_MAP
+#if CONFIG_LV_MAP && !LV_MAP_PROB
+ av1_read_txb_probs(fc, cm->tx_mode, &r, &cm->counts);
+#endif // CONFIG_LV_MAP && !LV_MAP_PROB
#if !CONFIG_NEW_MULTISYMBOL
#if CONFIG_VAR_TX
- for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
- av1_diff_update_prob(&r, &fc->txfm_partition_prob[i], ACCT_STR);
+ if (cm->tx_mode == TX_MODE_SELECT)
+ for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
+ av1_diff_update_prob(&r, &fc->txfm_partition_prob[i], ACCT_STR);
#endif // CONFIG_VAR_TX
for (i = 0; i < SKIP_CONTEXTS; ++i)
av1_diff_update_prob(&r, &fc->skip_probs[i], ACCT_STR);
#endif
- if (frame_is_intra_only(cm)) {
- av1_copy(cm->fc->kf_y_cdf, av1_kf_y_mode_cdf);
-#if CONFIG_INTRABC
- if (cm->allow_screen_content_tools) {
- av1_diff_update_prob(&r, &fc->intrabc_prob, ACCT_STR);
- }
-#endif
- } else {
+ if (!frame_is_intra_only(cm)) {
#if !CONFIG_NEW_MULTISYMBOL
read_inter_mode_probs(fc, &r);
#endif
-#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
if (cm->reference_mode != COMPOUND_REFERENCE &&
cm->allow_interintra_compound) {
@@ -5058,43 +5317,40 @@ static int read_compressed_header(AV1Decoder *pbi, const uint8_t *data,
#endif // CONFIG_WEDGE
}
#endif // CONFIG_INTERINTRA
-#endif // CONFIG_EXT_INTER
-
-#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
- for (i = 0; i < ADAPT_OVERLAP_BLOCKS; ++i) {
- for (int j = 0; j < MAX_NCOBMC_MODES - 1; ++j)
- av1_diff_update_prob(&r, &fc->ncobmc_mode_prob[i][j], ACCT_STR);
- }
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
#if !CONFIG_NEW_MULTISYMBOL
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
av1_diff_update_prob(&r, &fc->intra_inter_prob[i], ACCT_STR);
#endif
- if (cm->reference_mode != SINGLE_REFERENCE)
- setup_compound_reference_mode(cm);
+#if !CONFIG_NEW_MULTISYMBOL
read_frame_reference_mode_probs(cm, &r);
+#endif
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
for (i = 0; i < COMP_INTER_MODE_CONTEXTS; i++)
av1_diff_update_prob(&r, &fc->comp_inter_mode_prob[i], ACCT_STR);
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
#if !CONFIG_NEW_MULTISYMBOL
- for (i = 0; i < NMV_CONTEXTS; ++i)
- read_mv_probs(&fc->nmvc[i], cm->allow_high_precision_mv, &r);
+#if CONFIG_AMVR
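+  // Under AMVR, integer-precision frames carry no MV probability updates.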
+ if (cm->cur_frame_mv_precision_level == 0) {
+#endif
+ for (i = 0; i < NMV_CONTEXTS; ++i)
+ read_mv_probs(&fc->nmvc[i], cm->allow_high_precision_mv, &r);
+#if CONFIG_AMVR
+ }
+#endif
#endif
#if CONFIG_SUPERTX
if (!xd->lossless[0]) read_supertx_probs(fc, &r);
#endif
-#if CONFIG_GLOBAL_MOTION
- read_global_motion(cm, &r);
-#endif
}
return aom_reader_has_error(&r);
+#endif // CONFIG_RESTRICT_COMPRESSED_HDR
}
+
#ifdef NDEBUG
#define debug_check_frame_counts(cm) (void)0
#else // !NDEBUG
@@ -5105,22 +5361,10 @@ static void debug_check_frame_counts(const AV1_COMMON *const cm) {
av1_zero(zero_counts);
assert(cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_BACKWARD ||
cm->error_resilient_mode);
-#if CONFIG_ENTROPY_STATS
- assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode,
- sizeof(cm->counts.y_mode)));
- assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode,
- sizeof(cm->counts.uv_mode)));
-#endif
assert(!memcmp(cm->counts.partition, zero_counts.partition,
sizeof(cm->counts.partition)));
- assert(!memcmp(cm->counts.coef, zero_counts.coef, sizeof(cm->counts.coef)));
- assert(!memcmp(cm->counts.eob_branch, zero_counts.eob_branch,
- sizeof(cm->counts.eob_branch)));
- assert(!memcmp(cm->counts.blockz_count, zero_counts.blockz_count,
- sizeof(cm->counts.blockz_count)));
assert(!memcmp(cm->counts.switchable_interp, zero_counts.switchable_interp,
sizeof(cm->counts.switchable_interp)));
-#if CONFIG_EXT_INTER
assert(!memcmp(cm->counts.inter_compound_mode,
zero_counts.inter_compound_mode,
sizeof(cm->counts.inter_compound_mode)));
@@ -5135,7 +5379,6 @@ static void debug_check_frame_counts(const AV1_COMMON *const cm) {
assert(!memcmp(cm->counts.compound_interinter,
zero_counts.compound_interinter,
sizeof(cm->counts.compound_interinter)));
-#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
assert(!memcmp(cm->counts.motion_mode, zero_counts.motion_mode,
sizeof(cm->counts.motion_mode)));
@@ -5146,10 +5389,10 @@ static void debug_check_frame_counts(const AV1_COMMON *const cm) {
#endif
assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter,
sizeof(cm->counts.intra_inter)));
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
assert(!memcmp(cm->counts.comp_inter_mode, zero_counts.comp_inter_mode,
sizeof(cm->counts.comp_inter_mode)));
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter,
sizeof(cm->counts.comp_inter)));
#if CONFIG_EXT_COMP_REFS
@@ -5173,10 +5416,6 @@ static void debug_check_frame_counts(const AV1_COMMON *const cm) {
!memcmp(&cm->counts.mv[0], &zero_counts.mv[0], sizeof(cm->counts.mv[0])));
assert(
!memcmp(&cm->counts.mv[1], &zero_counts.mv[1], sizeof(cm->counts.mv[0])));
- assert(!memcmp(cm->counts.inter_ext_tx, zero_counts.inter_ext_tx,
- sizeof(cm->counts.inter_ext_tx)));
- assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx,
- sizeof(cm->counts.intra_ext_tx)));
}
#endif // NDEBUG
@@ -5200,12 +5439,6 @@ static struct aom_read_bit_buffer *init_read_bit_buffer(
//------------------------------------------------------------------------------
-int av1_read_sync_code(struct aom_read_bit_buffer *const rb) {
- return aom_rb_read_literal(rb, 8) == AV1_SYNC_CODE_0 &&
- aom_rb_read_literal(rb, 8) == AV1_SYNC_CODE_1 &&
- aom_rb_read_literal(rb, 8) == AV1_SYNC_CODE_2;
-}
-
void av1_read_frame_size(struct aom_read_bit_buffer *rb, int *width,
int *height) {
*width = aom_rb_read_literal(rb, 16) + 1;
@@ -5239,12 +5472,34 @@ void superres_post_decode(AV1Decoder *pbi) {
}
#endif // CONFIG_FRAME_SUPERRES
-void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
- const uint8_t *data_end, const uint8_t **p_data_end) {
+static void dec_setup_frame_boundary_info(AV1_COMMON *const cm) {
+// Note: When LOOPFILTERING_ACROSS_TILES is enabled, we need to clear the
+// boundary information every frame, since the tile boundaries may
+// change every frame (particularly when dependent horizontal tiles are also
+// enabled); when it is disabled, the only information stored is the frame
+// boundaries, which only depend on the frame size.
+#if !CONFIG_LOOPFILTERING_ACROSS_TILES
+ if (cm->width != cm->last_width || cm->height != cm->last_height)
+#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
+ {
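+    // Clear stale per-block boundary flags before recomputing them.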
+ int row, col;
+ for (row = 0; row < cm->mi_rows; ++row) {
+ MODE_INFO *mi = cm->mi + row * cm->mi_stride;
+ for (col = 0; col < cm->mi_cols; ++col) {
+ mi->mbmi.boundary_info = 0;
+ mi++;
+ }
+ }
+ av1_setup_frame_boundary_info(cm);
+ }
+}
+
+size_t av1_decode_frame_headers_and_setup(AV1Decoder *pbi, const uint8_t *data,
+ const uint8_t *data_end,
+ const uint8_t **p_data_end) {
AV1_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
struct aom_read_bit_buffer rb;
- int context_updated = 0;
uint8_t clear_data[MAX_AV1_HEADER_SIZE];
size_t first_partition_size;
YV12_BUFFER_CONFIG *new_fb;
@@ -5259,6 +5514,15 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
bitstream_queue_set_frame_read(cm->current_video_frame * 2 + cm->show_frame);
#endif
+#if CONFIG_GLOBAL_MOTION
+ int i;
+ for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
+ cm->global_motion[i] = default_warp_params;
+ cm->cur_frame->global_motion[i] = default_warp_params;
+ }
+ xd->global_motion = cm->global_motion;
+#endif // CONFIG_GLOBAL_MOTION
+
first_partition_size = read_uncompressed_header(
pbi, init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
@@ -5288,25 +5552,18 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
xd->cur_buf->y_crop_width, xd->cur_buf->y_crop_height);
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_INTRABC
-#if CONFIG_GLOBAL_MOTION
- int i;
- for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
- set_default_warp_params(&cm->global_motion[i]);
- set_default_warp_params(&cm->cur_frame->global_motion[i]);
- }
- xd->global_motion = cm->global_motion;
-#endif // CONFIG_GLOBAL_MOTION
- if (!first_partition_size) {
+ if (cm->show_existing_frame) {
// showing a frame directly
*p_data_end = data + aom_rb_bytes_read(&rb);
- return;
+ return 0;
}
data += aom_rb_bytes_read(&rb);
- if (!read_is_valid(data, first_partition_size, data_end))
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Truncated packet or corrupt header length");
+ if (first_partition_size)
+ if (!read_is_valid(data, first_partition_size, data_end))
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Truncated packet or corrupt header length");
cm->setup_mi(cm);
@@ -5330,15 +5587,9 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
#endif // CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING
#if CONFIG_TEMPMV_SIGNALING
- if (cm->use_prev_frame_mvs) {
- assert(!cm->error_resilient_mode && cm->prev_frame);
-#if CONFIG_FRAME_SUPERRES
- assert(cm->width == cm->last_width && cm->height == cm->last_height);
-#else
- assert(cm->width == last_fb_ref_buf->buf->y_crop_width &&
- cm->height == last_fb_ref_buf->buf->y_crop_height);
-#endif // CONFIG_FRAME_SUPERRES
- assert(!cm->prev_frame->intra_only);
+ if (cm->use_prev_frame_mvs && !frame_can_use_prev_frame_mvs(cm)) {
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Frame wrongly requests previous frame MVs");
}
#else
cm->use_prev_frame_mvs = !cm->error_resilient_mode && cm->prev_frame &&
@@ -5353,10 +5604,24 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
(cm->last_frame_type != KEY_FRAME);
#endif // CONFIG_TEMPMV_SIGNALING
- av1_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
+#if CONFIG_MFMV
+ av1_setup_motion_field(cm);
+#endif // CONFIG_MFMV
+ av1_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ if (cm->error_resilient_mode || frame_is_intra_only(cm)) {
+ // use the default frame context values
+ *cm->fc = cm->frame_contexts[FRAME_CONTEXT_DEFAULTS];
+ cm->pre_fc = &cm->frame_contexts[FRAME_CONTEXT_DEFAULTS];
+ } else {
+ *cm->fc = cm->frame_contexts[cm->frame_refs[0].idx];
+ cm->pre_fc = &cm->frame_contexts[cm->frame_refs[0].idx];
+ }
+#else
*cm->fc = cm->frame_contexts[cm->frame_context_idx];
cm->pre_fc = &cm->frame_contexts[cm->frame_context_idx];
+#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
if (!cm->fc->initialized)
aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
"Uninitialized entropy context.");
@@ -5364,24 +5629,50 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
av1_zero(cm->counts);
xd->corrupted = 0;
- new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
- if (new_fb->corrupted)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
- "Decode failed. Frame data header is corrupted.");
+ if (first_partition_size) {
+ new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
+ if (new_fb->corrupted)
+ aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ "Decode failed. Frame data header is corrupted.");
+ }
+ return first_partition_size;
+}
+
+void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
+ const uint8_t *data_end,
+ const uint8_t **p_data_end, int startTile,
+ int endTile, int initialize_flag) {
+ AV1_COMMON *const cm = &pbi->common;
+ MACROBLOCKD *const xd = &pbi->mb;
+ int context_updated = 0;
+
+#if CONFIG_LOOP_RESTORATION
+ if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
+ av1_alloc_restoration_buffers(cm);
+ }
+#endif
+
+#if !CONFIG_LOOPFILTER_LEVEL
if (cm->lf.filter_level && !cm->skip_loop_filter) {
- av1_loop_filter_frame_init(cm, cm->lf.filter_level);
+ av1_loop_filter_frame_init(cm, cm->lf.filter_level, cm->lf.filter_level);
}
+#endif
// If encoded in frame parallel mode, frame context is ready after decoding
// the frame header.
- if (cm->frame_parallel_decode &&
+ if (cm->frame_parallel_decode && initialize_flag &&
cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_BACKWARD) {
AVxWorker *const worker = pbi->frame_worker_owner;
FrameWorkerData *const frame_worker_data = worker->data1;
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_FORWARD) {
context_updated = 1;
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ cm->frame_contexts[cm->new_fb_idx] = *cm->fc;
+#else
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
}
av1_frameworker_lock_stats(worker);
pbi->cur_buf->row = -1;
@@ -5392,7 +5683,7 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
av1_frameworker_unlock_stats(worker);
}
- av1_setup_frame_boundary_info(cm);
+ dec_setup_frame_boundary_info(cm);
if (pbi->max_threads > 1 && !CONFIG_CB4X4 &&
#if CONFIG_EXT_TILE
@@ -5400,22 +5691,49 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
#endif // CONFIG_EXT_TILE
cm->tile_cols > 1) {
// Multi-threaded tile decoder
- *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
+ *p_data_end =
+ decode_tiles_mt(pbi, data + pbi->first_partition_size, data_end);
if (!xd->corrupted) {
if (!cm->skip_loop_filter) {
- // If multiple threads are used to decode tiles, then we use those
- // threads to do parallel loopfiltering.
- av1_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
- 0, 0, pbi->tile_workers, pbi->num_tile_workers,
+// If multiple threads are used to decode tiles, then we use those
+// threads to do parallel loopfiltering.
+#if CONFIG_LOOPFILTER_LEVEL
+ av1_loop_filter_frame_mt(
+ (YV12_BUFFER_CONFIG *)xd->cur_buf, cm, pbi->mb.plane,
+ cm->lf.filter_level[0], cm->lf.filter_level[1], 0, 0,
+ pbi->tile_workers, pbi->num_tile_workers, &pbi->lf_row_sync);
+#else
+ av1_loop_filter_frame_mt((YV12_BUFFER_CONFIG *)xd->cur_buf, cm,
+ pbi->mb.plane, cm->lf.filter_level, 0, 0,
+ pbi->tile_workers, pbi->num_tile_workers,
&pbi->lf_row_sync);
+#endif // CONFIG_LOOPFILTER_LEVEL
}
} else {
aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
"Decode failed. Frame data is corrupted.");
}
} else {
- *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
+#if CONFIG_OBU
+ *p_data_end = decode_tiles(pbi, data, data_end, startTile, endTile);
+#else
+ *p_data_end = decode_tiles(
+ pbi, data + pbi->uncomp_hdr_size + pbi->first_partition_size, data_end,
+ startTile, endTile);
+#endif
+ }
+
+ if (endTile != cm->tile_rows * cm->tile_cols - 1) {
+ return;
+ }
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
+ av1_loop_restoration_save_boundary_lines(&pbi->cur_buf->buf, cm);
}
+#endif
#if CONFIG_CDEF
if (!cm->skip_loop_filter && !cm->all_lossless) {
@@ -5431,7 +5749,9 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
- av1_loop_restoration_frame(new_fb, cm, cm->rst_info, 7, 0, NULL);
+ aom_extend_frame_borders((YV12_BUFFER_CONFIG *)xd->cur_buf);
+ av1_loop_restoration_frame((YV12_BUFFER_CONFIG *)xd->cur_buf, cm,
+ cm->rst_info, 7, 0, NULL);
}
#endif // CONFIG_LOOP_RESTORATION
@@ -5443,7 +5763,12 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
aom_malloc(cm->tile_rows * cm->tile_cols *
sizeof(&pbi->tile_data[0].tctx.partition_cdf[0][0]));
make_update_tile_list_dec(pbi, cm->tile_rows, cm->tile_cols, tile_ctxs);
+#if CONFIG_LV_MAP
av1_adapt_coef_probs(cm);
+#endif // CONFIG_LV_MAP
+#if CONFIG_SYMBOLRATE
+ av1_dump_symbol_rate(cm);
+#endif
av1_adapt_intra_frame_probs(cm);
av1_average_tile_coef_cdfs(pbi->common.fc, tile_ctxs, cdf_ptrs,
cm->tile_rows * cm->tile_cols);
@@ -5459,7 +5784,9 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
if (!frame_is_intra_only(cm)) {
av1_adapt_inter_frame_probs(cm);
+#if !CONFIG_NEW_MULTISYMBOL
av1_adapt_mv_probs(cm, cm->allow_high_precision_mv);
+#endif
av1_average_tile_inter_cdfs(&pbi->common, pbi->common.fc, tile_ctxs,
cdf_ptrs, cm->tile_rows * cm->tile_cols);
av1_average_tile_mv_cdfs(pbi->common.fc, tile_ctxs, cdf_ptrs,
@@ -5481,7 +5808,153 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
}
#endif
- // Non frame parallel update frame context here.
+// In non-frame-parallel decoding, update the frame context here.
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ if (!context_updated) cm->frame_contexts[cm->new_fb_idx] = *cm->fc;
+#else
if (!cm->error_resilient_mode && !context_updated)
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+#endif
+}
+
+#if CONFIG_OBU
+
+static OBU_TYPE read_obu_header(struct aom_read_bit_buffer *rb,
+ uint32_t *header_size) {
+ OBU_TYPE obu_type;
+ int obu_extension_flag;
+
+ *header_size = 1;
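+  // The base OBU header is one byte; the extension flag adds a second byte.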
+
+ obu_type = (OBU_TYPE)aom_rb_read_literal(rb, 5);
+ aom_rb_read_literal(rb, 2); // reserved
+ obu_extension_flag = aom_rb_read_bit(rb);
+ if (obu_extension_flag) {
+ *header_size += 1;
+ aom_rb_read_literal(rb, 3); // temporal_id
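+    // The two unnamed 2-bit fields below are presumably the spatial and
+    // quality layer ids in this draft of the OBU syntax.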
+ aom_rb_read_literal(rb, 2);
+ aom_rb_read_literal(rb, 2);
+ aom_rb_read_literal(rb, 1); // reserved
+ }
+
+ return obu_type;
+}
+
+static uint32_t read_temporal_delimiter_obu(void) { return 0; }
+
+static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
+ struct aom_read_bit_buffer *rb) {
+ AV1_COMMON *const cm = &pbi->common;
+ SequenceHeader *const seq_params = &cm->seq_params;
+ uint32_t saved_bit_offset = rb->bit_offset;
+
+ cm->profile = av1_read_profile(rb);
+ aom_rb_read_literal(rb, 4); // level
+
+ seq_params->frame_id_numbers_present_flag = aom_rb_read_bit(rb);
+ if (seq_params->frame_id_numbers_present_flag) {
+ seq_params->frame_id_length_minus7 = aom_rb_read_literal(rb, 4);
+ seq_params->delta_frame_id_length_minus2 = aom_rb_read_literal(rb, 4);
+ }
+
+ read_bitdepth_colorspace_sampling(cm, rb, pbi->allow_lowbitdepth);
+
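+  // Number of header bytes consumed, rounded up to a whole byte.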
+ return ((rb->bit_offset - saved_bit_offset + 7) >> 3);
+}
+
+static uint32_t read_frame_header_obu(AV1Decoder *pbi, const uint8_t *data,
+ const uint8_t *data_end,
+ const uint8_t **p_data_end) {
+ size_t header_size;
+
+ header_size =
+ av1_decode_frame_headers_and_setup(pbi, data, data_end, p_data_end);
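+  // The frame header OBU spans both the uncompressed and compressed headers.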
+ return (uint32_t)(pbi->uncomp_hdr_size + header_size);
+}
+
+static uint32_t read_tile_group_header(AV1Decoder *pbi,
+ struct aom_read_bit_buffer *rb,
+ int *startTile, int *endTile) {
+ AV1_COMMON *const cm = &pbi->common;
+ uint32_t saved_bit_offset = rb->bit_offset;
+
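+  // Each tile index is coded with log2_tile_rows + log2_tile_cols bits.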
+ *startTile = aom_rb_read_literal(rb, cm->log2_tile_rows + cm->log2_tile_cols);
+ *endTile = aom_rb_read_literal(rb, cm->log2_tile_rows + cm->log2_tile_cols);
+
+ return ((rb->bit_offset - saved_bit_offset + 7) >> 3);
+}
+
+static uint32_t read_one_tile_group_obu(AV1Decoder *pbi,
+ struct aom_read_bit_buffer *rb,
+ int is_first_tg, const uint8_t *data,
+ const uint8_t *data_end,
+ const uint8_t **p_data_end,
+ int *is_last_tg) {
+ AV1_COMMON *const cm = &pbi->common;
+ int startTile, endTile;
+ uint32_t header_size, tg_payload_size;
+
+ header_size = read_tile_group_header(pbi, rb, &startTile, &endTile);
+ data += header_size;
+ av1_decode_tg_tiles_and_wrapup(pbi, data, data_end, p_data_end, startTile,
+ endTile, is_first_tg);
+ tg_payload_size = (uint32_t)(*p_data_end - data);
+
+  // TODO(shan): For now, assume all tile groups are received in order.
+ *is_last_tg = endTile == cm->tile_rows * cm->tile_cols - 1;
+
+ return header_size + tg_payload_size;
+}
+
+void av1_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
+ const uint8_t *data_end,
+ const uint8_t **p_data_end) {
+ AV1_COMMON *const cm = &pbi->common;
+ int frame_decoding_finished = 0;
+ int is_first_tg_obu_received = 1;
+ int frame_header_received = 0;
+ int frame_header_size = 0;
+
+ // decode frame as a series of OBUs
+ while (!frame_decoding_finished && !cm->error.error_code) {
+ struct aom_read_bit_buffer rb;
+ uint8_t clear_data[80];
+ uint32_t obu_size, obu_header_size, obu_payload_size = 0;
+ OBU_TYPE obu_type;
+
+ init_read_bit_buffer(pbi, &rb, data + 4, data_end, clear_data);
+
+    // Every OBU is preceded by its 4-byte size (OBU header + payload size).
+ // The obu size is only needed for tile group OBUs
+ obu_size = mem_get_le32(data);
+ obu_type = read_obu_header(&rb, &obu_header_size);
+ data += (4 + obu_header_size);
+
+ switch (obu_type) {
+ case OBU_TD: obu_payload_size = read_temporal_delimiter_obu(); break;
+ case OBU_SEQUENCE_HEADER:
+ obu_payload_size = read_sequence_header_obu(pbi, &rb);
+ break;
+ case OBU_FRAME_HEADER:
+ // Only decode first frame header received
+ if (!frame_header_received) {
+ frame_header_size = obu_payload_size =
+ read_frame_header_obu(pbi, data, data_end, p_data_end);
+ frame_header_received = 1;
+ } else {
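+        // A repeated frame header; skip it using the size of the first one.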
+ obu_payload_size = frame_header_size;
+ }
+ if (cm->show_existing_frame) frame_decoding_finished = 1;
+ break;
+ case OBU_TILE_GROUP:
+ obu_payload_size = read_one_tile_group_obu(
+ pbi, &rb, is_first_tg_obu_received, data, data + obu_size - 1,
+ p_data_end, &frame_decoding_finished);
+ is_first_tg_obu_received = 0;
+ break;
+ default: break;
+ }
+ data += obu_payload_size;
+ }
}
+#endif
diff --git a/third_party/aom/av1/decoder/decodeframe.h b/third_party/aom/av1/decoder/decodeframe.h
index a904658b0..0e7eb6a1d 100644
--- a/third_party/aom/av1/decoder/decodeframe.h
+++ b/third_party/aom/av1/decoder/decodeframe.h
@@ -21,16 +21,33 @@ struct aom_read_bit_buffer;
#if CONFIG_REFERENCE_BUFFER
/* Placeholder for now */
-void read_sequence_header(SequenceHeader *seq_params);
+void read_sequence_header(SequenceHeader *seq_params,
+ struct aom_read_bit_buffer *rb);
#endif
-int av1_read_sync_code(struct aom_read_bit_buffer *const rb);
void av1_read_frame_size(struct aom_read_bit_buffer *rb, int *width,
int *height);
BITSTREAM_PROFILE av1_read_profile(struct aom_read_bit_buffer *rb);
+// This function is now obsolete
void av1_decode_frame(struct AV1Decoder *pbi, const uint8_t *data,
const uint8_t *data_end, const uint8_t **p_data_end);
+size_t av1_decode_frame_headers_and_setup(struct AV1Decoder *pbi,
+ const uint8_t *data,
+ const uint8_t *data_end,
+ const uint8_t **p_data_end);
+
+void av1_decode_tg_tiles_and_wrapup(struct AV1Decoder *pbi, const uint8_t *data,
+ const uint8_t *data_end,
+ const uint8_t **p_data_end, int startTile,
+ int endTile, int initialize_flag);
+
+#if CONFIG_OBU
+// Replaces av1_decode_frame.
+void av1_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
+ const uint8_t *data_end,
+ const uint8_t **p_data_end);
+#endif
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/decoder/decodemv.c b/third_party/aom/av1/decoder/decodemv.c
index 7c8544283..cac27e9a6 100644
--- a/third_party/aom/av1/decoder/decodemv.c
+++ b/third_party/aom/av1/decoder/decodemv.c
@@ -36,11 +36,9 @@
#define DEC_MISMATCH_DEBUG 0
static PREDICTION_MODE read_intra_mode(aom_reader *r, aom_cdf_prob *cdf) {
- return (PREDICTION_MODE)
- av1_intra_mode_inv[aom_read_symbol(r, cdf, INTRA_MODES, ACCT_STR)];
+ return (PREDICTION_MODE)aom_read_symbol(r, cdf, INTRA_MODES, ACCT_STR);
}
-#if CONFIG_DELTA_Q
static int read_delta_qindex(AV1_COMMON *cm, MACROBLOCKD *xd, aom_reader *r,
MB_MODE_INFO *const mbmi, int mi_col, int mi_row) {
FRAME_COUNTS *counts = xd->counts;
@@ -63,7 +61,7 @@ static int read_delta_qindex(AV1_COMMON *cm, MACROBLOCKD *xd, aom_reader *r,
}
if (!smallval) {
- rem_bits = aom_read_literal(r, 3, ACCT_STR);
+ rem_bits = aom_read_literal(r, 3, ACCT_STR) + 1;
thr = (1 << rem_bits) + 1;
abs = aom_read_literal(r, rem_bits, ACCT_STR) + thr;
}
@@ -80,6 +78,9 @@ static int read_delta_qindex(AV1_COMMON *cm, MACROBLOCKD *xd, aom_reader *r,
}
#if CONFIG_EXT_DELTA_Q
static int read_delta_lflevel(AV1_COMMON *cm, MACROBLOCKD *xd, aom_reader *r,
+#if CONFIG_LOOPFILTER_LEVEL
+ int lf_id,
+#endif
MB_MODE_INFO *const mbmi, int mi_col,
int mi_row) {
FRAME_COUNTS *counts = xd->counts;
@@ -93,16 +94,37 @@ static int read_delta_lflevel(AV1_COMMON *cm, MACROBLOCKD *xd, aom_reader *r,
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
- if ((bsize != BLOCK_64X64 || mbmi->skip == 0) && read_delta_lf_flag) {
+ if ((bsize != cm->sb_size || mbmi->skip == 0) && read_delta_lf_flag) {
+#if CONFIG_LOOPFILTER_LEVEL
+ if (cm->delta_lf_multi) {
+ assert(lf_id >= 0 && lf_id < FRAME_LF_COUNT);
+ abs = aom_read_symbol(r, ec_ctx->delta_lf_multi_cdf[lf_id],
+ DELTA_LF_PROBS + 1, ACCT_STR);
+ } else {
+ abs = aom_read_symbol(r, ec_ctx->delta_lf_cdf, DELTA_LF_PROBS + 1,
+ ACCT_STR);
+ }
+#else
abs =
aom_read_symbol(r, ec_ctx->delta_lf_cdf, DELTA_LF_PROBS + 1, ACCT_STR);
+#endif // CONFIG_LOOPFILTER_LEVEL
smallval = (abs < DELTA_LF_SMALL);
if (counts) {
+#if CONFIG_LOOPFILTER_LEVEL
+ if (cm->delta_lf_multi) {
+ for (i = 0; i < abs; ++i) counts->delta_lf_multi[lf_id][i][1]++;
+ if (smallval) counts->delta_lf_multi[lf_id][abs][0]++;
+ } else {
+ for (i = 0; i < abs; ++i) counts->delta_lf[i][1]++;
+ if (smallval) counts->delta_lf[abs][0]++;
+ }
+#else
for (i = 0; i < abs; ++i) counts->delta_lf[i][1]++;
if (smallval) counts->delta_lf[abs][0]++;
+#endif // CONFIG_LOOPFILTER_LEVEL
}
if (!smallval) {
- rem_bits = aom_read_literal(r, 3, ACCT_STR);
+ rem_bits = aom_read_literal(r, 3, ACCT_STR) + 1;
thr = (1 << rem_bits) + 1;
abs = aom_read_literal(r, rem_bits, ACCT_STR) + thr;
}
@@ -118,57 +140,41 @@ static int read_delta_lflevel(AV1_COMMON *cm, MACROBLOCKD *xd, aom_reader *r,
return reduced_delta_lflevel;
}
#endif
-#endif
-
-static PREDICTION_MODE read_intra_mode_y(FRAME_CONTEXT *ec_ctx, MACROBLOCKD *xd,
- aom_reader *r, int size_group) {
- const PREDICTION_MODE y_mode =
- read_intra_mode(r, ec_ctx->y_mode_cdf[size_group]);
-#if CONFIG_ENTROPY_STATS
- FRAME_COUNTS *counts = xd->counts;
- if (counts) ++counts->y_mode[size_group][y_mode];
-#else
- /* TODO(negge): Can we remove this parameter? */
- (void)xd;
-#endif // CONFIG_ENTROPY_STATS
- return y_mode;
-}
static UV_PREDICTION_MODE read_intra_mode_uv(FRAME_CONTEXT *ec_ctx,
- MACROBLOCKD *xd, aom_reader *r,
+ aom_reader *r,
PREDICTION_MODE y_mode) {
const UV_PREDICTION_MODE uv_mode =
- read_intra_mode(r, ec_ctx->uv_mode_cdf[y_mode]);
-#if CONFIG_ENTROPY_STATS
- FRAME_COUNTS *counts = xd->counts;
- if (counts) ++counts->uv_mode[y_mode][uv_mode];
+#if CONFIG_CFL
+ aom_read_symbol(r, ec_ctx->uv_mode_cdf[y_mode], UV_INTRA_MODES, ACCT_STR);
#else
- /* TODO(negge): Can we remove this parameter? */
- (void)xd;
-#endif // CONFIG_ENTROPY_STATS
+ read_intra_mode(r, ec_ctx->uv_mode_cdf[y_mode]);
+#endif // CONFIG_CFL
return uv_mode;
}
#if CONFIG_CFL
static int read_cfl_alphas(FRAME_CONTEXT *const ec_ctx, aom_reader *r,
- CFL_SIGN_TYPE signs_out[CFL_PRED_PLANES]) {
- const int ind =
- aom_read_symbol(r, ec_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE, "cfl:alpha");
- // Signs are only coded for nonzero values
- // sign == 0 implies negative alpha
- // sign == 1 implies positive alpha
- signs_out[CFL_PRED_U] = cfl_alpha_codes[ind][CFL_PRED_U]
- ? aom_read_bit(r, "cfl:sign")
- : CFL_SIGN_POS;
- signs_out[CFL_PRED_V] = cfl_alpha_codes[ind][CFL_PRED_V]
- ? aom_read_bit(r, "cfl:sign")
- : CFL_SIGN_POS;
-
- return ind;
+ int *signs_out) {
+ const int joint_sign =
+ aom_read_symbol(r, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS, "cfl:signs");
+ int idx = 0;
+ // Magnitudes are only coded for nonzero values
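+  // idx packs the U magnitude into its high bits and V into its low bits.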
+ if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
+ aom_cdf_prob *cdf_u = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
+ idx = aom_read_symbol(r, cdf_u, CFL_ALPHABET_SIZE, "cfl:alpha_u")
+ << CFL_ALPHABET_SIZE_LOG2;
+ }
+ if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
+ aom_cdf_prob *cdf_v = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
+ idx += aom_read_symbol(r, cdf_v, CFL_ALPHABET_SIZE, "cfl:alpha_v");
+ }
+ *signs_out = joint_sign;
+ return idx;
}
#endif
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_INTERINTRA
static INTERINTRA_MODE read_interintra_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
aom_reader *r, int size_group) {
(void)cm;
@@ -179,7 +185,7 @@ static INTERINTRA_MODE read_interintra_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
if (counts) ++counts->interintra_mode[size_group][ii_mode];
return ii_mode;
}
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
static PREDICTION_MODE read_inter_mode(FRAME_CONTEXT *ec_ctx, MACROBLOCKD *xd,
aom_reader *r, int16_t ctx) {
@@ -244,16 +250,11 @@ static void read_drl_idx(FRAME_CONTEXT *ec_ctx, MACROBLOCKD *xd,
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
mbmi->ref_mv_idx = 0;
-#if CONFIG_EXT_INTER
+ if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV
#if CONFIG_COMPOUND_SINGLEREF
- if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV ||
- mbmi->mode == SR_NEW_NEWMV) {
-#else // !CONFIG_COMPOUND_SINGLEREF
- if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
+ || mbmi->mode == SR_NEW_NEWMV
#endif // CONFIG_COMPOUND_SINGLEREF
-#else // !CONFIG_EXT_INTER
- if (mbmi->mode == NEWMV) {
-#endif // CONFIG_EXT_INTER
+ ) {
int idx;
for (idx = 0; idx < 2; ++idx) {
if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
@@ -295,21 +296,11 @@ static void read_drl_idx(FRAME_CONTEXT *ec_ctx, MACROBLOCKD *xd,
static MOTION_MODE read_motion_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
MODE_INFO *mi, aom_reader *r) {
MB_MODE_INFO *mbmi = &mi->mbmi;
-#if CONFIG_NEW_MULTISYMBOL
+#if !CONFIG_MOTION_VAR || !CONFIG_WARPED_MOTION || CONFIG_NEW_MULTISYMBOL || \
+ CONFIG_NCOBMC_ADAPT_WEIGHT
(void)cm;
#endif
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- const MOTION_MODE last_motion_mode_allowed =
- motion_mode_allowed_wrapper(0,
-#if CONFIG_GLOBAL_MOTION
- 0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
- xd,
-#endif
- mi);
-#else
const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
#if CONFIG_GLOBAL_MOTION
0, xd->global_motion,
@@ -318,12 +309,24 @@ static MOTION_MODE read_motion_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
xd,
#endif
mi);
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
int motion_mode;
FRAME_COUNTS *counts = xd->counts;
if (last_motion_mode_allowed == SIMPLE_TRANSLATION) return SIMPLE_TRANSLATION;
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ if (last_motion_mode_allowed == NCOBMC_ADAPT_WEIGHT) {
+ motion_mode = aom_read_symbol(r, xd->tile_ctx->ncobmc_cdf[mbmi->sb_type],
+ OBMC_FAMILY_MODES, ACCT_STR);
+ if (counts) ++counts->ncobmc[mbmi->sb_type][motion_mode];
+ return (MOTION_MODE)(SIMPLE_TRANSLATION + motion_mode);
+ } else if (last_motion_mode_allowed == OBMC_CAUSAL) {
+ motion_mode =
+ aom_read_symbol(r, xd->tile_ctx->obmc_cdf[mbmi->sb_type], 2, ACCT_STR);
+ if (counts) ++counts->obmc[mbmi->sb_type][motion_mode];
+ return (MOTION_MODE)(SIMPLE_TRANSLATION + motion_mode);
+ } else {
+#else
if (last_motion_mode_allowed == OBMC_CAUSAL) {
#if CONFIG_NEW_MULTISYMBOL
motion_mode =
@@ -334,6 +337,7 @@ static MOTION_MODE read_motion_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
if (counts) ++counts->obmc[mbmi->sb_type][motion_mode];
return (MOTION_MODE)(SIMPLE_TRANSLATION + motion_mode);
} else {
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
motion_mode =
aom_read_symbol(r, xd->tile_ctx->motion_mode_cdf[mbmi->sb_type],
@@ -347,18 +351,12 @@ static MOTION_MODE read_motion_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
#if CONFIG_NCOBMC_ADAPT_WEIGHT
static void read_ncobmc_mode(MACROBLOCKD *xd, MODE_INFO *mi,
-#ifndef TRAINING_WEIGHTS
- NCOBMC_MODE ncobmc_mode[2],
-#else
- NCOBMC_MODE ncobmc_mode[][4],
-#endif
- aom_reader *r) {
+ NCOBMC_MODE ncobmc_mode[2], aom_reader *r) {
MB_MODE_INFO *mbmi = &mi->mbmi;
FRAME_COUNTS *counts = xd->counts;
ADAPT_OVERLAP_BLOCK ao_block = adapt_overlap_block_lookup[mbmi->sb_type];
if (mbmi->motion_mode != NCOBMC_ADAPT_WEIGHT) return;
-#ifndef TRAINING_WEIGHTS
ncobmc_mode[0] = aom_read_symbol(r, xd->tile_ctx->ncobmc_mode_cdf[ao_block],
MAX_NCOBMC_MODES, ACCT_STR);
if (counts) ++counts->ncobmc_mode[ao_block][ncobmc_mode[0]];
@@ -368,27 +366,10 @@ static void read_ncobmc_mode(MACROBLOCKD *xd, MODE_INFO *mi,
MAX_NCOBMC_MODES, ACCT_STR);
if (counts) ++counts->ncobmc_mode[ao_block][ncobmc_mode[1]];
}
-#else
- int i;
- for (i = 0; i < 4; ++i) {
- ncobmc_mode[0][i] = aom_read_symbol(
- r, xd->tile_ctx->ncobmc_mode_cdf[ao_block], MAX_NCOBMC_MODES, ACCT_STR);
- if (counts) ++counts->ncobmc_mode[ao_block][ncobmc_mode[0][i]];
- }
- if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) {
- for (i = 0; i < 4; ++i) {
- ncobmc_mode[1][i] =
- aom_read_symbol(r, xd->tile_ctx->ncobmc_mode_cdf[ao_block],
- MAX_NCOBMC_MODES, ACCT_STR);
- if (counts) ++counts->ncobmc_mode[ao_block][ncobmc_mode[1][i]];
- }
- }
-#endif
}
-#endif
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_EXT_INTER
static PREDICTION_MODE read_inter_compound_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
aom_reader *r, int16_t ctx) {
(void)cm;
@@ -418,7 +399,6 @@ static PREDICTION_MODE read_inter_singleref_comp_mode(MACROBLOCKD *xd,
return SR_NEAREST_NEARMV + mode;
}
#endif // CONFIG_COMPOUND_SINGLEREF
-#endif // CONFIG_EXT_INTER
static int read_segment_id(aom_reader *r, struct segmentation_probs *segp) {
return aom_read_symbol(r, segp->tree_cdf, MAX_SEGMENTS, ACCT_STR);
@@ -445,6 +425,7 @@ static void read_tx_size_vartx(AV1_COMMON *cm, MACROBLOCKD *xd,
[MAX_MIB_SIZE] =
(TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+ assert(tx_size > TX_4X4);
if (depth == MAX_VARTX_DEPTH) {
int idx, idy;
@@ -454,7 +435,6 @@ static void read_tx_size_vartx(AV1_COMMON *cm, MACROBLOCKD *xd,
inter_tx_size[idy][idx] = tx_size;
mbmi->tx_size = tx_size;
mbmi->min_tx_size = AOMMIN(mbmi->min_tx_size, get_min_tx_size(tx_size));
- if (counts) ++counts->txfm_partition[ctx][0];
txfm_partition_update(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row, tx_size, tx_size);
return;
@@ -473,7 +453,7 @@ static void read_tx_size_vartx(AV1_COMMON *cm, MACROBLOCKD *xd,
if (counts) ++counts->txfm_partition[ctx][1];
- if (tx_size == TX_8X8) {
+ if (sub_txs == TX_4X4) {
int idx, idy;
inter_tx_size[0][0] = sub_txs;
for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
@@ -509,7 +489,7 @@ static void read_tx_size_vartx(AV1_COMMON *cm, MACROBLOCKD *xd,
#endif
static TX_SIZE read_selected_tx_size(AV1_COMMON *cm, MACROBLOCKD *xd,
- int tx_size_cat, aom_reader *r) {
+ int32_t tx_size_cat, aom_reader *r) {
FRAME_COUNTS *counts = xd->counts;
const int ctx = get_tx_size_context(xd);
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
@@ -530,11 +510,8 @@ static TX_SIZE read_tx_size(AV1_COMMON *cm, MACROBLOCKD *xd, int is_inter,
const TX_MODE tx_mode = cm->tx_mode;
const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return TX_4X4;
-#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_EXT_TX) && CONFIG_RECT_TX
- if (bsize > BLOCK_4X4) {
-#else
- if (bsize >= BLOCK_8X8) {
-#endif // CONFIG_CB4X4 && CONFIG_VAR_TX
+
+ if (block_signals_txsize(bsize)) {
if ((!is_inter || allow_select_inter) && tx_mode == TX_MODE_SELECT) {
const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
: intra_tx_size_cat_lookup[bsize];
@@ -548,10 +525,14 @@ static TX_SIZE read_tx_size(AV1_COMMON *cm, MACROBLOCKD *xd, int is_inter,
int quarter_tx;
if (quarter_txsize_lookup[bsize] != max_txsize_lookup[bsize]) {
+#if CONFIG_NEW_MULTISYMBOL
+ quarter_tx =
+ aom_read_symbol(r, cm->fc->quarter_tx_size_cdf, 2, ACCT_STR);
+#else
quarter_tx = aom_read(r, cm->fc->quarter_tx_size_prob, ACCT_STR);
FRAME_COUNTS *counts = xd->counts;
-
if (counts) ++counts->quarter_tx_size[quarter_tx];
+#endif
} else {
quarter_tx = 1;
}
@@ -707,39 +688,55 @@ static int read_skip(AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id,
}
}
-#if CONFIG_PALETTE
#if CONFIG_PALETTE_DELTA_ENCODING
-static int uint16_compare(const void *a, const void *b) {
- const uint16_t va = *(const uint16_t *)a;
- const uint16_t vb = *(const uint16_t *)b;
- return va - vb;
+// Merge the sorted list of cached colors (cached_colors[0...n_cached_colors-1])
+// and the sorted list of transmitted colors (colors[n_cached_colors...n-1])
+// into a single sorted list (colors[...]).
+static void merge_colors(uint16_t *colors, uint16_t *cached_colors,
+ int n_colors, int n_cached_colors) {
+ if (n_cached_colors == 0) return;
+ int cache_idx = 0, trans_idx = n_cached_colors;
+ for (int i = 0; i < n_colors; ++i) {
+ if (cache_idx < n_cached_colors &&
+ (trans_idx >= n_colors ||
+ cached_colors[cache_idx] <= colors[trans_idx])) {
+ colors[i] = cached_colors[cache_idx++];
+ } else {
+ assert(trans_idx < n_colors);
+ colors[i] = colors[trans_idx++];
+ }
+ }
}
static void read_palette_colors_y(MACROBLOCKD *const xd, int bit_depth,
PALETTE_MODE_INFO *const pmi, aom_reader *r) {
uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- const MODE_INFO *const above_mi = xd->above_mi;
- const MODE_INFO *const left_mi = xd->left_mi;
- const int n_cache = av1_get_palette_cache(above_mi, left_mi, 0, color_cache);
+ uint16_t cached_colors[PALETTE_MAX_SIZE];
+ const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
const int n = pmi->palette_size[0];
int idx = 0;
for (int i = 0; i < n_cache && idx < n; ++i)
- if (aom_read_bit(r, ACCT_STR)) pmi->palette_colors[idx++] = color_cache[i];
+ if (aom_read_bit(r, ACCT_STR)) cached_colors[idx++] = color_cache[i];
if (idx < n) {
+ const int n_cached_colors = idx;
pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR);
if (idx < n) {
const int min_bits = bit_depth - 3;
int bits = min_bits + aom_read_literal(r, 2, ACCT_STR);
int range = (1 << bit_depth) - pmi->palette_colors[idx - 1] - 1;
for (; idx < n; ++idx) {
+ assert(range >= 0);
const int delta = aom_read_literal(r, bits, ACCT_STR) + 1;
- pmi->palette_colors[idx] = pmi->palette_colors[idx - 1] + delta;
- range -= delta;
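+        // Clamp so corrupt deltas cannot push colors out of the valid range.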
+ pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta,
+ 0, (1 << bit_depth) - 1);
+ range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]);
bits = AOMMIN(bits, av1_ceil_log2(range));
}
}
+ merge_colors(pmi->palette_colors, cached_colors, n, n_cached_colors);
+ } else {
+ memcpy(pmi->palette_colors, cached_colors, n * sizeof(cached_colors[0]));
}
- qsort(pmi->palette_colors, n, sizeof(pmi->palette_colors[0]), uint16_compare);
}
static void read_palette_colors_uv(MACROBLOCKD *const xd, int bit_depth,
@@ -748,28 +745,34 @@ static void read_palette_colors_uv(MACROBLOCKD *const xd, int bit_depth,
const int n = pmi->palette_size[1];
// U channel colors.
uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- const MODE_INFO *const above_mi = xd->above_mi;
- const MODE_INFO *const left_mi = xd->left_mi;
- const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache);
- int idx = PALETTE_MAX_SIZE;
- for (int i = 0; i < n_cache && idx < PALETTE_MAX_SIZE + n; ++i)
- if (aom_read_bit(r, ACCT_STR)) pmi->palette_colors[idx++] = color_cache[i];
- if (idx < PALETTE_MAX_SIZE + n) {
+ uint16_t cached_colors[PALETTE_MAX_SIZE];
+ const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
+ int idx = 0;
+ for (int i = 0; i < n_cache && idx < n; ++i)
+ if (aom_read_bit(r, ACCT_STR)) cached_colors[idx++] = color_cache[i];
+ if (idx < n) {
+ const int n_cached_colors = idx;
+ idx += PALETTE_MAX_SIZE;
pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR);
if (idx < PALETTE_MAX_SIZE + n) {
const int min_bits = bit_depth - 3;
int bits = min_bits + aom_read_literal(r, 2, ACCT_STR);
int range = (1 << bit_depth) - pmi->palette_colors[idx - 1];
for (; idx < PALETTE_MAX_SIZE + n; ++idx) {
+ assert(range >= 0);
const int delta = aom_read_literal(r, bits, ACCT_STR);
- pmi->palette_colors[idx] = pmi->palette_colors[idx - 1] + delta;
- range -= delta;
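+          // Clamp so corrupt deltas cannot push colors out of the valid range.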
+ pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta,
+ 0, (1 << bit_depth) - 1);
+ range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]);
bits = AOMMIN(bits, av1_ceil_log2(range));
}
}
+ merge_colors(pmi->palette_colors + PALETTE_MAX_SIZE, cached_colors, n,
+ n_cached_colors);
+ } else {
+ memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, cached_colors,
+ n * sizeof(cached_colors[0]));
}
- qsort(pmi->palette_colors + PALETTE_MAX_SIZE, n,
- sizeof(pmi->palette_colors[0]), uint16_compare);
// V channel colors.
if (aom_read_bit(r, ACCT_STR)) { // Delta encoding.
@@ -804,6 +807,10 @@ static void read_palette_mode_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
const BLOCK_SIZE bsize = mbmi->sb_type;
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_LARGEST);
+ const int block_palette_idx = bsize - BLOCK_8X8;
+ int modev;
+
if (mbmi->mode == DC_PRED) {
int palette_y_mode_ctx = 0;
if (above_mi) {
@@ -814,12 +821,21 @@ static void read_palette_mode_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
palette_y_mode_ctx +=
(left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
}
- if (aom_read(r, av1_default_palette_y_mode_prob[bsize - BLOCK_8X8]
- [palette_y_mode_ctx],
- ACCT_STR)) {
+#if CONFIG_NEW_MULTISYMBOL
+ modev = aom_read_symbol(
+ r,
+ xd->tile_ctx->palette_y_mode_cdf[block_palette_idx][palette_y_mode_ctx],
+ 2, ACCT_STR);
+#else
+ modev = aom_read(
+ r,
+ av1_default_palette_y_mode_prob[block_palette_idx][palette_y_mode_ctx],
+ ACCT_STR);
+#endif
+ if (modev) {
pmi->palette_size[0] =
aom_read_symbol(r,
- xd->tile_ctx->palette_y_size_cdf[bsize - BLOCK_8X8],
+ xd->tile_ctx->palette_y_size_cdf[block_palette_idx],
PALETTE_SIZES, ACCT_STR) +
2;
#if CONFIG_PALETTE_DELTA_ENCODING
@@ -830,14 +846,19 @@ static void read_palette_mode_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
#endif // CONFIG_PALETTE_DELTA_ENCODING
}
}
-
if (mbmi->uv_mode == UV_DC_PRED) {
const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);
- if (aom_read(r, av1_default_palette_uv_mode_prob[palette_uv_mode_ctx],
- ACCT_STR)) {
+#if CONFIG_NEW_MULTISYMBOL
+ modev = aom_read_symbol(
+ r, xd->tile_ctx->palette_uv_mode_cdf[palette_uv_mode_ctx], 2, ACCT_STR);
+#else
+ modev = aom_read(r, av1_default_palette_uv_mode_prob[palette_uv_mode_ctx],
+ ACCT_STR);
+#endif
+ if (modev) {
pmi->palette_size[1] =
aom_read_symbol(r,
- xd->tile_ctx->palette_uv_size_cdf[bsize - BLOCK_8X8],
+ xd->tile_ctx->palette_uv_size_cdf[block_palette_idx],
PALETTE_SIZES, ACCT_STR) +
2;
#if CONFIG_PALETTE_DELTA_ENCODING
@@ -853,7 +874,6 @@ static void read_palette_mode_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
}
}
}
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
static void read_filter_intra_mode_info(AV1_COMMON *const cm,
@@ -865,11 +885,7 @@ static void read_filter_intra_mode_info(AV1_COMMON *const cm,
FILTER_INTRA_MODE_INFO *filter_intra_mode_info =
&mbmi->filter_intra_mode_info;
- if (mbmi->mode == DC_PRED
-#if CONFIG_PALETTE
- && mbmi->palette_mode_info.palette_size[0] == 0
-#endif // CONFIG_PALETTE
- ) {
+ if (mbmi->mode == DC_PRED && mbmi->palette_mode_info.palette_size[0] == 0) {
filter_intra_mode_info->use_filter_intra_mode[0] =
aom_read(r, cm->fc->filter_intra_probs[0], ACCT_STR);
if (filter_intra_mode_info->use_filter_intra_mode[0]) {
@@ -892,11 +908,8 @@ static void read_filter_intra_mode_info(AV1_COMMON *const cm,
(void)mi_col;
#endif // CONFIG_CB4X4
- if (mbmi->uv_mode == UV_DC_PRED
-#if CONFIG_PALETTE
- && mbmi->palette_mode_info.palette_size[1] == 0
-#endif // CONFIG_PALETTE
- ) {
+ if (mbmi->uv_mode == UV_DC_PRED &&
+ mbmi->palette_mode_info.palette_size[1] == 0) {
filter_intra_mode_info->use_filter_intra_mode[1] =
aom_read(r, cm->fc->filter_intra_probs[1], ACCT_STR);
if (filter_intra_mode_info->use_filter_intra_mode[1]) {
@@ -926,6 +939,9 @@ static void read_intra_angle_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
mbmi->angle_delta[0] = 0;
mbmi->angle_delta[1] = 0;
+#if CONFIG_INTRA_INTERP
+ mbmi->intra_filter = INTRA_FILTER_LINEAR;
+#endif // CONFIG_INTRA_INTERP
if (!av1_use_angle_delta(bsize)) return;
@@ -939,8 +955,6 @@ static void read_intra_angle_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
mbmi->intra_filter = aom_read_symbol(r, ec_ctx->intra_filter_cdf[ctx],
INTRA_FILTERS, ACCT_STR);
if (counts) ++counts->intra_filter[ctx][mbmi->intra_filter];
- } else {
- mbmi->intra_filter = INTRA_FILTER_LINEAR;
}
#endif // CONFIG_INTRA_INTERP
}
@@ -980,6 +994,9 @@ void av1_read_tx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd,
(void)block;
TX_TYPE *tx_type = &mbmi->txk_type[(blk_row << 4) + blk_col];
#endif
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = 0;
+#endif
if (!FIXED_TX_TYPE) {
#if CONFIG_EXT_TX
@@ -993,29 +1010,91 @@ void av1_read_tx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd,
!supertx_enabled &&
#endif // CONFIG_SUPERTX
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ const TxSetType tx_set_type = get_ext_tx_set_type(
+ tx_size, mbmi->sb_type, inter_block, cm->reduced_tx_set_used);
const int eset = get_ext_tx_set(tx_size, mbmi->sb_type, inter_block,
cm->reduced_tx_set_used);
// eset == 0 should correspond to a set with only DCT_DCT and
// there is no need to read the tx_type
assert(eset != 0);
- FRAME_COUNTS *counts = xd->counts;
+#if !CONFIG_LGT_FROM_PRED
if (inter_block) {
- *tx_type = av1_ext_tx_inter_inv[eset][aom_read_symbol(
+ *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol(
r, ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
- ext_tx_cnt_inter[eset], ACCT_STR)];
- if (counts) ++counts->inter_ext_tx[eset][square_tx_size][*tx_type];
+ av1_num_ext_tx_set[tx_set_type], ACCT_STR)];
} else if (ALLOW_INTRA_EXT_TX) {
- *tx_type = av1_ext_tx_intra_inv[eset][aom_read_symbol(
+ *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol(
r, ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode],
- ext_tx_cnt_intra[eset], ACCT_STR)];
- if (counts)
- ++counts->intra_ext_tx[eset][square_tx_size][mbmi->mode][*tx_type];
+ av1_num_ext_tx_set[tx_set_type], ACCT_STR)];
}
+#else
+  // Only signal tx_type when LGT is not allowed or not selected.
+ if (inter_block) {
+ if (LGT_FROM_PRED_INTER) {
+ if (is_lgt_allowed(mbmi->mode, tx_size) && !cm->reduced_tx_set_used) {
+ mbmi->use_lgt =
+ aom_read(r, ec_ctx->inter_lgt_prob[square_tx_size], ACCT_STR);
+#if CONFIG_ENTROPY_STATS
+ if (counts) ++counts->inter_lgt[square_tx_size][mbmi->use_lgt];
+#endif // CONFIG_ENTROPY_STATS
+ }
+ if (!mbmi->use_lgt) {
+ *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol(
+ r, ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
+ av1_num_ext_tx_set[tx_set_type], ACCT_STR)];
+#if CONFIG_ENTROPY_STATS
+ if (counts) ++counts->inter_ext_tx[eset][square_tx_size][*tx_type];
+#endif // CONFIG_ENTROPY_STATS
+ } else {
+ *tx_type = DCT_DCT; // assign a dummy tx_type
+ }
+ } else {
+ *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol(
+ r, ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
+ av1_num_ext_tx_set[tx_set_type], ACCT_STR)];
+#if CONFIG_ENTROPY_STATS
+ if (counts) ++counts->inter_ext_tx[eset][square_tx_size][*tx_type];
+#endif // CONFIG_ENTROPY_STATS
+ }
+ } else if (ALLOW_INTRA_EXT_TX) {
+ if (LGT_FROM_PRED_INTRA) {
+ if (is_lgt_allowed(mbmi->mode, tx_size) && !cm->reduced_tx_set_used) {
+ mbmi->use_lgt =
+ aom_read(r, ec_ctx->intra_lgt_prob[square_tx_size][mbmi->mode],
+ ACCT_STR);
+#if CONFIG_ENTROPY_STATS
+ if (counts)
+ ++counts->intra_lgt[square_tx_size][mbmi->mode][mbmi->use_lgt];
+#endif // CONFIG_ENTROPY_STATS
+ }
+ if (!mbmi->use_lgt) {
+ *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol(
+ r, ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode],
+ av1_num_ext_tx_set[tx_set_type], ACCT_STR)];
+#if CONFIG_ENTROPY_STATS
+ if (counts)
+ ++counts
+ ->intra_ext_tx[eset][square_tx_size][mbmi->mode][*tx_type];
+#endif // CONFIG_ENTROPY_STATS
+ } else {
+ *tx_type = DCT_DCT; // assign a dummy tx_type
+ }
+ } else {
+ *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol(
+ r, ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode],
+ av1_num_ext_tx_set[tx_set_type], ACCT_STR)];
+#if CONFIG_ENTROPY_STATS
+ if (counts)
+ ++counts->intra_ext_tx[eset][square_tx_size][mbmi->mode][*tx_type];
+#endif // CONFIG_ENTROPY_STATS
+ }
+ }
+#endif // CONFIG_LGT_FROM_PRED
} else {
*tx_type = DCT_DCT;
}
-#else
+#else // CONFIG_EXT_TX
if (tx_size < TX_32X32 &&
((!cm->seg.enabled && cm->base_qindex > 0) ||
@@ -1025,18 +1104,23 @@ void av1_read_tx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd,
!supertx_enabled &&
#endif // CONFIG_SUPERTX
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+#if CONFIG_ENTROPY_STATS
FRAME_COUNTS *counts = xd->counts;
-
+#endif // CONFIG_ENTROPY_STATS
if (inter_block) {
*tx_type = av1_ext_tx_inv[aom_read_symbol(
r, ec_ctx->inter_ext_tx_cdf[tx_size], TX_TYPES, ACCT_STR)];
+#if CONFIG_ENTROPY_STATS
if (counts) ++counts->inter_ext_tx[tx_size][*tx_type];
+#endif // CONFIG_ENTROPY_STATS
} else {
const TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode];
*tx_type = av1_ext_tx_inv[aom_read_symbol(
r, ec_ctx->intra_ext_tx_cdf[tx_size][tx_type_nom], TX_TYPES,
ACCT_STR)];
+#if CONFIG_ENTROPY_STATS
if (counts) ++counts->intra_ext_tx[tx_size][tx_type_nom][*tx_type];
+#endif // CONFIG_ENTROPY_STATS
}
} else {
*tx_type = DCT_DCT;
@@ -1091,7 +1175,6 @@ static void read_intra_frame_mode_info(AV1_COMMON *const cm,
mbmi->segment_id = read_intra_segment_id(cm, xd, mi_offset, x_mis, y_mis, r);
mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
-#if CONFIG_DELTA_Q
if (cm->delta_q_present_flag) {
xd->current_qindex =
xd->prev_qindex +
@@ -1101,40 +1184,52 @@ static void read_intra_frame_mode_info(AV1_COMMON *const cm,
xd->prev_qindex = xd->current_qindex;
#if CONFIG_EXT_DELTA_Q
if (cm->delta_lf_present_flag) {
- mbmi->current_delta_lf_from_base = xd->current_delta_lf_from_base =
+#if CONFIG_LOOPFILTER_LEVEL
+ if (cm->delta_lf_multi) {
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id) {
+ mbmi->curr_delta_lf[lf_id] = xd->curr_delta_lf[lf_id] =
+ xd->prev_delta_lf[lf_id] +
+ read_delta_lflevel(cm, xd, r, lf_id, mbmi, mi_col, mi_row) *
+ cm->delta_lf_res;
+ xd->prev_delta_lf[lf_id] = xd->curr_delta_lf[lf_id];
+ }
+ } else {
+ mbmi->current_delta_lf_from_base = xd->current_delta_lf_from_base =
+ xd->prev_delta_lf_from_base +
+ read_delta_lflevel(cm, xd, r, -1, mbmi, mi_col, mi_row) *
+ cm->delta_lf_res;
+ xd->prev_delta_lf_from_base = xd->current_delta_lf_from_base;
+ }
+#else
+ const int current_delta_lf_from_base =
xd->prev_delta_lf_from_base +
read_delta_lflevel(cm, xd, r, mbmi, mi_col, mi_row) *
cm->delta_lf_res;
+ mbmi->current_delta_lf_from_base = xd->current_delta_lf_from_base =
+ clamp(current_delta_lf_from_base, 0, MAX_LOOP_FILTER);
xd->prev_delta_lf_from_base = xd->current_delta_lf_from_base;
+#endif // CONFIG_LOOPFILTER_LEVEL
}
#endif
}
-#endif
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->ref_frame[1] = NONE_FRAME;
#if CONFIG_INTRABC
- if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools) {
- mbmi->use_intrabc = aom_read(r, ec_ctx->intrabc_prob, ACCT_STR);
+ if (av1_allow_intrabc(bsize, cm)) {
+ mbmi->use_intrabc = aom_read_symbol(r, ec_ctx->intrabc_cdf, 2, ACCT_STR);
if (mbmi->use_intrabc) {
mbmi->tx_size = read_tx_size(cm, xd, 1, !mbmi->skip, r);
mbmi->mode = mbmi->uv_mode = UV_DC_PRED;
-#if CONFIG_DUAL_FILTER
- for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR;
-#else
- mbmi->interp_filter = BILINEAR;
-#endif
+ mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES];
- int_mv ref_mvs[MAX_MV_REF_CANDIDATES] = {};
+ int_mv ref_mvs[MAX_MV_REF_CANDIDATES];
av1_find_mv_refs(cm, xd, mi, INTRA_FRAME, &xd->ref_mv_count[INTRA_FRAME],
- xd->ref_mv_stack[INTRA_FRAME],
-#if CONFIG_EXT_INTER
- NULL,
-#endif // CONFIG_EXT_INTER
- ref_mvs, mi_row, mi_col, NULL, NULL, inter_mode_ctx);
+ xd->ref_mv_stack[INTRA_FRAME], NULL, ref_mvs, mi_row,
+ mi_col, NULL, NULL, inter_mode_ctx);
int_mv nearestmv, nearmv;
av1_find_best_ref_mvs(0, ref_mvs, &nearestmv, &nearmv);
@@ -1201,15 +1296,18 @@ static void read_intra_frame_mode_info(AV1_COMMON *const cm,
#if CONFIG_CB4X4
if (is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
xd->plane[1].subsampling_y)) {
- mbmi->uv_mode = read_intra_mode_uv(ec_ctx, xd, r, mbmi->mode);
-#else
- mbmi->uv_mode = read_intra_mode_uv(ec_ctx, xd, r, mbmi->mode);
-#endif
+#if CONFIG_CFL
+ xd->cfl->is_chroma_reference = 1;
+#endif // CONFIG_CFL
+#endif // CONFIG_CB4X4
+ mbmi->uv_mode = read_intra_mode_uv(ec_ctx, r, mbmi->mode);
#if CONFIG_CFL
- // TODO(ltrudeau) support PALETTE
- if (mbmi->uv_mode == UV_DC_PRED) {
- mbmi->cfl_alpha_idx = read_cfl_alphas(ec_ctx, r, mbmi->cfl_alpha_signs);
+ if (mbmi->uv_mode == UV_CFL_PRED) {
+ mbmi->cfl_alpha_idx = read_cfl_alphas(ec_ctx, r, &mbmi->cfl_alpha_signs);
+ xd->cfl->store_y = 1;
+ } else {
+ xd->cfl->store_y = 0;
}
#endif // CONFIG_CFL
@@ -1217,18 +1315,20 @@ static void read_intra_frame_mode_info(AV1_COMMON *const cm,
} else {
    // Avoid decoding angle_info if there is no chroma prediction
mbmi->uv_mode = UV_DC_PRED;
+#if CONFIG_CFL
+ xd->cfl->is_chroma_reference = 0;
+ xd->cfl->store_y = 1;
+#endif
}
#endif
#if CONFIG_EXT_INTRA
read_intra_angle_info(cm, xd, r);
#endif // CONFIG_EXT_INTRA
-#if CONFIG_PALETTE
mbmi->palette_mode_info.palette_size[0] = 0;
mbmi->palette_mode_info.palette_size[1] = 0;
- if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools)
+ if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
read_palette_mode_info(cm, xd, r);
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
@@ -1246,9 +1346,9 @@ static void read_intra_frame_mode_info(AV1_COMMON *const cm,
}
static int read_mv_component(aom_reader *r, nmv_component *mvcomp,
-#if CONFIG_INTRABC
+#if CONFIG_INTRABC || CONFIG_AMVR
int use_subpel,
-#endif // CONFIG_INTRABC
+#endif // CONFIG_INTRABC || CONFIG_AMVR
int usehp) {
int mag, d, fr, hp;
#if CONFIG_NEW_MULTISYMBOL
@@ -1271,15 +1371,19 @@ static int read_mv_component(aom_reader *r, nmv_component *mvcomp,
} else {
int i;
const int n = mv_class + CLASS0_BITS - 1; // number of bits
-
d = 0;
+#if CONFIG_NEW_MULTISYMBOL
+ for (i = 0; i < n; ++i)
+ d |= aom_read_symbol(r, mvcomp->bits_cdf[(i + 1) / 2], 2, ACCT_STR) << i;
+#else
for (i = 0; i < n; ++i) d |= aom_read(r, mvcomp->bits[i], ACCT_STR) << i;
+#endif
mag = CLASS0_SIZE << (mv_class + 2);
}
-#if CONFIG_INTRABC
+#if CONFIG_INTRABC || CONFIG_AMVR
if (use_subpel) {
-#endif // CONFIG_INTRABC
+#endif // CONFIG_INTRABC || CONFIG_AMVR
// Fractional part
fr = aom_read_symbol(r, class0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
MV_FP_SIZE, ACCT_STR);
@@ -1294,12 +1398,12 @@ static int read_mv_component(aom_reader *r, nmv_component *mvcomp,
hp = usehp ? aom_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp, ACCT_STR)
: 1;
#endif
-#if CONFIG_INTRABC
+#if CONFIG_INTRABC || CONFIG_AMVR
} else {
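+    // No sub-pel bits in the stream: use the largest fractional values so
+    // the final magnitude lands on full-pel positions.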
fr = 3;
hp = 1;
}
-#endif // CONFIG_INTRABC
+#endif // CONFIG_INTRABC || CONFIG_AMVR
// Result
mag += ((d << 3) | (fr << 1) | hp) + 1;
@@ -1316,16 +1420,16 @@ static INLINE void read_mv(aom_reader *r, MV *mv, const MV *ref,
if (mv_joint_vertical(joint_type))
diff.row = read_mv_component(r, &ctx->comps[0],
-#if CONFIG_INTRABC
+#if CONFIG_INTRABC || CONFIG_AMVR
precision > MV_SUBPEL_NONE,
-#endif // CONFIG_INTRABC
+#endif // CONFIG_INTRABC || CONFIG_AMVR
precision > MV_SUBPEL_LOW_PRECISION);
if (mv_joint_horizontal(joint_type))
diff.col = read_mv_component(r, &ctx->comps[1],
-#if CONFIG_INTRABC
+#if CONFIG_INTRABC || CONFIG_AMVR
precision > MV_SUBPEL_NONE,
-#endif // CONFIG_INTRABC
+#endif // CONFIG_INTRABC || CONFIG_AMVR
precision > MV_SUBPEL_LOW_PRECISION);
av1_inc_mv(&diff, counts, precision);
@@ -1337,9 +1441,7 @@ static INLINE void read_mv(aom_reader *r, MV *mv, const MV *ref,
static REFERENCE_MODE read_block_reference_mode(AV1_COMMON *cm,
const MACROBLOCKD *xd,
aom_reader *r) {
-#if !SUB8X8_COMP_REF
- if (xd->mi[0]->mbmi.sb_type == BLOCK_4X4) return SINGLE_REFERENCE;
-#endif
+ if (!is_comp_ref_allowed(xd->mi[0]->mbmi.sb_type)) return SINGLE_REFERENCE;
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
const int ctx = av1_get_reference_mode_context(cm, xd);
#if CONFIG_NEW_MULTISYMBOL
@@ -1360,29 +1462,41 @@ static REFERENCE_MODE read_block_reference_mode(AV1_COMMON *cm,
#if CONFIG_NEW_MULTISYMBOL
#define READ_REF_BIT(pname) \
aom_read_symbol(r, av1_get_pred_cdf_##pname(cm, xd), 2, ACCT_STR)
+#define READ_REF_BIT2(pname) \
+ aom_read_symbol(r, av1_get_pred_cdf_##pname(xd), 2, ACCT_STR)
#else
#define READ_REF_BIT(pname) \
aom_read(r, av1_get_pred_prob_##pname(cm, xd), ACCT_STR)
+#define READ_REF_BIT2(pname) \
+ aom_read(r, av1_get_pred_prob_##pname(cm, xd), ACCT_STR)
#endif
#if CONFIG_EXT_COMP_REFS
-static REFERENCE_MODE read_comp_reference_type(AV1_COMMON *cm,
- const MACROBLOCKD *xd,
- aom_reader *r) {
+static COMP_REFERENCE_TYPE read_comp_reference_type(AV1_COMMON *cm,
+ const MACROBLOCKD *xd,
+ aom_reader *r) {
const int ctx = av1_get_comp_reference_type_context(xd);
#if USE_UNI_COMP_REFS
COMP_REFERENCE_TYPE comp_ref_type;
#if CONFIG_VAR_REFS
- if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm))
- if (L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm) || BWD_AND_ALT(cm))
+ if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm)) {
+ if (L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm) || BWD_AND_ALT(cm)) {
#endif // CONFIG_VAR_REFS
- comp_ref_type = (COMP_REFERENCE_TYPE)aom_read(
- r, cm->fc->comp_ref_type_prob[ctx], ACCT_STR);
+#if CONFIG_NEW_MULTISYMBOL
+ (void)cm;
+ comp_ref_type = (COMP_REFERENCE_TYPE)aom_read_symbol(
+ r, xd->tile_ctx->comp_ref_type_cdf[ctx], 2, ACCT_STR);
+#else
+ comp_ref_type = (COMP_REFERENCE_TYPE)aom_read(
+ r, cm->fc->comp_ref_type_prob[ctx], ACCT_STR);
+#endif
#if CONFIG_VAR_REFS
- else
+ } else {
comp_ref_type = BIDIR_COMP_REFERENCE;
- else
+ }
+ } else {
comp_ref_type = UNIDIR_COMP_REFERENCE;
+ }
#endif // CONFIG_VAR_REFS
#else // !USE_UNI_COMP_REFS
// TODO(zoeliu): Temporarily turn off uni-directional comp refs
@@ -1398,9 +1512,6 @@ static REFERENCE_MODE read_comp_reference_type(AV1_COMMON *cm,
static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
aom_reader *r, int segment_id,
MV_REFERENCE_FRAME ref_frame[2]) {
-#if CONFIG_EXT_COMP_REFS
- FRAME_CONTEXT *const fc = cm->fc;
-#endif
FRAME_COUNTS *counts = xd->counts;
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
@@ -1426,7 +1537,7 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
#if CONFIG_VAR_REFS
if ((L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm)) && BWD_AND_ALT(cm))
#endif // CONFIG_VAR_REFS
- bit = aom_read(r, fc->uni_comp_ref_prob[ctx][0], ACCT_STR);
+ bit = READ_REF_BIT2(uni_comp_ref_p);
#if CONFIG_VAR_REFS
else
bit = BWD_AND_ALT(cm);
@@ -1442,7 +1553,7 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
#if CONFIG_VAR_REFS
if (L_AND_L2(cm) && (L_AND_L3(cm) || L_AND_G(cm)))
#endif // CONFIG_VAR_REFS
- bit1 = aom_read(r, fc->uni_comp_ref_prob[ctx1][1], ACCT_STR);
+ bit1 = READ_REF_BIT2(uni_comp_ref_p1);
#if CONFIG_VAR_REFS
else
bit1 = L_AND_L3(cm) || L_AND_G(cm);
@@ -1455,7 +1566,7 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
#if CONFIG_VAR_REFS
if (L_AND_L3(cm) && L_AND_G(cm))
#endif // CONFIG_VAR_REFS
- bit2 = aom_read(r, fc->uni_comp_ref_prob[ctx2][2], ACCT_STR);
+ bit2 = READ_REF_BIT2(uni_comp_ref_p2);
#if CONFIG_VAR_REFS
else
bit2 = L_AND_G(cm);
@@ -1482,15 +1593,15 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
#endif // CONFIG_EXT_COMP_REFS
// Normative in decoder (for low delay)
-#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS
+#if CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
const int idx = 1;
-#else // !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS)
+#else // !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS)
#if CONFIG_EXT_REFS
const int idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
#else // !CONFIG_EXT_REFS
const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
#endif // CONFIG_EXT_REFS
-#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS
+#endif // CONFIG_ONE_SIDED_COMPOUND || CONFIG_FRAME_SIGN_BIAS
const int ctx = av1_get_pred_context_comp_ref_p(cm, xd);
#if CONFIG_VAR_REFS
@@ -1541,12 +1652,8 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
const int ctx_bwd = av1_get_pred_context_comp_bwdref_p(cm, xd);
#if CONFIG_VAR_REFS
int bit_bwd;
-// Test need to explicitly code (BWD/ALT2) vs (ALT) branch node in tree
-#if CONFIG_ALTREF2
+ // Test whether the (BWD/ALT2) vs (ALT) branch node needs explicit coding
const int bit_bwd_uncertain = BWD_OR_ALT2(cm) && ALTREF_IS_VALID(cm);
-#else // !CONFIG_ALTREF2
- const int bit_bwd_uncertain = BWD_AND_ALT(cm);
-#endif // CONFIG_ALTREF2
if (bit_bwd_uncertain)
bit_bwd = READ_REF_BIT(comp_bwdref_p);
else
@@ -1555,7 +1662,6 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
const int bit_bwd = READ_REF_BIT(comp_bwdref_p);
#endif // CONFIG_VAR_REFS
if (counts) ++counts->comp_bwdref[ctx_bwd][0][bit_bwd];
-#if CONFIG_ALTREF2
if (!bit_bwd) {
const int ctx1_bwd = av1_get_pred_context_comp_bwdref_p1(cm, xd);
#if CONFIG_VAR_REFS
@@ -1572,9 +1678,6 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
} else {
ref_frame[idx] = cm->comp_bwd_ref[2];
}
-#else // !CONFIG_ALTREF2
- ref_frame[idx] = cm->comp_bwd_ref[bit_bwd];
-#endif // CONFIG_ALTREF2
#else // !CONFIG_EXT_REFS
ref_frame[!idx] = cm->comp_var_ref[bit];
ref_frame[idx] = cm->comp_fixed_ref;
@@ -1584,12 +1687,13 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
const int ctx0 = av1_get_pred_context_single_ref_p1(xd);
#if CONFIG_VAR_REFS
int bit0;
- // Test need to explicitly code (L,L2,L3,G) vs (BWD,ALT) branch node in
- // tree
- if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm))
+ // Test whether the (L,L2,L3,G) vs (BWD,ALT2,ALT) branch node needs
+ // explicit coding
+ if ((L_OR_L2(cm) || L3_OR_G(cm)) &&
+ (BWD_OR_ALT2(cm) || ALTREF_IS_VALID(cm)))
bit0 = READ_REF_BIT(single_ref_p1);
else
- bit0 = BWD_OR_ALT(cm);
+ bit0 = (BWD_OR_ALT2(cm) || ALTREF_IS_VALID(cm));
#else // !CONFIG_VAR_REFS
const int bit0 = READ_REF_BIT(single_ref_p1);
#endif // CONFIG_VAR_REFS
@@ -1599,12 +1703,8 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
const int ctx1 = av1_get_pred_context_single_ref_p2(xd);
#if CONFIG_VAR_REFS
int bit1;
-// Test need to explicitly code (BWD/ALT2) vs (ALT) branch node in tree
-#if CONFIG_ALTREF2
+ // Test whether the (BWD/ALT2) vs (ALT) branch node needs explicit coding
const int bit1_uncertain = BWD_OR_ALT2(cm) && ALTREF_IS_VALID(cm);
-#else // !CONFIG_ALTREF2
- const int bit1_uncertain = BWD_AND_ALT(cm);
-#endif // CONFIG_ALTREF2
if (bit1_uncertain)
bit1 = READ_REF_BIT(single_ref_p2);
else
@@ -1613,7 +1713,6 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
const int bit1 = READ_REF_BIT(single_ref_p2);
#endif // CONFIG_VAR_REFS
if (counts) ++counts->single_ref[ctx1][1][bit1];
-#if CONFIG_ALTREF2
if (!bit1) {
const int ctx5 = av1_get_pred_context_single_ref_p6(xd);
#if CONFIG_VAR_REFS
@@ -1630,9 +1729,6 @@ static void read_ref_frames(AV1_COMMON *const cm, MACROBLOCKD *const xd,
} else {
ref_frame[0] = ALTREF_FRAME;
}
-#else // !CONFIG_ALTREF2
- ref_frame[0] = bit1 ? ALTREF_FRAME : BWDREF_FRAME;
-#endif // CONFIG_ALTREF2
} else {
const int ctx2 = av1_get_pred_context_single_ref_p3(xd);
#if CONFIG_VAR_REFS
@@ -1710,46 +1806,33 @@ static INLINE void read_mb_interp_filter(AV1_COMMON *const cm,
return;
}
-#if CONFIG_DUAL_FILTER
if (cm->interp_filter != SWITCHABLE) {
- int dir;
-
- for (dir = 0; dir < 4; ++dir) mbmi->interp_filter[dir] = cm->interp_filter;
+ mbmi->interp_filters = av1_broadcast_interp_filter(cm->interp_filter);
} else {
- int dir;
-
- for (dir = 0; dir < 2; ++dir) {
- const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
- mbmi->interp_filter[dir] = EIGHTTAP_REGULAR;
-
+#if CONFIG_DUAL_FILTER
+ InterpFilter ref0_filter[2] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
+ for (int dir = 0; dir < 2; ++dir) {
if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
(mbmi->ref_frame[1] > INTRA_FRAME &&
has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
- mbmi->interp_filter[dir] =
- (InterpFilter)av1_switchable_interp_inv[aom_read_symbol(
- r, ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS,
- ACCT_STR)];
- if (counts) ++counts->switchable_interp[ctx][mbmi->interp_filter[dir]];
+ const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
+ ref0_filter[dir] =
+ (InterpFilter)aom_read_symbol(r, ec_ctx->switchable_interp_cdf[ctx],
+ SWITCHABLE_FILTERS, ACCT_STR);
+ if (counts) ++counts->switchable_interp[ctx][ref0_filter[dir]];
}
}
- // The index system works as:
- // (0, 1) -> (vertical, horizontal) filter types for the first ref frame.
- // (2, 3) -> (vertical, horizontal) filter types for the second ref frame.
- mbmi->interp_filter[2] = mbmi->interp_filter[0];
- mbmi->interp_filter[3] = mbmi->interp_filter[1];
- }
+ // The index system works as: (0, 1) -> (vertical, horizontal) filter types
+ mbmi->interp_filters =
+ av1_make_interp_filters(ref0_filter[0], ref0_filter[1]);
#else // CONFIG_DUAL_FILTER
- if (cm->interp_filter != SWITCHABLE) {
- mbmi->interp_filter = cm->interp_filter;
- } else {
const int ctx = av1_get_pred_context_switchable_interp(xd);
- mbmi->interp_filter =
- (InterpFilter)av1_switchable_interp_inv[aom_read_symbol(
- r, ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS,
- ACCT_STR)];
- if (counts) ++counts->switchable_interp[ctx][mbmi->interp_filter];
- }
+ InterpFilter filter = (InterpFilter)aom_read_symbol(
+ r, ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS, ACCT_STR);
+ mbmi->interp_filters = av1_broadcast_interp_filter(filter);
+ if (counts) ++counts->switchable_interp[ctx][filter];
#endif // CONFIG_DUAL_FILTER
+ }
}
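// Hedged sketch of the helpers used above (defined elsewhere in this patch,
// av1/common/filter.h; exact bit layout assumed, hence the _sketch names):
// InterpFilters packs the two per-direction filters into one word, and the
// "broadcast" form duplicates a single filter into both slots.
static INLINE InterpFilters av1_make_interp_filters_sketch(InterpFilter yf,
                                                           InterpFilter xf) {
  // Low 16 bits: vertical (y) filter; high 16 bits: horizontal (x) filter.
  return (uint16_t)yf | ((uint32_t)(uint16_t)xf << 16);
}
static INLINE InterpFilters av1_broadcast_interp_filter_sketch(InterpFilter f) {
  return av1_make_interp_filters_sketch(f, f);
}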
static void read_intra_block_mode_info(AV1_COMMON *const cm, const int mi_row,
@@ -1766,62 +1849,74 @@ static void read_intra_block_mode_info(AV1_COMMON *const cm, const int mi_row,
#if CONFIG_CB4X4
(void)i;
- mbmi->mode = read_intra_mode_y(ec_ctx, xd, r, size_group_lookup[bsize]);
+ mbmi->mode = read_intra_mode(r, ec_ctx->y_mode_cdf[size_group_lookup[bsize]]);
#else
switch (bsize) {
case BLOCK_4X4:
for (i = 0; i < 4; ++i)
- mi->bmi[i].as_mode = read_intra_mode_y(ec_ctx, xd, r, 0);
+ mi->bmi[i].as_mode = read_intra_mode(r, ec_ctx->y_mode_cdf[0]);
mbmi->mode = mi->bmi[3].as_mode;
break;
case BLOCK_4X8:
mi->bmi[0].as_mode = mi->bmi[2].as_mode =
- read_intra_mode_y(ec_ctx, xd, r, 0);
+ read_intra_mode(r, ec_ctx->y_mode_cdf[0]);
mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
- read_intra_mode_y(ec_ctx, xd, r, 0);
+ read_intra_mode(r, ec_ctx->y_mode_cdf[0]);
break;
case BLOCK_8X4:
mi->bmi[0].as_mode = mi->bmi[1].as_mode =
- read_intra_mode_y(ec_ctx, xd, r, 0);
+ read_intra_mode(r, ec_ctx->y_mode_cdf[0]);
mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
- read_intra_mode_y(ec_ctx, xd, r, 0);
+ read_intra_mode(r, ec_ctx->y_mode_cdf[0]);
break;
default:
- mbmi->mode = read_intra_mode_y(ec_ctx, xd, r, size_group_lookup[bsize]);
+ mbmi->mode =
+ read_intra_mode(r, ec_ctx->y_mode_cdf[size_group_lookup[bsize]]);
}
#endif
#if CONFIG_CB4X4
if (is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
xd->plane[1].subsampling_y)) {
- mbmi->uv_mode = read_intra_mode_uv(ec_ctx, xd, r, mbmi->mode);
+ mbmi->uv_mode = read_intra_mode_uv(ec_ctx, r, mbmi->mode);
#else
- mbmi->uv_mode = read_intra_mode_uv(ec_ctx, xd, r, mbmi->mode);
+ mbmi->uv_mode = read_intra_mode_uv(ec_ctx, r, mbmi->mode);
(void)mi_row;
(void)mi_col;
#endif
#if CONFIG_CFL
- // TODO(ltrudeau) support PALETTE
- if (mbmi->uv_mode == UV_DC_PRED) {
+ if (mbmi->uv_mode == UV_CFL_PRED) {
mbmi->cfl_alpha_idx =
- read_cfl_alphas(xd->tile_ctx, r, mbmi->cfl_alpha_signs);
+ read_cfl_alphas(xd->tile_ctx, r, &mbmi->cfl_alpha_signs);
+ xd->cfl->store_y = 1;
+ } else {
+ xd->cfl->store_y = 0;
}
#endif // CONFIG_CFL
#if CONFIG_CB4X4
+ } else {
+ // Avoid decoding angle_info if there is no chroma prediction
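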
+ mbmi->uv_mode = UV_DC_PRED;
+#if CONFIG_CFL
+ xd->cfl->is_chroma_reference = 0;
+ xd->cfl->store_y = 1;
+#endif
}
#endif
+ // Explicitly ignore cm here to avoid a compile warning if none of
+ // ext-intra, palette and filter-intra are enabled.
+ (void)cm;
+
#if CONFIG_EXT_INTRA
read_intra_angle_info(cm, xd, r);
#endif // CONFIG_EXT_INTRA
-#if CONFIG_PALETTE
mbmi->palette_mode_info.palette_size[0] = 0;
mbmi->palette_mode_info.palette_size[1] = 0;
- if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools)
+ if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
read_palette_mode_info(cm, xd, r);
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
@@ -1859,7 +1954,11 @@ static INLINE int assign_mv(AV1_COMMON *cm, MACROBLOCKD *xd,
(void)mi_row;
(void)mi_col;
(void)bsize;
-
+#if CONFIG_AMVR
+ if (cm->cur_frame_mv_precision_level) {
+ allow_hp = MV_SUBPEL_NONE;
+ }
+#endif
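// With CONFIG_AMVR, a nonzero cm->cur_frame_mv_precision_level forces
// allow_hp down to MV_SUBPEL_NONE here, and the same level is threaded into
// gm_get_motion_vector() and lower_mv_precision() throughout this function.
// Sketch of the rounding this implies (MV components in 1/8-pel units;
// helper body assumed):
//   if (is_integer) { mv->row = (mv->row / 8) * 8; mv->col = (mv->col / 8) * 8; }
//   else if (!allow_hp) { /* clear the 1/8-pel bit, keeping 1/4-pel */ }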
switch (mode) {
case NEWMV: {
FRAME_COUNTS *counts = xd->counts;
@@ -1898,12 +1997,22 @@ static INLINE int assign_mv(AV1_COMMON *cm, MACROBLOCKD *xd,
#if CONFIG_GLOBAL_MOTION
mv[0].as_int = gm_get_motion_vector(&cm->global_motion[ref_frame[0]],
cm->allow_high_precision_mv, bsize,
- mi_col, mi_row, block)
+ mi_col, mi_row, block
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
if (is_compound)
mv[1].as_int = gm_get_motion_vector(&cm->global_motion[ref_frame[1]],
cm->allow_high_precision_mv, bsize,
- mi_col, mi_row, block)
+ mi_col, mi_row, block
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
#else
mv[0].as_int = 0;
@@ -1914,7 +2023,6 @@ static INLINE int assign_mv(AV1_COMMON *cm, MACROBLOCKD *xd,
if (is_compound) pred_mv[1].as_int = mv[1].as_int;
break;
}
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
case SR_NEAREST_NEARMV: {
assert(!is_compound);
@@ -2083,11 +2191,21 @@ static INLINE int assign_mv(AV1_COMMON *cm, MACROBLOCKD *xd,
#if CONFIG_GLOBAL_MOTION
mv[0].as_int = gm_get_motion_vector(&cm->global_motion[ref_frame[0]],
cm->allow_high_precision_mv, bsize,
- mi_col, mi_row, block)
+ mi_col, mi_row, block
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
mv[1].as_int = gm_get_motion_vector(&cm->global_motion[ref_frame[1]],
cm->allow_high_precision_mv, bsize,
- mi_col, mi_row, block)
+ mi_col, mi_row, block
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
#else
mv[0].as_int = 0;
@@ -2095,7 +2213,6 @@ static INLINE int assign_mv(AV1_COMMON *cm, MACROBLOCKD *xd,
#endif // CONFIG_GLOBAL_MOTION
break;
}
-#endif // CONFIG_EXT_INTER
default: { return 0; }
}
return ret;
@@ -2120,7 +2237,7 @@ static int read_is_inter_block(AV1_COMMON *const cm, MACROBLOCKD *const xd,
}
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
static int read_is_inter_singleref_comp_mode(AV1_COMMON *const cm,
MACROBLOCKD *const xd,
int segment_id, aom_reader *r) {
@@ -2134,7 +2251,7 @@ static int read_is_inter_singleref_comp_mode(AV1_COMMON *const cm,
if (counts) ++counts->comp_inter_mode[ctx][is_singleref_comp_mode];
return is_singleref_comp_mode;
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
static void fpm_sync(void *const data, int mi_row) {
AV1Decoder *const pbi = (AV1Decoder *)data;
@@ -2143,8 +2260,8 @@ static void fpm_sync(void *const data, int mi_row) {
}
#if DEC_MISMATCH_DEBUG
-static void dec_dump_logs(AV1_COMMON *cm, MODE_INFO *const mi,
- MACROBLOCKD *const xd, int mi_row, int mi_col,
+static void dec_dump_logs(AV1_COMMON *cm, MODE_INFO *const mi, int mi_row,
+ int mi_col,
int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES],
int16_t mode_ctx) {
int_mv mv[2] = { { 0 } };
@@ -2153,22 +2270,6 @@ static void dec_dump_logs(AV1_COMMON *cm, MODE_INFO *const mi,
for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref)
mv[ref].as_mv = mbmi->mv[ref].as_mv;
- int interp_ctx[2] = { -1 };
- int interp_filter[2] = { cm->interp_filter };
- if (cm->interp_filter == SWITCHABLE) {
- int dir;
- for (dir = 0; dir < 2; ++dir) {
- if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
- (mbmi->ref_frame[1] > INTRA_FRAME &&
- has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
- interp_ctx[dir] = av1_get_pred_context_switchable_interp(xd, dir);
- interp_filter[dir] = mbmi->interp_filter[dir];
- } else {
- interp_filter[dir] = EIGHTTAP_REGULAR;
- }
- }
- }
-
const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
int16_t zeromv_ctx = -1;
int16_t refmv_ctx = -1;
@@ -2185,20 +2286,18 @@ static void dec_dump_logs(AV1_COMMON *cm, MODE_INFO *const mi,
int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
#define FRAME_TO_CHECK 1
- if (cm->current_video_frame == FRAME_TO_CHECK /*&& cm->show_frame == 0*/) {
+ if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 1) {
printf(
"=== DECODER ===: "
"Frame=%d, (mi_row,mi_col)=(%d,%d), mode=%d, bsize=%d, "
"show_frame=%d, mv[0]=(%d,%d), mv[1]=(%d,%d), ref[0]=%d, "
"ref[1]=%d, motion_mode=%d, inter_mode_ctx=%d, mode_ctx=%d, "
- "interp_ctx=(%d,%d), interp_filter=(%d,%d), newmv_ctx=%d, "
- "zeromv_ctx=%d, refmv_ctx=%d\n",
+ "newmv_ctx=%d, zeromv_ctx=%d, refmv_ctx=%d\n",
cm->current_video_frame, mi_row, mi_col, mbmi->mode, mbmi->sb_type,
cm->show_frame, mv[0].as_mv.row, mv[0].as_mv.col, mv[1].as_mv.row,
mv[1].as_mv.col, mbmi->ref_frame[0], mbmi->ref_frame[1],
- mbmi->motion_mode, inter_mode_ctx[ref_frame_type], mode_ctx,
- interp_ctx[0], interp_ctx[1], interp_filter[0], interp_filter[1],
- newmv_ctx, zeromv_ctx, refmv_ctx);
+ mbmi->motion_mode, inter_mode_ctx[ref_frame_type], mode_ctx, newmv_ctx,
+ zeromv_ctx, refmv_ctx);
}
}
#endif // DEC_MISMATCH_DEBUG
@@ -2206,8 +2305,7 @@ static void dec_dump_logs(AV1_COMMON *cm, MODE_INFO *const mi,
static void read_inter_block_mode_info(AV1Decoder *const pbi,
MACROBLOCKD *const xd,
MODE_INFO *const mi,
-#if (CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION || CONFIG_EXT_INTER) && \
- CONFIG_SUPERTX
+#if CONFIG_SUPERTX
int mi_row, int mi_col, aom_reader *r,
int supertx_enabled) {
#else
@@ -2221,13 +2319,11 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
int_mv nearestmv[2], nearmv[2];
int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
int ref, is_compound;
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
int is_singleref_comp_mode = 0;
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES];
-#if CONFIG_EXT_INTER
int16_t compound_inter_mode_ctx[MODE_CTX_REF_FRAMES];
-#endif // CONFIG_EXT_INTER
int16_t mode_ctx = 0;
#if CONFIG_WARPED_MOTION
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
@@ -2239,10 +2335,9 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
assert(NELEMENTS(mode_2_counter) == MB_MODE_COUNT);
-#if CONFIG_PALETTE
+ mbmi->uv_mode = UV_DC_PRED;
mbmi->palette_mode_info.palette_size[0] = 0;
mbmi->palette_mode_info.palette_size[1] = 0;
-#endif // CONFIG_PALETTE
memset(ref_mvs, 0, sizeof(ref_mvs));
@@ -2258,30 +2353,25 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
#endif // !USE_UNI_COMP_REFS
#endif // CONFIG_EXT_COMP_REFS
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!is_compound)
is_singleref_comp_mode =
read_is_inter_singleref_comp_mode(cm, xd, mbmi->segment_id, r);
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
for (ref = 0; ref < 1 + is_compound; ++ref) {
MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
- av1_find_mv_refs(
- cm, xd, mi, frame, &xd->ref_mv_count[frame], xd->ref_mv_stack[frame],
-#if CONFIG_EXT_INTER
- compound_inter_mode_ctx,
-#endif // CONFIG_EXT_INTER
- ref_mvs[frame], mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
+ av1_find_mv_refs(cm, xd, mi, frame, &xd->ref_mv_count[frame],
+ xd->ref_mv_stack[frame], compound_inter_mode_ctx,
+ ref_mvs[frame], mi_row, mi_col, fpm_sync, (void *)pbi,
+ inter_mode_ctx);
}
if (is_compound) {
MV_REFERENCE_FRAME ref_frame = av1_ref_frame_type(mbmi->ref_frame);
av1_find_mv_refs(cm, xd, mi, ref_frame, &xd->ref_mv_count[ref_frame],
- xd->ref_mv_stack[ref_frame],
-#if CONFIG_EXT_INTER
- compound_inter_mode_ctx,
-#endif // CONFIG_EXT_INTER
+ xd->ref_mv_stack[ref_frame], compound_inter_mode_ctx,
ref_mvs[ref_frame], mi_row, mi_col, fpm_sync, (void *)pbi,
inter_mode_ctx);
@@ -2292,21 +2382,39 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
#if CONFIG_GLOBAL_MOTION
zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[rf[0]],
cm->allow_high_precision_mv,
- bsize, mi_col, mi_row, 0)
+ bsize, mi_col, mi_row, 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
- zeromv[1].as_int = (rf[1] != NONE_FRAME)
- ? gm_get_motion_vector(&cm->global_motion[rf[1]],
- cm->allow_high_precision_mv,
- bsize, mi_col, mi_row, 0)
- .as_int
- : 0;
+ zeromv[1].as_int =
+ (rf[1] != NONE_FRAME)
+ ? gm_get_motion_vector(&cm->global_motion[rf[1]],
+ cm->allow_high_precision_mv, bsize, mi_col,
+ mi_row, 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
+ .as_int
+ : 0;
#else
zeromv[0].as_int = zeromv[1].as_int = 0;
#endif
for (ref = 0; ref < 2; ++ref) {
if (rf[ref] == NONE_FRAME) continue;
+#if CONFIG_AMVR
+ lower_mv_precision(&ref_mvs[rf[ref]][0].as_mv, allow_hp,
+ cm->cur_frame_mv_precision_level);
+ lower_mv_precision(&ref_mvs[rf[ref]][1].as_mv, allow_hp,
+ cm->cur_frame_mv_precision_level);
+#else
lower_mv_precision(&ref_mvs[rf[ref]][0].as_mv, allow_hp);
lower_mv_precision(&ref_mvs[rf[ref]][1].as_mv, allow_hp);
+#endif
if (ref_mvs[rf[ref]][0].as_int != zeromv[ref].as_int ||
ref_mvs[rf[ref]][1].as_int != zeromv[ref].as_int)
inter_mode_ctx[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET);
@@ -2314,7 +2422,6 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
}
}
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
if (is_compound || is_singleref_comp_mode)
#else // !CONFIG_COMPOUND_SINGLEREF
@@ -2322,12 +2429,16 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
#endif // CONFIG_COMPOUND_SINGLEREF
mode_ctx = compound_inter_mode_ctx[mbmi->ref_frame[0]];
else
-#endif // CONFIG_EXT_INTER
mode_ctx =
av1_mode_context_analyzer(inter_mode_ctx, mbmi->ref_frame, bsize, -1);
mbmi->ref_mv_idx = 0;
+#if CONFIG_SEGMENT_ZEROMV
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
+ segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_ZEROMV)) {
+#else
if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+#endif
mbmi->mode = ZEROMV;
if (bsize < BLOCK_8X8 && !unify_bsize) {
aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
@@ -2336,7 +2447,6 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
}
} else {
if (bsize >= BLOCK_8X8 || unify_bsize) {
-#if CONFIG_EXT_INTER
if (is_compound)
mbmi->mode = read_inter_compound_mode(cm, xd, r, mode_ctx);
#if CONFIG_COMPOUND_SINGLEREF
@@ -2344,60 +2454,53 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
mbmi->mode = read_inter_singleref_comp_mode(xd, r, mode_ctx);
#endif // CONFIG_COMPOUND_SINGLEREF
else
-#endif // CONFIG_EXT_INTER
mbmi->mode = read_inter_mode(ec_ctx, xd, r, mode_ctx);
-#if CONFIG_EXT_INTER
if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV ||
#if CONFIG_COMPOUND_SINGLEREF
mbmi->mode == SR_NEW_NEWMV ||
#endif // CONFIG_COMPOUND_SINGLEREF
have_nearmv_in_inter_mode(mbmi->mode))
-#else // !CONFIG_EXT_INTER
- if (mbmi->mode == NEARMV || mbmi->mode == NEWMV)
-#endif // CONFIG_EXT_INTER
read_drl_idx(ec_ctx, xd, mbmi, r);
}
}
-#if CONFIG_EXT_INTER
- if ((bsize < BLOCK_8X8 && unify_bsize) ||
+ if ((bsize < BLOCK_8X8 && !unify_bsize) ||
(mbmi->mode != ZEROMV && mbmi->mode != ZERO_ZEROMV)) {
-#else
- if ((bsize < BLOCK_8X8 && !unify_bsize) || mbmi->mode != ZEROMV) {
-#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + is_compound; ++ref) {
+#if CONFIG_AMVR
+ av1_find_best_ref_mvs(allow_hp, ref_mvs[mbmi->ref_frame[ref]],
+ &nearestmv[ref], &nearmv[ref],
+ cm->cur_frame_mv_precision_level);
+#else
av1_find_best_ref_mvs(allow_hp, ref_mvs[mbmi->ref_frame[ref]],
&nearestmv[ref], &nearmv[ref]);
+#endif
}
}
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
if ((is_compound || is_singleref_comp_mode) &&
- (bsize >= BLOCK_8X8 || unify_bsize) && mbmi->mode != ZERO_ZEROMV) {
+ (bsize >= BLOCK_8X8 || unify_bsize) && mbmi->mode != ZERO_ZEROMV)
#else // !CONFIG_COMPOUND_SINGLEREF
if (is_compound && (bsize >= BLOCK_8X8 || unify_bsize) &&
- mbmi->mode != ZERO_ZEROMV) {
+ mbmi->mode != ZERO_ZEROMV)
#endif // CONFIG_COMPOUND_SINGLEREF
-#else // !CONFIG_EXT_INTER
- if (is_compound && (bsize >= BLOCK_8X8 || unify_bsize) &&
- mbmi->mode != NEWMV && mbmi->mode != ZEROMV) {
-#endif // CONFIG_EXT_INTER
+ {
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-#if CONFIG_EXT_INTER
if (xd->ref_mv_count[ref_frame_type] > 0) {
-#else
- if (xd->ref_mv_count[ref_frame_type] == 1 && mbmi->mode == NEARESTMV) {
-#endif // CONFIG_EXT_INTER
-#if CONFIG_EXT_INTER
if (mbmi->mode == NEAREST_NEARESTMV) {
-#endif // CONFIG_EXT_INTER
nearestmv[0] = xd->ref_mv_stack[ref_frame_type][0].this_mv;
nearestmv[1] = xd->ref_mv_stack[ref_frame_type][0].comp_mv;
+#if CONFIG_AMVR
+ lower_mv_precision(&nearestmv[0].as_mv, allow_hp,
+ cm->cur_frame_mv_precision_level);
+ lower_mv_precision(&nearestmv[1].as_mv, allow_hp,
+ cm->cur_frame_mv_precision_level);
+#else
lower_mv_precision(&nearestmv[0].as_mv, allow_hp);
lower_mv_precision(&nearestmv[1].as_mv, allow_hp);
-#if CONFIG_EXT_INTER
+#endif
} else if (mbmi->mode == NEAREST_NEWMV
#if CONFIG_COMPOUND_SINGLEREF
|| mbmi->mode == SR_NEAREST_NEARMV
@@ -2405,15 +2508,24 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
#endif // CONFIG_COMPOUND_SINGLEREF
) {
nearestmv[0] = xd->ref_mv_stack[ref_frame_type][0].this_mv;
+
+#if CONFIG_AMVR
+ lower_mv_precision(&nearestmv[0].as_mv, allow_hp,
+ cm->cur_frame_mv_precision_level);
+#else
lower_mv_precision(&nearestmv[0].as_mv, allow_hp);
+#endif
} else if (mbmi->mode == NEW_NEARESTMV) {
nearestmv[1] = xd->ref_mv_stack[ref_frame_type][0].comp_mv;
+#if CONFIG_AMVR
+ lower_mv_precision(&nearestmv[1].as_mv, allow_hp,
+ cm->cur_frame_mv_precision_level);
+#else
lower_mv_precision(&nearestmv[1].as_mv, allow_hp);
+#endif
}
-#endif // CONFIG_EXT_INTER
}
-#if CONFIG_EXT_INTER
if (xd->ref_mv_count[ref_frame_type] > 1) {
int ref_mv_idx = 1 + mbmi->ref_mv_idx;
#if CONFIG_COMPOUND_SINGLEREF
@@ -2421,12 +2533,22 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
#endif // CONFIG_COMPOUND_SINGLEREF
if (compound_ref0_mode(mbmi->mode) == NEARMV) {
nearmv[0] = xd->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
- lower_mv_precision(&nearmv[0].as_mv, allow_hp);
+#if CONFIG_AMVR
+ lower_mv_precision(&nearmv[0].as_mv, allow_hp,
+ cm->cur_frame_mv_precision_level);
+#else
+ lower_mv_precision(&nearmv[0].as_mv, allow_hp);
+#endif
}
if (compound_ref1_mode(mbmi->mode) == NEARMV) {
nearmv[1] = xd->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
- lower_mv_precision(&nearmv[1].as_mv, allow_hp);
+#if CONFIG_AMVR
+ lower_mv_precision(&nearmv[1].as_mv, allow_hp,
+ cm->cur_frame_mv_precision_level);
+#else
+ lower_mv_precision(&nearmv[1].as_mv, allow_hp);
+#endif
}
#if CONFIG_COMPOUND_SINGLEREF
} else {
@@ -2439,15 +2561,6 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
}
#endif // CONFIG_COMPOUND_SINGLEREF
}
-#else // !CONFIG_EXT_INTER
- if (xd->ref_mv_count[ref_frame_type] > 1) {
- int ref_mv_idx = 1 + mbmi->ref_mv_idx;
- nearestmv[0] = xd->ref_mv_stack[ref_frame_type][0].this_mv;
- nearestmv[1] = xd->ref_mv_stack[ref_frame_type][0].comp_mv;
- nearmv[0] = xd->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
- nearmv[1] = xd->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
- }
-#endif // CONFIG_EXT_INTER
} else if (mbmi->ref_mv_idx > 0 && mbmi->mode == NEARMV) {
int_mv cur_mv =
xd->ref_mv_stack[mbmi->ref_frame[0]][1 + mbmi->ref_mv_idx].this_mv;
@@ -2464,72 +2577,58 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
int idx, idy;
PREDICTION_MODE b_mode;
int_mv nearest_sub8x8[2], near_sub8x8[2];
-#if CONFIG_EXT_INTER
int_mv ref_mv[2][2];
-#endif // CONFIG_EXT_INTER
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
int_mv block[2];
const int j = idy * 2 + idx;
int_mv ref_mv_s8[2];
-#if CONFIG_EXT_INTER
if (!is_compound)
-#endif // CONFIG_EXT_INTER
mode_ctx = av1_mode_context_analyzer(inter_mode_ctx, mbmi->ref_frame,
bsize, j);
-#if CONFIG_EXT_INTER
if (is_compound)
b_mode = read_inter_compound_mode(cm, xd, r, mode_ctx);
else
-#endif // CONFIG_EXT_INTER
b_mode = read_inter_mode(ec_ctx, xd, r, mode_ctx);
-#if CONFIG_EXT_INTER
if (b_mode != ZEROMV && b_mode != ZERO_ZEROMV) {
-#else
- if (b_mode != ZEROMV) {
-#endif // CONFIG_EXT_INTER
CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
uint8_t ref_mv_count[2];
- for (ref = 0; ref < 1 + is_compound; ++ref)
-#if CONFIG_EXT_INTER
- {
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
int_mv mv_ref_list[MAX_MV_REF_CANDIDATES];
av1_update_mv_context(cm, xd, mi, mbmi->ref_frame[ref], mv_ref_list,
j, mi_row, mi_col, NULL);
-#endif // CONFIG_EXT_INTER
av1_append_sub8x8_mvs_for_idx(cm, xd, j, ref, mi_row, mi_col,
ref_mv_stack[ref], &ref_mv_count[ref],
-#if CONFIG_EXT_INTER
- mv_ref_list,
-#endif // CONFIG_EXT_INTER
- &nearest_sub8x8[ref],
+ mv_ref_list, &nearest_sub8x8[ref],
&near_sub8x8[ref]);
-#if CONFIG_EXT_INTER
if (have_newmv_in_inter_mode(b_mode)) {
mv_ref_list[0].as_int = nearest_sub8x8[ref].as_int;
mv_ref_list[1].as_int = near_sub8x8[ref].as_int;
+#if CONFIG_AMVR
+ av1_find_best_ref_mvs(allow_hp, mv_ref_list, &ref_mv[0][ref],
+ &ref_mv[1][ref],
+ cm->cur_frame_mv_precision_level);
+#else
av1_find_best_ref_mvs(allow_hp, mv_ref_list, &ref_mv[0][ref],
&ref_mv[1][ref]);
+#endif
}
}
-#endif // CONFIG_EXT_INTER
}
for (ref = 0; ref < 1 + is_compound && b_mode != ZEROMV; ++ref) {
ref_mv_s8[ref] = nearest_sub8x8[ref];
+#if CONFIG_AMVR
+ lower_mv_precision(&ref_mv_s8[ref].as_mv, allow_hp,
+ cm->cur_frame_mv_precision_level);
+#else
lower_mv_precision(&ref_mv_s8[ref].as_mv, allow_hp);
+#endif
}
-#if CONFIG_EXT_INTER
(void)ref_mv_s8;
-#endif
- if (!assign_mv(cm, xd, b_mode, mbmi->ref_frame, j, block,
-#if CONFIG_EXT_INTER
- ref_mv[0],
-#else // !CONFIG_EXT_INTER
- ref_mv_s8,
-#endif // CONFIG_EXT_INTER
+ if (!assign_mv(cm, xd, b_mode, mbmi->ref_frame, j, block, ref_mv[0],
nearest_sub8x8, near_sub8x8, mi_row, mi_col, is_compound,
allow_hp, r)) {
aom_merge_corrupted_flag(&xd->corrupted, 1);
@@ -2556,7 +2655,6 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
ref_mv[0] = nearestmv[0];
ref_mv[1] = nearestmv[1];
-#if CONFIG_EXT_INTER
if (is_compound) {
int ref_mv_idx = mbmi->ref_mv_idx;
// Special case: NEAR_NEWMV and NEW_NEARMV modes use
@@ -2604,7 +2702,6 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
}
#endif // CONFIG_COMPOUND_SINGLEREF
} else {
-#endif // CONFIG_EXT_INTER
if (mbmi->mode == NEWMV) {
for (ref = 0; ref < 1 + is_compound; ++ref) {
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
@@ -2620,9 +2717,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
nearestmv[ref] = ref_mv[ref];
}
}
-#if CONFIG_EXT_INTER
}
-#endif // CONFIG_EXT_INTER
int mv_corrupted_flag =
!assign_mv(cm, xd, mbmi->mode, mbmi->ref_frame, 0, mbmi->mv, ref_mv,
@@ -2630,7 +2725,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
aom_merge_corrupted_flag(&xd->corrupted, mv_corrupted_flag);
}
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_INTERINTRA
mbmi->use_wedge_interintra = 0;
if (cm->reference_mode != COMPOUND_REFERENCE &&
#if CONFIG_SUPERTX
@@ -2681,7 +2776,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
}
}
}
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
#if CONFIG_WARPED_MOTION
for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
@@ -2710,18 +2805,16 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
#if CONFIG_SUPERTX
if (!supertx_enabled) {
#endif // CONFIG_SUPERTX
-#if CONFIG_EXT_INTER
if (mbmi->ref_frame[1] != INTRA_FRAME)
-#endif // CONFIG_EXT_INTER
mbmi->motion_mode = read_motion_mode(cm, xd, mi, r);
#if CONFIG_NCOBMC_ADAPT_WEIGHT
read_ncobmc_mode(xd, mi, mbmi->ncobmc_mode, r);
#endif
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (is_singleref_comp_mode) assert(mbmi->motion_mode == SIMPLE_TRANSLATION);
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_WARPED_MOTION
if (mbmi->motion_mode == WARPED_CAUSAL) {
mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
@@ -2744,7 +2837,6 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
#endif // CONFIG_SUPERTX
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_EXT_INTER
mbmi->interinter_compound_type = COMPOUND_AVERAGE;
if (
#if CONFIG_COMPOUND_SINGLEREF
@@ -2760,10 +2852,17 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
if (is_any_masked_compound_used(bsize)) {
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
if (cm->allow_masked_compound) {
- mbmi->interinter_compound_type = aom_read_symbol(
- r, ec_ctx->compound_type_cdf[bsize], COMPOUND_TYPES, ACCT_STR);
+#if CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
+ if (!is_interinter_compound_used(COMPOUND_WEDGE, bsize))
+ mbmi->interinter_compound_type =
+ aom_read_bit(r, ACCT_STR) ? COMPOUND_AVERAGE : COMPOUND_SEG;
+ else
+#endif // CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
+ mbmi->interinter_compound_type = aom_read_symbol(
+ r, ec_ctx->compound_type_cdf[bsize], COMPOUND_TYPES, ACCT_STR);
#if CONFIG_WEDGE
if (mbmi->interinter_compound_type == COMPOUND_WEDGE) {
+ assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
mbmi->wedge_index =
aom_read_literal(r, get_wedge_bits_lookup(bsize), ACCT_STR);
mbmi->wedge_sign = aom_read_bit(r, ACCT_STR);
@@ -2782,15 +2881,13 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
if (xd->counts)
xd->counts->compound_interinter[bsize][mbmi->interinter_compound_type]++;
}
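// Decision summary for the compound-type read above, assuming CONFIG_WEDGE
// and CONFIG_COMPOUND_SEGMENT are both enabled:
//   wedge usable at this bsize -> read the full COMPOUND_TYPES symbol
//   wedge not usable           -> read one raw bit:
//                                   1 -> COMPOUND_AVERAGE, 0 -> COMPOUND_SEG
// which is why the COMPOUND_WEDGE branch can assert
// is_interinter_compound_used(COMPOUND_WEDGE, bsize).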
-#endif // CONFIG_EXT_INTER
#if CONFIG_DUAL_FILTER || CONFIG_WARPED_MOTION || CONFIG_GLOBAL_MOTION
read_mb_interp_filter(cm, xd, mbmi, r);
#endif // CONFIG_DUAL_FILTER || CONFIG_WARPED_MOTION
#if DEC_MISMATCH_DEBUG
- // NOTE(zoeliu): For debug
- dec_dump_logs(cm, mi, xd, mi_row, mi_col, inter_mode_ctx, mode_ctx);
+ dec_dump_logs(cm, mi, mi_row, mi_col, inter_mode_ctx, mode_ctx);
#endif // DEC_MISMATCH_DEBUG
}
@@ -2816,7 +2913,6 @@ static void read_inter_frame_mode_info(AV1Decoder *const pbi,
#endif // CONFIG_SUPERTX
mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
-#if CONFIG_DELTA_Q
if (cm->delta_q_present_flag) {
xd->current_qindex =
xd->prev_qindex +
@@ -2826,15 +2922,34 @@ static void read_inter_frame_mode_info(AV1Decoder *const pbi,
xd->prev_qindex = xd->current_qindex;
#if CONFIG_EXT_DELTA_Q
if (cm->delta_lf_present_flag) {
- mbmi->current_delta_lf_from_base = xd->current_delta_lf_from_base =
+#if CONFIG_LOOPFILTER_LEVEL
+ if (cm->delta_lf_multi) {
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id) {
+ mbmi->curr_delta_lf[lf_id] = xd->curr_delta_lf[lf_id] =
+ xd->prev_delta_lf[lf_id] +
+ read_delta_lflevel(cm, xd, r, lf_id, mbmi, mi_col, mi_row) *
+ cm->delta_lf_res;
+ xd->prev_delta_lf[lf_id] = xd->curr_delta_lf[lf_id];
+ }
+ } else {
+ mbmi->current_delta_lf_from_base = xd->current_delta_lf_from_base =
+ xd->prev_delta_lf_from_base +
+ read_delta_lflevel(cm, xd, r, -1, mbmi, mi_col, mi_row) *
+ cm->delta_lf_res;
+ xd->prev_delta_lf_from_base = xd->current_delta_lf_from_base;
+ }
+#else
+ const int current_delta_lf_from_base =
xd->prev_delta_lf_from_base +
read_delta_lflevel(cm, xd, r, mbmi, mi_col, mi_row) *
cm->delta_lf_res;
+ mbmi->current_delta_lf_from_base = xd->current_delta_lf_from_base =
+ clamp(current_delta_lf_from_base, 0, MAX_LOOP_FILTER);
xd->prev_delta_lf_from_base = xd->current_delta_lf_from_base;
+#endif // CONFIG_LOOPFILTER_LEVEL
}
#endif
}
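// Worked example for the single-level (non CONFIG_LOOPFILTER_LEVEL) path
// above, with assumed values prev_delta_lf_from_base = 2, a decoded delta of
// -1, and cm->delta_lf_res = 2:
//   current_delta_lf_from_base = 2 + (-1) * 2 = 0
// which is then clamped to [0, MAX_LOOP_FILTER] before being stored back as
// the prediction for the next superblock.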
-#endif
#if CONFIG_SUPERTX
if (!supertx_enabled) {
@@ -2853,27 +2968,34 @@ static void read_inter_frame_mode_info(AV1Decoder *const pbi,
#else
bsize >= BLOCK_8X8 &&
#endif
- !mbmi->skip && inter_block) {
+ !mbmi->skip && inter_block && !xd->lossless[mbmi->segment_id]) {
const TX_SIZE max_tx_size = max_txsize_rect_lookup[bsize];
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
const int height = block_size_high[bsize] >> tx_size_wide_log2[0];
int idx, idy;
+ int init_depth =
+ (height != width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
mbmi->min_tx_size = TX_SIZES_ALL;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bw)
- read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size,
- height != width, idy, idx, r);
+ read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size, init_depth,
+ idy, idx, r);
#if CONFIG_RECT_TX_EXT
if (is_quarter_tx_allowed(xd, mbmi, inter_block) &&
mbmi->tx_size == max_tx_size) {
int quarter_tx;
if (quarter_txsize_lookup[bsize] != max_tx_size) {
+#if CONFIG_NEW_MULTISYMBOL
+ quarter_tx =
+ aom_read_symbol(r, cm->fc->quarter_tx_size_cdf, 2, ACCT_STR);
+#else
quarter_tx = aom_read(r, cm->fc->quarter_tx_size_prob, ACCT_STR);
if (xd->counts) ++xd->counts->quarter_tx_size[quarter_tx];
+#endif
} else {
quarter_tx = 1;
}
@@ -2920,9 +3042,7 @@ static void read_inter_frame_mode_info(AV1Decoder *const pbi,
if (inter_block)
read_inter_block_mode_info(pbi, xd,
-#if (CONFIG_MOTION_VAR || CONFIG_EXT_INTER || CONFIG_WARPED_MOTION) && \
- CONFIG_SUPERTX
-
+#if CONFIG_SUPERTX
mi, mi_row, mi_col, r, supertx_enabled);
#else
mi, mi_row, mi_col, r);
@@ -2939,6 +3059,34 @@ static void read_inter_frame_mode_info(AV1Decoder *const pbi,
#endif // !CONFIG_TXK_SEL
}
+static void av1_intra_copy_frame_mvs(AV1_COMMON *const cm, int mi_row,
+ int mi_col, int x_mis, int y_mis) {
+#if CONFIG_TMV
+ const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
+ MV_REF *frame_mvs = cm->cur_frame->mvs +
+ ((mi_row & 0xfffe) >> 1) * frame_mvs_stride +
+ ((mi_col & 0xfffe) >> 1);
+ x_mis = ROUND_POWER_OF_TWO(x_mis, 1);
+ y_mis = ROUND_POWER_OF_TWO(y_mis, 1);
+#else
+ const int frame_mvs_stride = cm->mi_cols;
+ MV_REF *frame_mvs = cm->cur_frame->mvs +
+ (mi_row & 0xfffe) * frame_mvs_stride + (mi_col & 0xfffe);
+ x_mis = AOMMAX(x_mis, 2);
+ y_mis = AOMMAX(y_mis, 2);
+#endif // CONFIG_TMV
+ int w, h;
+
+ for (h = 0; h < y_mis; h++) {
+ MV_REF *const frame_mv = frame_mvs + h * frame_mvs_stride;
+ for (w = 0; w < x_mis; w++) {
+ MV_REF *const mv = frame_mv + w;
+ mv->ref_frame[0] = NONE_FRAME;
+ mv->ref_frame[1] = NONE_FRAME;
+ }
+ }
+}
+
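// Worked example of the CONFIG_TMV indexing above (assumed values
// mi_row = 37, mi_col = 22, cm->mi_cols = 100):
//   frame_mvs_stride = ROUND_POWER_OF_TWO(100, 1) = 50
//   row = (37 & 0xfffe) >> 1 = 18,  col = (22 & 0xfffe) >> 1 = 11
// so the writes start at cm->cur_frame->mvs[18 * 50 + 11]: the temporal-MV
// grid is kept at half the mode-info resolution (one MV_REF per 2x2 MIs),
// with x_mis/y_mis halved (rounding up) to cover the same pixel area.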
void av1_read_mode_info(AV1Decoder *const pbi, MACROBLOCKD *xd,
#if CONFIG_SUPERTX
int supertx_enabled,
@@ -2947,40 +3095,19 @@ void av1_read_mode_info(AV1Decoder *const pbi, MACROBLOCKD *xd,
int y_mis) {
AV1_COMMON *const cm = &pbi->common;
MODE_INFO *const mi = xd->mi[0];
- MV_REF *frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
- int w, h;
-
#if CONFIG_INTRABC
mi->mbmi.use_intrabc = 0;
#endif // CONFIG_INTRABC
if (frame_is_intra_only(cm)) {
read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
- for (h = 0; h < y_mis; ++h) {
- MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
- for (w = 0; w < x_mis; ++w) {
- MV_REF *const mv = frame_mv + w;
- mv->ref_frame[0] = NONE_FRAME;
- mv->ref_frame[1] = NONE_FRAME;
- }
- }
+ av1_intra_copy_frame_mvs(cm, mi_row, mi_col, x_mis, y_mis);
} else {
read_inter_frame_mode_info(pbi, xd,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
mi_row, mi_col, r);
- for (h = 0; h < y_mis; ++h) {
- MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
- for (w = 0; w < x_mis; ++w) {
- MV_REF *const mv = frame_mv + w;
- mv->ref_frame[0] = mi->mbmi.ref_frame[0];
- mv->ref_frame[1] = mi->mbmi.ref_frame[1];
- mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
- mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
- mv->pred_mv[0].as_int = mi->mbmi.pred_mv[0].as_int;
- mv->pred_mv[1].as_int = mi->mbmi.pred_mv[1].as_int;
- }
- }
+ av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
}
}
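// av1_copy_frame_mvs(), now called above for inter frames, is defined in
// av1/common and not shown in this patch; a minimal sketch consistent with
// the loop deleted here (TMV-style subsampling omitted for brevity):
static void copy_frame_mvs_sketch(AV1_COMMON *cm, MODE_INFO *mi, int mi_row,
                                  int mi_col, int x_mis, int y_mis) {
  MV_REF *frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
  for (int h = 0; h < y_mis; ++h) {
    MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
    for (int w = 0; w < x_mis; ++w) {
      MV_REF *const mv = frame_mv + w;
      // Record both reference frames and their (predicted) motion vectors so
      // later frames can use them as temporal MV candidates.
      mv->ref_frame[0] = mi->mbmi.ref_frame[0];
      mv->ref_frame[1] = mi->mbmi.ref_frame[1];
      mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
      mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
      mv->pred_mv[0].as_int = mi->mbmi.pred_mv[0].as_int;
      mv->pred_mv[1].as_int = mi->mbmi.pred_mv[1].as_int;
    }
  }
}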
diff --git a/third_party/aom/av1/decoder/decoder.c b/third_party/aom/av1/decoder/decoder.c
index 3998c20ee..cd82d5b53 100644
--- a/third_party/aom/av1/decoder/decoder.c
+++ b/third_party/aom/av1/decoder/decoder.c
@@ -33,7 +33,9 @@
#include "av1/decoder/decodeframe.h"
#include "av1/decoder/decoder.h"
-
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+#include "av1/common/ncobmc_kernels.h"
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#if !CONFIG_PVQ
#include "av1/decoder/detokenize.h"
#endif
@@ -46,23 +48,8 @@ static void initialize_dec(void) {
aom_dsp_rtcd();
aom_scale_rtcd();
av1_init_intra_predictors();
-#if CONFIG_EXT_INTER
av1_init_wedge_masks();
-#endif // CONFIG_EXT_INTER
init_done = 1;
- av1_indices_from_tree(av1_switchable_interp_ind, av1_switchable_interp_inv,
- av1_switchable_interp_tree);
-#if CONFIG_EXT_TX
- int s;
- for (s = 1; s < EXT_TX_SETS_INTRA; ++s)
- av1_indices_from_tree(av1_ext_tx_intra_ind[s], av1_ext_tx_intra_inv[s],
- av1_ext_tx_intra_tree[s]);
- for (s = 1; s < EXT_TX_SETS_INTER; ++s)
- av1_indices_from_tree(av1_ext_tx_inter_ind[s], av1_ext_tx_inter_inv[s],
- av1_ext_tx_inter_tree[s]);
-#else
- av1_indices_from_tree(av1_ext_tx_ind, av1_ext_tx_inv, av1_ext_tx_tree);
-#endif
}
}
@@ -133,6 +120,10 @@ AV1Decoder *av1_decoder_create(BufferPool *const pool) {
av1_loop_filter_init(cm);
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ get_default_ncobmc_kernels(cm);
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
+
#if CONFIG_AOM_QM
aom_qm_init(cm);
#endif
@@ -184,107 +175,36 @@ static int equal_dimensions(const YV12_BUFFER_CONFIG *a,
a->uv_height == b->uv_height && a->uv_width == b->uv_width;
}
-aom_codec_err_t av1_copy_reference_dec(AV1Decoder *pbi,
- AOM_REFFRAME ref_frame_flag,
+aom_codec_err_t av1_copy_reference_dec(AV1Decoder *pbi, int idx,
YV12_BUFFER_CONFIG *sd) {
AV1_COMMON *cm = &pbi->common;
- /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
- * encoder is using the frame buffers for. This is just a stub to keep the
- * aomenc --test-decode functionality working, and will be replaced in a
- * later commit that adds AV1-specific controls for this functionality.
- */
- if (ref_frame_flag == AOM_LAST_FLAG) {
- const YV12_BUFFER_CONFIG *const cfg = get_ref_frame(cm, 0);
- if (cfg == NULL) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
- "No 'last' reference frame");
- return AOM_CODEC_ERROR;
- }
- if (!equal_dimensions(cfg, sd))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
- "Incorrect buffer dimensions");
- else
- aom_yv12_copy_frame(cfg, sd);
- } else {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR, "Invalid reference frame");
+ const YV12_BUFFER_CONFIG *const cfg = get_ref_frame(cm, idx);
+ if (cfg == NULL) {
+ aom_internal_error(&cm->error, AOM_CODEC_ERROR, "No reference frame");
+ return AOM_CODEC_ERROR;
}
+ if (!equal_dimensions(cfg, sd))
+ aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ "Incorrect buffer dimensions");
+ else
+ aom_yv12_copy_frame(cfg, sd);
return cm->error.error_code;
}
-aom_codec_err_t av1_set_reference_dec(AV1_COMMON *cm,
- AOM_REFFRAME ref_frame_flag,
+aom_codec_err_t av1_set_reference_dec(AV1_COMMON *cm, int idx,
YV12_BUFFER_CONFIG *sd) {
- int idx;
YV12_BUFFER_CONFIG *ref_buf = NULL;
- // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
- // encoder is using the frame buffers for. This is just a stub to keep the
- // aomenc --test-decode functionality working, and will be replaced in a
- // later commit that adds AV1-specific controls for this functionality.
-
- // (Yunqing) The set_reference control depends on the following setting in
- // encoder.
- // cpi->lst_fb_idx = 0;
- // #if CONFIG_EXT_REFS
- // cpi->lst2_fb_idx = 1;
- // cpi->lst3_fb_idx = 2;
- // cpi->gld_fb_idx = 3;
- // cpi->bwd_fb_idx = 4;
- // #if CONFIG_ALTREF2
- // cpi->alt2_fb_idx = 5;
- // cpi->alt_fb_idx = 6;
- // #else // !CONFIG_ALTREF2
- // cpi->alt_fb_idx = 5;
- // #endif // CONFIG_ALTREF2
- // #else // CONFIG_EXT_REFS
- // cpi->gld_fb_idx = 1;
- // cpi->alt_fb_idx = 2;
- // #endif // CONFIG_EXT_REFS
-
- // TODO(zoeliu): To revisit following code and reconsider what assumption we
- // may take on the reference frame buffer virtual indexes
- if (ref_frame_flag == AOM_LAST_FLAG) {
- idx = cm->ref_frame_map[0];
-#if CONFIG_EXT_REFS
- } else if (ref_frame_flag == AOM_LAST2_FLAG) {
- idx = cm->ref_frame_map[1];
- } else if (ref_frame_flag == AOM_LAST3_FLAG) {
- idx = cm->ref_frame_map[2];
- } else if (ref_frame_flag == AOM_GOLD_FLAG) {
- idx = cm->ref_frame_map[3];
- } else if (ref_frame_flag == AOM_BWD_FLAG) {
- idx = cm->ref_frame_map[4];
-#if CONFIG_ALTREF2
- } else if (ref_frame_flag == AOM_ALT2_FLAG) {
- idx = cm->ref_frame_map[5];
- } else if (ref_frame_flag == AOM_ALT_FLAG) {
- idx = cm->ref_frame_map[6];
-#else // !CONFIG_ALTREF2
- } else if (ref_frame_flag == AOM_ALT_FLAG) {
- idx = cm->ref_frame_map[5];
-#endif // CONFIG_ALTREF2
-#else // !CONFIG_EXT_REFS
- } else if (ref_frame_flag == AOM_GOLD_FLAG) {
- idx = cm->ref_frame_map[1];
- } else if (ref_frame_flag == AOM_ALT_FLAG) {
- idx = cm->ref_frame_map[2];
-#endif // CONFIG_EXT_REFS
- } else {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR, "Invalid reference frame");
- return cm->error.error_code;
- }
+ // Get the destination reference buffer.
+ ref_buf = get_ref_frame(cm, idx);
- if (idx < 0 || idx >= FRAME_BUFFERS) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
- "Invalid reference frame map");
- return cm->error.error_code;
+ if (ref_buf == NULL) {
+ aom_internal_error(&cm->error, AOM_CODEC_ERROR, "No reference frame");
+ return AOM_CODEC_ERROR;
}
- // Get the destination reference buffer.
- ref_buf = &cm->buffer_pool->frame_bufs[idx].buf;
-
if (!equal_dimensions(ref_buf, sd)) {
aom_internal_error(&cm->error, AOM_CODEC_ERROR,
"Incorrect buffer dimensions");
@@ -444,7 +364,16 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size,
}
cm->error.setjmp = 1;
- av1_decode_frame(pbi, source, source + size, psource);
+
+#if !CONFIG_OBU
+ av1_decode_frame_headers_and_setup(pbi, source, source + size, psource);
+ if (!cm->show_existing_frame) {
+ av1_decode_tg_tiles_and_wrapup(pbi, source, source + size, psource, 0,
+ cm->tile_rows * cm->tile_cols - 1, 1);
+ }
+#else
+ av1_decode_frame_from_obus(pbi, source, source + size, psource);
+#endif
swap_frame_buffers(pbi);
@@ -492,6 +421,8 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size,
} else {
cm->last_width = cm->width;
cm->last_height = cm->height;
+ cm->last_tile_cols = cm->tile_cols;
+ cm->last_tile_rows = cm->tile_rows;
if (cm->show_frame) {
cm->current_video_frame++;
}
diff --git a/third_party/aom/av1/decoder/decoder.h b/third_party/aom/av1/decoder/decoder.h
index 5e6afc2dc..20129b669 100644
--- a/third_party/aom/av1/decoder/decoder.h
+++ b/third_party/aom/av1/decoder/decoder.h
@@ -54,9 +54,10 @@ typedef struct TileData {
CFL_CTX cfl;
#endif
DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx);
-#if CONFIG_PALETTE
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
-#endif // CONFIG_PALETTE
+#if CONFIG_MRC_TX
+ DECLARE_ALIGNED(16, uint8_t, mrc_mask[MAX_SB_SQUARE]);
+#endif // CONFIG_MRC_TX
} TileData;
typedef struct TileWorkerData {
@@ -74,9 +75,10 @@ typedef struct TileWorkerData {
CFL_CTX cfl;
#endif
FRAME_CONTEXT tctx;
-#if CONFIG_PALETTE
DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
-#endif // CONFIG_PALETTE
+#if CONFIG_MRC_TX
+ DECLARE_ALIGNED(16, uint8_t, mrc_mask[MAX_SB_SQUARE]);
+#endif // CONFIG_MRC_TX
struct aom_internal_error_info error_info;
} TileWorkerData;
@@ -138,9 +140,6 @@ typedef struct AV1Decoder {
int tg_size; // Number of tiles in the current tilegroup
int tg_start; // First tile in the current tilegroup
int tg_size_bit_offset;
-#if CONFIG_REFERENCE_BUFFER
- SequenceHeader seq_params;
-#endif
#if CONFIG_INSPECTION
aom_inspect_cb inspect_cb;
void *inspect_ctx;
@@ -154,12 +153,10 @@ int av1_get_raw_frame(struct AV1Decoder *pbi, YV12_BUFFER_CONFIG *sd);
int av1_get_frame_to_show(struct AV1Decoder *pbi, YV12_BUFFER_CONFIG *frame);
-aom_codec_err_t av1_copy_reference_dec(struct AV1Decoder *pbi,
- AOM_REFFRAME ref_frame_flag,
+aom_codec_err_t av1_copy_reference_dec(struct AV1Decoder *pbi, int idx,
YV12_BUFFER_CONFIG *sd);
-aom_codec_err_t av1_set_reference_dec(AV1_COMMON *cm,
- AOM_REFFRAME ref_frame_flag,
+aom_codec_err_t av1_set_reference_dec(AV1_COMMON *cm, int idx,
YV12_BUFFER_CONFIG *sd);
static INLINE uint8_t read_marker(aom_decrypt_cb decrypt_cb,
@@ -213,7 +210,6 @@ static INLINE int dec_is_ref_frame_buf(AV1Decoder *const pbi,
}
#endif // CONFIG_EXT_REFS
-#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
#define ACCT_STR __func__
static INLINE int av1_read_uniform(aom_reader *r, int n) {
const int l = get_unsigned_bits(n);
@@ -225,7 +221,6 @@ static INLINE int av1_read_uniform(aom_reader *r, int n) {
else
return (v << 1) - m + aom_read_literal(r, 1, ACCT_STR);
}
-#endif // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
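// Worked example for av1_read_uniform(r, n = 11), now compiled
// unconditionally: l = get_unsigned_bits(11) = 4, so m = (1 << 4) - 11 = 5
// values get the short 3-bit code:
//   first 3 bits decode to v = 3 -> 3 < 5, return 3                 (3 bits)
//   first 3 bits decode to v = 6 -> read one extra bit b,
//                                   return (6 << 1) - 5 + b = 7 + b (4 bits)
// i.e. a near-uniform code over the 11 possible index values.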
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/aom/av1/decoder/decodetxb.c b/third_party/aom/av1/decoder/decodetxb.c
index 6e38427b3..13f944b35 100644
--- a/third_party/aom/av1/decoder/decodetxb.c
+++ b/third_party/aom/av1/decoder/decodetxb.c
@@ -15,16 +15,20 @@
#include "av1/decoder/decodemv.h"
#include "av1/decoder/decodetxb.h"
#include "av1/decoder/dsubexp.h"
+#include "av1/decoder/symbolrate.h"
#define ACCT_STR __func__
-static int read_golomb(MACROBLOCKD *xd, aom_reader *r) {
+static int read_golomb(MACROBLOCKD *xd, aom_reader *r, FRAME_COUNTS *counts) {
+#if !CONFIG_SYMBOLRATE
+ (void)counts;
+#endif
int x = 1;
int length = 0;
int i = 0;
while (!i) {
- i = aom_read_bit(r, ACCT_STR);
+ i = av1_read_record_bit(counts, r, ACCT_STR);
++length;
if (length >= 32) {
aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
@@ -35,21 +39,247 @@ static int read_golomb(MACROBLOCKD *xd, aom_reader *r) {
for (i = 0; i < length - 1; ++i) {
x <<= 1;
- x += aom_read_bit(r, ACCT_STR);
+ x += av1_read_record_bit(counts, r, ACCT_STR);
}
return x - 1;
}
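// Worked example for read_golomb() above, assuming the reader returns the
// bit sequence 0 0 1 0 1:
//   prefix: two 0s then a 1             -> length = 3
//   suffix: bits 0,1 folded into x      -> x = (1 << 1 | 0) = 2, (2 << 1 | 1) = 5
//   return x - 1 = 4
// so the Exp-Golomb codeword 00101 carries the residual value 4.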
+static INLINE int read_nz_map(aom_reader *r, tran_low_t *tcoeffs, int plane,
+ const int16_t *scan, TX_SIZE tx_size,
+ TX_TYPE tx_type, FRAME_CONTEXT *fc,
+ FRAME_COUNTS *counts) {
+ TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int height = tx_size_high[tx_size];
+#if CONFIG_CTX1D
+ const int width = tx_size_wide[tx_size];
+ const int eob_offset = width + height;
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int seg_eob =
+ (tx_class == TX_CLASS_2D) ? tx_size_2d[tx_size] : eob_offset;
+#else
+ const int seg_eob = tx_size_2d[tx_size];
+#endif
+ const PLANE_TYPE plane_type = get_plane_type(plane);
+ unsigned int(*nz_map_count)[SIG_COEF_CONTEXTS][2] =
+ (counts) ? &counts->nz_map[txs_ctx][plane_type] : NULL;
+#if !LV_MAP_PROB
+ aom_prob *nz_map = fc->nz_map[txs_ctx][plane_type];
+ aom_prob *eob_flag = fc->eob_flag[txs_ctx][plane_type];
+#endif
+ int c;
+ for (c = 0; c < seg_eob; ++c) {
+ int is_nz;
+ int coeff_ctx = get_nz_map_ctx(tcoeffs, c, scan, bwl, height, tx_type);
+ int eob_ctx = get_eob_ctx(tcoeffs, scan[c], txs_ctx, tx_type);
+
+ if (c < seg_eob - 1) {
+#if LV_MAP_PROB
+ is_nz = av1_read_record_bin(
+ counts, r, fc->nz_map_cdf[txs_ctx][plane_type][coeff_ctx], 2,
+ ACCT_STR);
+#else
+ is_nz = aom_read(r, nz_map[coeff_ctx], ACCT_STR);
+#endif
+ } else {
+ is_nz = 1;
+ }
+
+ // set non-zero coefficient map.
+ tcoeffs[scan[c]] = is_nz;
+
+ if (c == seg_eob - 1) {
+ ++c;
+ break;
+ }
+
+ if (counts) ++(*nz_map_count)[coeff_ctx][is_nz];
+
+ if (is_nz) {
+#if LV_MAP_PROB
+ int is_eob = av1_read_record_bin(
+ counts, r, fc->eob_flag_cdf[txs_ctx][plane_type][eob_ctx], 2,
+ ACCT_STR);
+#else
+ int is_eob = aom_read(r, eob_flag[eob_ctx], ACCT_STR);
+#endif
+ if (counts) ++counts->eob_flag[txs_ctx][plane_type][eob_ctx][is_eob];
+ if (is_eob) break;
+ }
+ }
+ return AOMMIN(seg_eob, c + 1);
+}
+
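// Example trace for read_nz_map() above (assumed 4x4 transform, so
// seg_eob = 16): if the nz flags decode as 0, 0, 1 at scan positions 0..2 and
// the eob flag then fires at position 2, the loop breaks with c == 2 and the
// function returns AOMMIN(16, 2 + 1) = 3 decoded coefficient slots.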
+#if CONFIG_CTX1D
+static INLINE int read_nz_map_vert(aom_reader *r, tran_low_t *tcoeffs,
+ int plane, const int16_t *scan,
+ const int16_t *iscan, TX_SIZE tx_size,
+ TX_TYPE tx_type, FRAME_CONTEXT *fc,
+ FRAME_COUNTS *counts) {
+ const TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const PLANE_TYPE plane_type = get_plane_type(plane);
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int width = tx_size_wide[tx_size];
+ const int height = tx_size_high[tx_size];
+ int16_t eob_ls[MAX_HVTX_SIZE];
+ int eob = 0;
+#if !LV_MAP_PROB
+ aom_prob *nz_map = fc->nz_map[txs_ctx][plane_type];
+#endif
+ for (int col = 0; col < width; ++col) {
+ int el_ctx = get_empty_line_ctx(col, eob_ls);
+#if LV_MAP_PROB
+ int empty_line = av1_read_record_bin(
+ counts, r, fc->empty_line_cdf[txs_ctx][plane_type][tx_class][el_ctx], 2,
+ ACCT_STR);
+#else
+ int empty_line = aom_read(
+ r, fc->empty_line[txs_ctx][plane_type][tx_class][el_ctx], ACCT_STR);
+#endif
+ if (counts)
+ ++counts->empty_line[txs_ctx][plane_type][tx_class][el_ctx][empty_line];
+ if (!empty_line) {
+ int row;
+ for (row = 0; row < height; ++row) {
+ if (row + 1 != height) {
+ int coeff_idx = row * width + col;
+ int scan_idx = iscan[coeff_idx];
+ int coeff_ctx =
+ get_nz_map_ctx(tcoeffs, scan_idx, scan, bwl, height, tx_type);
+#if LV_MAP_PROB
+ int is_nz = av1_read_record_bin(
+ counts, r, fc->nz_map_cdf[txs_ctx][plane_type][coeff_ctx], 2,
+ ACCT_STR);
+#else
+ int is_nz = aom_read(r, nz_map[coeff_ctx], ACCT_STR);
+#endif
+ if (counts) ++counts->nz_map[txs_ctx][plane_type][coeff_ctx][is_nz];
+ tcoeffs[coeff_idx] = is_nz;
+ if (is_nz) {
+ eob = AOMMAX(eob, iscan[coeff_idx] + 1);
+ if (row + 1 != height) {
+ int eob_ctx = get_hv_eob_ctx(col, row, eob_ls);
+#if LV_MAP_PROB
+ int is_eob = av1_read_record_bin(
+ counts, r,
+ fc->hv_eob_cdf[txs_ctx][plane_type][tx_class][eob_ctx], 2,
+ ACCT_STR);
+#else
+ int is_eob = aom_read(
+ r, fc->hv_eob[txs_ctx][plane_type][tx_class][eob_ctx],
+ ACCT_STR);
+#endif
+ if (counts)
+ ++counts
+ ->hv_eob[txs_ctx][plane_type][tx_class][eob_ctx][is_eob];
+ if (is_eob) break;
+ }
+ }
+ } else {
+ int coeff_idx = row * width + col;
+ tcoeffs[coeff_idx] = 1;
+ eob = AOMMAX(eob, iscan[coeff_idx] + 1);
+ }
+ }
+ eob_ls[col] = AOMMIN(height, row + 1);
+ } else {
+ eob_ls[col] = 0;
+ }
+ }
+ return eob;
+}
+
+static INLINE int read_nz_map_horiz(aom_reader *r, tran_low_t *tcoeffs,
+ int plane, const int16_t *scan,
+ const int16_t *iscan, TX_SIZE tx_size,
+ TX_TYPE tx_type, FRAME_CONTEXT *fc,
+ FRAME_COUNTS *counts) {
+ const TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const PLANE_TYPE plane_type = get_plane_type(plane);
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int width = tx_size_wide[tx_size];
+ const int height = tx_size_high[tx_size];
+ int16_t eob_ls[MAX_HVTX_SIZE];
+ int eob = 0;
+#if !LV_MAP_PROB
+ aom_prob *nz_map = fc->nz_map[txs_ctx][plane_type];
+#endif
+ for (int row = 0; row < height; ++row) {
+ int el_ctx = get_empty_line_ctx(row, eob_ls);
+#if LV_MAP_PROB
+ int empty_line = av1_read_record_bin(
+ counts, r, fc->empty_line_cdf[txs_ctx][plane_type][tx_class][el_ctx], 2,
+ ACCT_STR);
+#else
+ int empty_line = aom_read(
+ r, fc->empty_line[txs_ctx][plane_type][tx_class][el_ctx], ACCT_STR);
+#endif
+ if (counts)
+ ++counts->empty_line[txs_ctx][plane_type][tx_class][el_ctx][empty_line];
+ if (!empty_line) {
+ int col;
+ for (col = 0; col < width; ++col) {
+ if (col + 1 != width) {
+ int coeff_idx = row * width + col;
+ int scan_idx = iscan[coeff_idx];
+ int coeff_ctx =
+ get_nz_map_ctx(tcoeffs, scan_idx, scan, bwl, height, tx_type);
+#if LV_MAP_PROB
+ int is_nz = av1_read_record_bin(
+ counts, r, fc->nz_map_cdf[txs_ctx][plane_type][coeff_ctx], 2,
+ ACCT_STR);
+#else
+ int is_nz = aom_read(r, nz_map[coeff_ctx], ACCT_STR);
+#endif
+ if (counts) ++counts->nz_map[txs_ctx][plane_type][coeff_ctx][is_nz];
+ tcoeffs[coeff_idx] = is_nz;
+ if (is_nz) {
+ eob = AOMMAX(eob, iscan[coeff_idx] + 1);
+ int eob_ctx = get_hv_eob_ctx(row, col, eob_ls);
+#if LV_MAP_PROB
+ int is_eob = av1_read_record_bin(
+ counts, r,
+ fc->hv_eob_cdf[txs_ctx][plane_type][tx_class][eob_ctx], 2,
+ ACCT_STR);
+#else
+ int is_eob =
+ aom_read(r, fc->hv_eob[txs_ctx][plane_type][tx_class][eob_ctx],
+ ACCT_STR);
+#endif
+ if (counts)
+ ++counts->hv_eob[txs_ctx][plane_type][tx_class][eob_ctx][is_eob];
+ if (is_eob) break;
+ }
+ } else {
+ int coeff_idx = row * width + col;
+ tcoeffs[coeff_idx] = 1;
+ eob = AOMMAX(eob, iscan[coeff_idx] + 1);
+ }
+ }
+ eob_ls[row] = AOMMIN(width, col + 1);
+ } else {
+ eob_ls[row] = 0;
+ }
+ }
+ return eob;
+}
+#endif
+
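// Shape of the two CONFIG_CTX1D readers above: TX_CLASS_VERT walks columns,
// TX_CLASS_HORIZ walks rows. Each line first codes an empty_line flag; e.g.
// a column whose flag is 1 stores eob_ls[col] = 0 and skips all per-sample
// reads, while a non-empty column codes nz flags terminated by an hv_eob
// flag (the last sample of a non-empty line is implicitly nonzero), with
// eob_ls[] feeding each line's end position into the next line's context via
// get_empty_line_ctx() / get_hv_eob_ctx().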
uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
aom_reader *r, int blk_row, int blk_col, int block,
int plane, tran_low_t *tcoeffs, TXB_CTX *txb_ctx,
TX_SIZE tx_size, int16_t *max_scan_line, int *eob) {
+ FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
FRAME_COUNTS *counts = xd->counts;
TX_SIZE txs_ctx = get_txsize_context(tx_size);
PLANE_TYPE plane_type = get_plane_type(plane);
- aom_prob *nz_map = cm->fc->nz_map[txs_ctx][plane_type];
- aom_prob *eob_flag = cm->fc->eob_flag[txs_ctx][plane_type];
+#if !LV_MAP_PROB
+ aom_prob *nz_map = ec_ctx->nz_map[txs_ctx][plane_type];
+ aom_prob *eob_flag = ec_ctx->eob_flag[txs_ctx][plane_type];
+#endif
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const int seg_eob = tx_size_2d[tx_size];
int c = 0;
@@ -59,14 +289,16 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
const int height = tx_size_high[tx_size];
int cul_level = 0;
- unsigned int(*nz_map_count)[SIG_COEF_CONTEXTS][2];
-
- nz_map_count = (counts) ? &counts->nz_map[txs_ctx][plane_type] : NULL;
-
memset(tcoeffs, 0, sizeof(*tcoeffs) * seg_eob);
+#if LV_MAP_PROB
+ int all_zero = av1_read_record_bin(
+ counts, r, ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2,
+ ACCT_STR);
+#else
int all_zero =
- aom_read(r, cm->fc->txb_skip[txs_ctx][txb_ctx->txb_skip_ctx], ACCT_STR);
+ aom_read(r, ec_ctx->txb_skip[txs_ctx][txb_ctx->txb_skip_ctx], ACCT_STR);
+#endif
if (xd->counts)
++xd->counts->txb_skip[txs_ctx][txb_ctx->txb_skip_ctx][all_zero];
@@ -89,42 +321,46 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
const int16_t *scan = scan_order->scan;
- const int16_t *iscan = scan_order->iscan;
-
- for (c = 0; c < seg_eob; ++c) {
- int is_nz;
- int coeff_ctx = get_nz_map_ctx(tcoeffs, scan[c], bwl, height, iscan);
- int eob_ctx = get_eob_ctx(tcoeffs, scan[c], txs_ctx);
-
- if (c < seg_eob - 1)
- is_nz = aom_read(r, nz_map[coeff_ctx], ACCT_STR);
- else
- is_nz = 1;
-
- // set non-zero coefficient map.
- tcoeffs[scan[c]] = is_nz;
-
- if (c == seg_eob - 1) {
- ++c;
- break;
- }
-
- if (counts) ++(*nz_map_count)[coeff_ctx][is_nz];
- if (is_nz) {
- int is_eob = aom_read(r, eob_flag[eob_ctx], ACCT_STR);
- if (counts) ++counts->eob_flag[txs_ctx][plane_type][eob_ctx][is_eob];
- if (is_eob) break;
+#if CONFIG_CTX1D
+ const int16_t *iscan = scan_order->iscan;
+ TX_CLASS tx_class = get_tx_class(tx_type);
+ if (tx_class == TX_CLASS_2D) {
+ *eob =
+ read_nz_map(r, tcoeffs, plane, scan, tx_size, tx_type, ec_ctx, counts);
+ } else {
+#if LV_MAP_PROB
+ const int eob_mode = av1_read_record_bin(
+ counts, r, ec_ctx->eob_mode_cdf[txs_ctx][plane_type][tx_class], 2,
+ ACCT_STR);
+#else
+ const int eob_mode =
+ aom_read(r, ec_ctx->eob_mode[txs_ctx][plane_type][tx_class], ACCT_STR);
+#endif
+ if (counts) ++counts->eob_mode[txs_ctx][plane_type][tx_class][eob_mode];
+ if (eob_mode == 0) {
+ *eob = read_nz_map(r, tcoeffs, plane, scan, tx_size, tx_type, ec_ctx,
+ counts);
+ } else {
+ assert(tx_class == TX_CLASS_VERT || tx_class == TX_CLASS_HORIZ);
+ if (tx_class == TX_CLASS_VERT)
+ *eob = read_nz_map_vert(r, tcoeffs, plane, scan, iscan, tx_size,
+ tx_type, ec_ctx, counts);
+ else
+ *eob = read_nz_map_horiz(r, tcoeffs, plane, scan, iscan, tx_size,
+ tx_type, ec_ctx, counts);
}
}
-
- *eob = AOMMIN(seg_eob, c + 1);
+#else
+ *eob = read_nz_map(r, tcoeffs, plane, scan, tx_size, tx_type, ec_ctx, counts);
+#endif
*max_scan_line = *eob;
int i;
for (i = 0; i < NUM_BASE_LEVELS; ++i) {
- aom_prob *coeff_base = cm->fc->coeff_base[txs_ctx][plane_type][i];
-
+#if !LV_MAP_PROB
+ aom_prob *coeff_base = ec_ctx->coeff_base[txs_ctx][plane_type][i];
+#endif
update_eob = 0;
for (c = *eob - 1; c >= 0; --c) {
tran_low_t *v = &tcoeffs[scan[c]];
@@ -135,7 +371,14 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
ctx = get_base_ctx(tcoeffs, scan[c], bwl, height, i + 1);
- if (aom_read(r, coeff_base[ctx], ACCT_STR)) {
+#if LV_MAP_PROB
+ if (av1_read_record_bin(
+ counts, r, ec_ctx->coeff_base_cdf[txs_ctx][plane_type][i][ctx], 2,
+ ACCT_STR))
+#else
+ if (aom_read(r, coeff_base[ctx], ACCT_STR))
+#endif
+ {
*v = i + 1;
cul_level += i + 1;
@@ -143,11 +386,17 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
if (c == 0) {
int dc_sign_ctx = txb_ctx->dc_sign_ctx;
+#if LV_MAP_PROB
+ sign = av1_read_record_bin(
+ counts, r, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], 2,
+ ACCT_STR);
+#else
sign =
- aom_read(r, cm->fc->dc_sign[plane_type][dc_sign_ctx], ACCT_STR);
+ aom_read(r, ec_ctx->dc_sign[plane_type][dc_sign_ctx], ACCT_STR);
+#endif
if (counts) ++counts->dc_sign[plane_type][dc_sign_ctx][sign];
} else {
- sign = aom_read_bit(r, ACCT_STR);
+ sign = av1_read_record_bit(counts, r, ACCT_STR);
}
if (sign) *v = -(*v);
continue;
@@ -170,18 +419,74 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
if (c == 0) {
int dc_sign_ctx = txb_ctx->dc_sign_ctx;
- sign = aom_read(r, cm->fc->dc_sign[plane_type][dc_sign_ctx], ACCT_STR);
+#if LV_MAP_PROB
+ sign = av1_read_record_bin(
+ counts, r, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], 2, ACCT_STR);
+#else
+ sign = aom_read(r, ec_ctx->dc_sign[plane_type][dc_sign_ctx], ACCT_STR);
+#endif
if (counts) ++counts->dc_sign[plane_type][dc_sign_ctx][sign];
} else {
- sign = aom_read_bit(r, ACCT_STR);
+ sign = av1_read_record_bit(counts, r, ACCT_STR);
}
ctx = get_br_ctx(tcoeffs, scan[c], bwl, height);
- if (cm->fc->coeff_lps[txs_ctx][plane_type][ctx] == 0) exit(0);
+#if BR_NODE
+ for (idx = 0; idx < BASE_RANGE_SETS; ++idx) {
+#if LV_MAP_PROB
+ if (av1_read_record_bin(
+ counts, r, ec_ctx->coeff_br_cdf[txs_ctx][plane_type][idx][ctx], 2,
+ ACCT_STR))
+#else // LV_MAP_PROB
+ if (aom_read(r, ec_ctx->coeff_br[txs_ctx][plane_type][idx][ctx],
+ ACCT_STR))
+#endif // LV_MAP_PROB
+ {
+ int extra_bits = (1 << br_extra_bits[idx]) - 1;
+ // int br_offset = aom_read_literal(r, extra_bits, ACCT_STR);
+ int br_offset = 0;
+ int tok;
+ if (counts) ++counts->coeff_br[txs_ctx][plane_type][idx][ctx][1];
+ for (tok = 0; tok < extra_bits; ++tok) {
+#if LV_MAP_PROB
+ if (av1_read_record_bin(
+ counts, r, ec_ctx->coeff_lps_cdf[txs_ctx][plane_type][ctx], 2,
+ ACCT_STR))
+#else
+ if (aom_read(r, ec_ctx->coeff_lps[txs_ctx][plane_type][ctx],
+ ACCT_STR))
+#endif
+ {
+ br_offset = tok;
+ if (counts) ++counts->coeff_lps[txs_ctx][plane_type][ctx][1];
+ break;
+ }
+ if (counts) ++counts->coeff_lps[txs_ctx][plane_type][ctx][0];
+ }
+ if (tok == extra_bits) br_offset = extra_bits;
+ int br_base = br_index_to_coeff[idx];
+
+ *v = NUM_BASE_LEVELS + 1 + br_base + br_offset;
+ cul_level += *v;
+ if (sign) *v = -(*v);
+ break;
+ }
+ if (counts) ++counts->coeff_br[txs_ctx][plane_type][idx][ctx][0];
+ }
+
+ if (idx < BASE_RANGE_SETS) continue;
+#else
for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
- if (aom_read(r, cm->fc->coeff_lps[txs_ctx][plane_type][ctx], ACCT_STR)) {
+#if LV_MAP_PROB
+ if (av1_read_record_bin(counts, r,
+ ec_ctx->coeff_lps_cdf[txs_ctx][plane_type][ctx],
+ 2, ACCT_STR))
+#else
+ if (aom_read(r, ec_ctx->coeff_lps[txs_ctx][plane_type][ctx], ACCT_STR))
+#endif
+ {
*v = (idx + 1 + NUM_BASE_LEVELS);
if (sign) *v = -(*v);
cul_level += abs(*v);
@@ -192,9 +497,10 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
if (counts) ++counts->coeff_lps[txs_ctx][plane_type][ctx][0];
}
if (idx < COEFF_BASE_RANGE) continue;
+#endif
// decode 0-th order Golomb code
- *v = read_golomb(xd, r) + COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS;
+ *v = read_golomb(xd, r, counts) + COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS;
if (sign) *v = -(*v);
cul_level += abs(*v);
}
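/* Sketch of the order-0 exp-Golomb decode invoked above, assuming
 * read_golomb() follows the usual prefix/suffix construction;
 * read_bit() stands in for av1_read_record_bit(counts, r, ACCT_STR). */
static int read_golomb_sketch(int (*read_bit)(void)) {
  int length = 0;
  while (!read_bit()) ++length; /* count zeros up to the marker 1 */
  int x = 1;                    /* the marker bit is the leading 1 */
  for (int i = 0; i < length; ++i)
    x = (x << 1) | read_bit();  /* 'length' suffix bits, MSB first */
  return x - 1;                 /* code n encodes value n - 1 */
}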
@@ -202,6 +508,9 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
for (c = 0; c < *eob; ++c) {
int16_t dqv = (c == 0) ? dequant[0] : dequant[1];
tran_low_t *v = &tcoeffs[scan[c]];
+#if CONFIG_SYMBOLRATE
+ av1_record_coeff(counts, abs(*v));
+#endif
int sign = (*v) < 0;
*v = (abs(*v) * dqv) >> shift;
if (sign) *v = -(*v);
@@ -251,11 +560,15 @@ uint8_t av1_read_coeffs_txb_facade(AV1_COMMON *cm, MACROBLOCKD *xd,
return cul_level;
}
+#if !LV_MAP_PROB
static void read_txb_probs(FRAME_CONTEXT *fc, const TX_SIZE tx_size,
- aom_reader *r) {
+ aom_reader *r, FRAME_COUNTS *counts) {
+#if !CONFIG_SYMBOLRATE
+ (void)counts;
+#endif
int plane, ctx, level;
- if (aom_read_bit(r, ACCT_STR) == 0) return;
+ if (av1_read_record_bit(counts, r, ACCT_STR) == 0) return;
for (ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
av1_diff_update_prob(r, &fc->txb_skip[tx_size][ctx], ACCT_STR);
@@ -279,14 +592,17 @@ static void read_txb_probs(FRAME_CONTEXT *fc, const TX_SIZE tx_size,
av1_diff_update_prob(r, &fc->coeff_lps[tx_size][plane][ctx], ACCT_STR);
}
-void av1_read_txb_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, aom_reader *r) {
+void av1_read_txb_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, aom_reader *r,
+ FRAME_COUNTS *counts) {
const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
TX_SIZE tx_size;
int ctx, plane;
+
for (plane = 0; plane < PLANE_TYPES; ++plane)
for (ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
av1_diff_update_prob(r, &fc->dc_sign[plane][ctx], ACCT_STR);
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
- read_txb_probs(fc, tx_size, r);
+ read_txb_probs(fc, tx_size, r, counts);
}
+#endif // !LV_MAP_PROB
diff --git a/third_party/aom/av1/decoder/decodetxb.h b/third_party/aom/av1/decoder/decodetxb.h
index 313476139..1c6512e97 100644
--- a/third_party/aom/av1/decoder/decodetxb.h
+++ b/third_party/aom/av1/decoder/decodetxb.h
@@ -28,5 +28,8 @@ uint8_t av1_read_coeffs_txb_facade(AV1_COMMON *cm, MACROBLOCKD *xd,
int plane, tran_low_t *tcoeffs,
TX_SIZE tx_size, int16_t *max_scan_line,
int *eob);
-void av1_read_txb_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, aom_reader *r);
+#if !LV_MAP_PROB
+void av1_read_txb_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, aom_reader *r,
+ FRAME_COUNTS *counts);
+#endif // !LV_MAP_PROB
#endif // DECODETXB_H_
diff --git a/third_party/aom/av1/decoder/detokenize.c b/third_party/aom/av1/decoder/detokenize.c
index 461494dfe..a59a7bac1 100644
--- a/third_party/aom/av1/decoder/detokenize.c
+++ b/third_party/aom/av1/decoder/detokenize.c
@@ -24,7 +24,11 @@
#include "av1/common/common.h"
#include "av1/common/entropy.h"
#include "av1/common/idct.h"
+#endif
+
+#include "av1/decoder/symbolrate.h"
+#if !CONFIG_PVQ || CONFIG_VAR_TX
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2
@@ -43,31 +47,43 @@
} while (0)
#if CONFIG_NEW_MULTISYMBOL
-#define READ_COEFF(prob_name, cdf_name, num, r) read_coeff(cdf_name, num, r);
-static INLINE int read_coeff(const aom_cdf_prob *const *cdf, int n,
+#define READ_COEFF(counts, prob_name, cdf_name, num, r) \
+ read_coeff(counts, cdf_name, num, r);
+static INLINE int read_coeff(FRAME_COUNTS *counts,
+ const aom_cdf_prob *const *cdf, int n,
aom_reader *r) {
+#if !CONFIG_SYMBOLRATE
+ (void)counts;
+#endif
int val = 0;
int i = 0;
int count = 0;
while (count < n) {
const int size = AOMMIN(n - count, 4);
- val |= aom_read_cdf(r, cdf[i++], 1 << size, ACCT_STR) << count;
+ val |= av1_read_record_cdf(counts, r, cdf[i++], 1 << size, ACCT_STR)
+ << count;
count += size;
}
return val;
}
#else
-#define READ_COEFF(prob_name, cdf_name, num, r) read_coeff(prob_name, num, r);
-static INLINE int read_coeff(const aom_prob *probs, int n, aom_reader *r) {
+#define READ_COEFF(counts, prob_name, cdf_name, num, r) \
+ read_coeff(counts, prob_name, num, r);
+static INLINE int read_coeff(FRAME_COUNTS *counts, const aom_prob *probs, int n,
+ aom_reader *r) {
+#if !CONFIG_SYMBOLRATE
+ (void)counts;
+#endif
int i, val = 0;
- for (i = 0; i < n; ++i) val = (val << 1) | aom_read(r, probs[i], ACCT_STR);
+ for (i = 0; i < n; ++i)
+ val = (val << 1) | av1_read_record(counts, r, probs[i], ACCT_STR);
return val;
}
#endif
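/* What READ_COEFF/read_coeff compute, stripped of the prob/CDF plumbing:
 * an n-bit magnitude assembled from reads of at most 4 bits at a time.
 * In the NEW_MULTISYMBOL branch the chunks are packed low-bits-first; in
 * the binary branch the bits arrive one at a time, MSB first. A sketch of
 * the multisymbol packing, with read_bits(k) standing in for
 * av1_read_record_cdf(counts, r, cdf[i], 1 << k, ACCT_STR): */
static int read_literal_sketch(int n, int (*read_bits)(int nbits)) {
  int val = 0, count = 0;
  while (count < n) {
    const int size = AOMMIN(n - count, 4);
    val |= read_bits(size) << count; /* low chunk first */
    count += size;
  }
  return val;
}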
-static int token_to_value(aom_reader *const r, int token, TX_SIZE tx_size,
- int bit_depth) {
+static int token_to_value(FRAME_COUNTS *counts, aom_reader *const r, int token,
+ TX_SIZE tx_size, int bit_depth) {
#if !CONFIG_HIGHBITDEPTH
assert(bit_depth == 8);
#endif // !CONFIG_HIGHBITDEPTH
@@ -79,20 +95,25 @@ static int token_to_value(aom_reader *const r, int token, TX_SIZE tx_size,
case THREE_TOKEN:
case FOUR_TOKEN: return token;
case CATEGORY1_TOKEN:
- return CAT1_MIN_VAL + READ_COEFF(av1_cat1_prob, av1_cat1_cdf, 1, r);
+ return CAT1_MIN_VAL +
+ READ_COEFF(counts, av1_cat1_prob, av1_cat1_cdf, 1, r);
case CATEGORY2_TOKEN:
- return CAT2_MIN_VAL + READ_COEFF(av1_cat2_prob, av1_cat2_cdf, 2, r);
+ return CAT2_MIN_VAL +
+ READ_COEFF(counts, av1_cat2_prob, av1_cat2_cdf, 2, r);
case CATEGORY3_TOKEN:
- return CAT3_MIN_VAL + READ_COEFF(av1_cat3_prob, av1_cat3_cdf, 3, r);
+ return CAT3_MIN_VAL +
+ READ_COEFF(counts, av1_cat3_prob, av1_cat3_cdf, 3, r);
case CATEGORY4_TOKEN:
- return CAT4_MIN_VAL + READ_COEFF(av1_cat4_prob, av1_cat4_cdf, 4, r);
+ return CAT4_MIN_VAL +
+ READ_COEFF(counts, av1_cat4_prob, av1_cat4_cdf, 4, r);
case CATEGORY5_TOKEN:
- return CAT5_MIN_VAL + READ_COEFF(av1_cat5_prob, av1_cat5_cdf, 5, r);
+ return CAT5_MIN_VAL +
+ READ_COEFF(counts, av1_cat5_prob, av1_cat5_cdf, 5, r);
case CATEGORY6_TOKEN: {
const int skip_bits = (int)sizeof(av1_cat6_prob) -
av1_get_cat6_extrabits_size(tx_size, bit_depth);
- return CAT6_MIN_VAL + READ_COEFF(av1_cat6_prob + skip_bits, av1_cat6_cdf,
- 18 - skip_bits, r);
+ return CAT6_MIN_VAL + READ_COEFF(counts, av1_cat6_prob + skip_bits,
+ av1_cat6_cdf, 18 - skip_bits, r);
}
default:
assert(0); // Invalid token.
@@ -104,22 +125,22 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
TX_SIZE tx_size, TX_TYPE tx_type, const int16_t *dq,
#if CONFIG_NEW_QUANT
dequant_val_type_nuq *dq_val,
-#endif // CONFIG_NEW_QUANT
+#else
#if CONFIG_AOM_QM
- const qm_val_t *iqm[2][TX_SIZES_ALL],
+ qm_val_t *iqm[2][TX_SIZES_ALL],
#endif // CONFIG_AOM_QM
+#endif // CONFIG_NEW_QUANT
int ctx, const int16_t *scan, const int16_t *nb,
int16_t *max_scan_line, aom_reader *r) {
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
const int max_eob = tx_size_2d[tx_size];
const int ref = is_inter_block(&xd->mi[0]->mbmi);
-#if CONFIG_AOM_QM
+#if CONFIG_AOM_QM && !CONFIG_NEW_QUANT
const qm_val_t *iqmatrix = iqm[!ref][tx_size];
-#else
- (void)tx_type;
#endif // CONFIG_AOM_QM
+ (void)tx_type;
int band, c = 0;
- const int tx_size_ctx = txsize_sqr_map[tx_size];
+ const TX_SIZE tx_size_ctx = txsize_sqr_map[tx_size];
aom_cdf_prob(*coef_head_cdfs)[COEFF_CONTEXTS][CDF_SIZE(ENTROPY_TOKENS)] =
ec_ctx->coef_head_cdfs[tx_size_ctx][type][ref];
aom_cdf_prob(*coef_tail_cdfs)[COEFF_CONTEXTS][CDF_SIZE(ENTROPY_TOKENS)] =
@@ -130,7 +151,7 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
const uint8_t *band_translate = get_band_translate(tx_size);
int dq_shift;
int v, token;
- int16_t dqv = dq[0];
+ int32_t dqv = dq[0];
#if CONFIG_NEW_QUANT
const tran_low_t *dqv_val = &dq_val[0][0];
#endif // CONFIG_NEW_QUANT
@@ -149,9 +170,10 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
dqv_val = &dq_val[band][0];
#endif // CONFIG_NEW_QUANT
- comb_token = last_pos ? 2 * aom_read_bit(r, ACCT_STR) + 2
- : aom_read_symbol(r, coef_head_cdfs[band][ctx],
- HEAD_TOKENS + first_pos, ACCT_STR) +
+ comb_token = last_pos ? 2 * av1_read_record_bit(xd->counts, r, ACCT_STR) + 2
+ : av1_read_record_symbol(
+ xd->counts, r, coef_head_cdfs[band][ctx],
+ HEAD_TOKENS + first_pos, ACCT_STR) +
!first_pos;
if (first_pos) {
if (comb_token == 0) return 0;
@@ -161,6 +183,9 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
while (!token) {
*max_scan_line = AOMMAX(*max_scan_line, scan[c]);
token_cache[scan[c]] = 0;
+#if CONFIG_SYMBOLRATE
+ av1_record_coeff(xd->counts, 0);
+#endif
++c;
dqv = dq[1];
ctx = get_coef_context(nb, token_cache, c);
@@ -168,18 +193,20 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
last_pos = (c + 1 == max_eob);
- comb_token = last_pos ? 2 * aom_read_bit(r, ACCT_STR) + 2
- : aom_read_symbol(r, coef_head_cdfs[band][ctx],
- HEAD_TOKENS, ACCT_STR) +
- 1;
+ comb_token =
+ last_pos
+ ? 2 * av1_read_record_bit(xd->counts, r, ACCT_STR) + 2
+ : av1_read_record_symbol(xd->counts, r, coef_head_cdfs[band][ctx],
+ HEAD_TOKENS, ACCT_STR) +
+ 1;
token = comb_token >> 1;
}
more_data = comb_token & 1;
if (token > ONE_TOKEN)
- token +=
- aom_read_symbol(r, coef_tail_cdfs[band][ctx], TAIL_TOKENS, ACCT_STR);
+ token += av1_read_record_symbol(xd->counts, r, coef_tail_cdfs[band][ctx],
+ TAIL_TOKENS, ACCT_STR);
#if CONFIG_NEW_QUANT
dqv_val = &dq_val[band][0];
#endif // CONFIG_NEW_QUANT
@@ -187,7 +214,10 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
*max_scan_line = AOMMAX(*max_scan_line, scan[c]);
token_cache[scan[c]] = av1_pt_energy_class[token];
- val = token_to_value(r, token, tx_size, xd->bd);
+ val = token_to_value(xd->counts, r, token, tx_size, xd->bd);
+#if CONFIG_SYMBOLRATE
+ av1_record_coeff(xd->counts, val);
+#endif
#if CONFIG_NEW_QUANT
v = av1_dequant_abscoeff_nuq(val, dqv, dqv_val);
@@ -195,14 +225,15 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
#else
#if CONFIG_AOM_QM
// Apply quant matrix only for 2D transforms
- if (IS_2D_TRANSFORM(tx_type))
+ if (IS_2D_TRANSFORM(tx_type) && iqmatrix != NULL)
dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
v = (val * dqv) >> dq_shift;
#endif
- v = (int)check_range(aom_read_bit(r, ACCT_STR) ? -v : v, xd->bd);
+ v = (int)check_range(av1_read_record_bit(xd->counts, r, ACCT_STR) ? -v : v,
+ xd->bd);
dqcoeff[scan[c]] = v;
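/* Worked example of the matrix-weighted dequant above, assuming
 * AOM_QM_BITS == 5 (values illustrative): with iqmatrix[scan[c]] = 48
 * and dqv = 1000, the scaled step is
 * (48 * 1000 + (1 << 4)) >> 5 = 48016 >> 5 = 1500. */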
@@ -218,22 +249,15 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
}
#endif // !CONFIG_PVQ
-#if CONFIG_PALETTE
-void av1_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
- aom_reader *r) {
- const MODE_INFO *const mi = xd->mi[0];
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
+static void decode_color_map_tokens(Av1ColorMapParam *param, aom_reader *r) {
uint8_t color_order[PALETTE_MAX_SIZE];
- const int n = mbmi->palette_mode_info.palette_size[plane];
- uint8_t *const color_map = xd->plane[plane].color_index_map;
- aom_cdf_prob(
- *palette_cdf)[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] =
- plane ? xd->tile_ctx->palette_uv_color_index_cdf
- : xd->tile_ctx->palette_y_color_index_cdf;
- int plane_block_width, plane_block_height, rows, cols;
- av1_get_block_dimensions(mbmi->sb_type, plane, xd, &plane_block_width,
- &plane_block_height, &rows, &cols);
- assert(plane == 0 || plane == 1);
+ const int n = param->n_colors;
+ uint8_t *const color_map = param->color_map;
+ MapCdf color_map_cdf = param->map_cdf;
+ int plane_block_width = param->plane_width;
+ int plane_block_height = param->plane_height;
+ int rows = param->rows;
+ int cols = param->cols;
// The first color index.
color_map[0] = av1_read_uniform(r, n);
@@ -246,14 +270,14 @@ void av1_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
const int color_ctx = av1_get_palette_color_index_context(
color_map, plane_block_width, (i - j), j, n, color_order, NULL);
const int color_idx = aom_read_symbol(
- r, palette_cdf[n - PALETTE_MIN_SIZE][color_ctx], n, ACCT_STR);
+ r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx], n, ACCT_STR);
assert(color_idx >= 0 && color_idx < n);
color_map[(i - j) * plane_block_width + j] = color_order[color_idx];
}
}
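/* The loop above visits the color map in anti-diagonal (wavefront)
 * order, entry (i - j, j), so every index is decoded after its top and
 * left neighbours, which av1_get_palette_color_index_context consults
 * when forming color_order and the context. */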
// Copy last column to extra columns.
if (cols < plane_block_width) {
- for (int i = 0; i < plane_block_height; ++i) {
+ for (int i = 0; i < rows; ++i) {
memset(color_map + i * plane_block_width + cols,
color_map[i * plane_block_width + cols - 1],
(plane_block_width - cols));
@@ -265,7 +289,7 @@ void av1_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
const int color_ctx = av1_get_palette_color_index_context(
color_map, plane_block_width, i, j, n, color_order, NULL);
const int color_idx = aom_read_symbol(
- r, palette_cdf[n - PALETTE_MIN_SIZE][color_ctx], n, ACCT_STR);
+ r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx], n, ACCT_STR);
assert(color_idx >= 0 && color_idx < n);
color_map[i * plane_block_width + j] = color_order[color_idx];
}
@@ -280,7 +304,60 @@ void av1_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
color_map + (rows - 1) * plane_block_width, plane_block_width);
}
}
-#endif // CONFIG_PALETTE
+
+static void get_palette_params(const MACROBLOCKD *const xd, int plane,
+ BLOCK_SIZE bsize, Av1ColorMapParam *params) {
+ assert(plane == 0 || plane == 1);
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ params->color_map = xd->plane[plane].color_index_map;
+ params->map_cdf = plane ? xd->tile_ctx->palette_uv_color_index_cdf
+ : xd->tile_ctx->palette_y_color_index_cdf;
+ params->n_colors = pmi->palette_size[plane];
+ av1_get_block_dimensions(bsize, plane, xd, &params->plane_width,
+ &params->plane_height, &params->rows, &params->cols);
+}
+
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+static void get_mrc_params(const MACROBLOCKD *const xd, TX_SIZE tx_size,
+ Av1ColorMapParam *params) {
+ memset(params, 0, sizeof(*params));
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int is_inter = is_inter_block(mbmi);
+ params->color_map = xd->mrc_mask;
+ params->map_cdf = is_inter ? xd->tile_ctx->mrc_mask_inter_cdf
+ : xd->tile_ctx->mrc_mask_intra_cdf;
+ params->n_colors = 2;
+ params->plane_width = tx_size_wide[tx_size];
+ params->rows = tx_size_high[tx_size];
+ params->cols = tx_size_wide[tx_size];
+}
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+
+void av1_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
+ aom_reader *r) {
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ assert(plane == 0 || plane == 1);
+ assert(mbmi->sb_type >= BLOCK_8X8);
+ Av1ColorMapParam color_map_params;
+ memset(&color_map_params, 0, sizeof(color_map_params));
+ get_palette_params(xd, plane, mbmi->sb_type, &color_map_params);
+ decode_color_map_tokens(&color_map_params, r);
+}
+
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+static void decode_mrc_tokens(MACROBLOCKD *const xd, TX_TYPE tx_size,
+ aom_reader *r) {
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int is_inter = is_inter_block(mbmi);
+ if ((is_inter && !SIGNAL_MRC_MASK_INTER) ||
+ (!is_inter && !SIGNAL_MRC_MASK_INTRA))
+ return;
+ Av1ColorMapParam color_map_params;
+ get_mrc_params(xd, tx_size, &color_map_params);
+ decode_color_map_tokens(&color_map_params, r);
+}
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
#if !CONFIG_PVQ || CONFIG_VAR_TX
int av1_decode_block_tokens(AV1_COMMON *cm, MACROBLOCKD *const xd, int plane,
@@ -297,14 +374,19 @@ int av1_decode_block_tokens(AV1_COMMON *cm, MACROBLOCKD *const xd, int plane,
get_dq_profile_from_ctx(xd->qindex[seg_id], ctx, ref, pd->plane_type);
#endif // CONFIG_NEW_QUANT
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ if (tx_type == MRC_DCT) decode_mrc_tokens(xd, tx_size, r);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+
const int eob =
decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, dequant,
#if CONFIG_NEW_QUANT
pd->seg_dequant_nuq[seg_id][dq],
-#endif // CONFIG_NEW_QUANT
+#else
#if CONFIG_AOM_QM
pd->seg_iqmatrix[seg_id],
#endif // CONFIG_AOM_QM
+#endif // CONFIG_NEW_QUANT
ctx, sc->scan, sc->neighbors, max_scan_line, r);
av1_set_contexts(xd, pd, plane, tx_size, eob > 0, x, y);
#if CONFIG_ADAPT_SCAN
diff --git a/third_party/aom/av1/decoder/detokenize.h b/third_party/aom/av1/decoder/detokenize.h
index 0e58a2803..eb31d58c6 100644
--- a/third_party/aom/av1/decoder/detokenize.h
+++ b/third_party/aom/av1/decoder/detokenize.h
@@ -22,9 +22,7 @@
extern "C" {
#endif
-#if CONFIG_PALETTE
void av1_decode_palette_tokens(MACROBLOCKD *const xd, int plane, aom_reader *r);
-#endif // CONFIG_PALETTE
#if !CONFIG_PVQ || CONFIG_VAR_TX
int av1_decode_block_tokens(AV1_COMMON *cm, MACROBLOCKD *const xd, int plane,
diff --git a/third_party/aom/av1/decoder/dthread.c b/third_party/aom/av1/decoder/dthread.c
index 50f8ed192..7f16b233c 100644
--- a/third_party/aom/av1/decoder/dthread.c
+++ b/third_party/aom/av1/decoder/dthread.c
@@ -181,7 +181,12 @@ void av1_frameworker_copy_context(AVxWorker *const dst_worker,
memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr,
(MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh));
dst_cm->lf.last_sharpness_level = src_cm->lf.sharpness_level;
+#if CONFIG_LOOPFILTER_LEVEL
+ dst_cm->lf.filter_level[0] = src_cm->lf.filter_level[0];
+ dst_cm->lf.filter_level[1] = src_cm->lf.filter_level[1];
+#else
dst_cm->lf.filter_level = src_cm->lf.filter_level;
+#endif
memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, TOTAL_REFS_PER_FRAME);
memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
dst_cm->seg = src_cm->seg;
diff --git a/third_party/aom/av1/decoder/inspection.c b/third_party/aom/av1/decoder/inspection.c
index 4f98f18ea..98c51d4ba 100644
--- a/third_party/aom/av1/decoder/inspection.c
+++ b/third_party/aom/av1/decoder/inspection.c
@@ -18,13 +18,19 @@
#include "av1/common/cfl.h"
#endif
-void ifd_init(insp_frame_data *fd, int frame_width, int frame_height) {
- fd->mi_cols = ALIGN_POWER_OF_TWO(frame_width, 3) >> MI_SIZE_LOG2;
- fd->mi_rows = ALIGN_POWER_OF_TWO(frame_height, 3) >> MI_SIZE_LOG2;
+static void ifd_init_mi_rc(insp_frame_data *fd, int mi_cols, int mi_rows) {
+ fd->mi_cols = mi_cols;
+ fd->mi_rows = mi_rows;
fd->mi_grid = (insp_mi_data *)aom_malloc(sizeof(insp_mi_data) * fd->mi_rows *
fd->mi_cols);
}
+void ifd_init(insp_frame_data *fd, int frame_width, int frame_height) {
+ int mi_cols = ALIGN_POWER_OF_TWO(frame_width, 3) >> MI_SIZE_LOG2;
+ int mi_rows = ALIGN_POWER_OF_TWO(frame_height, 3) >> MI_SIZE_LOG2;
+ ifd_init_mi_rc(fd, mi_cols, mi_rows);
+}
+
void ifd_clear(insp_frame_data *fd) {
aom_free(fd->mi_grid);
fd->mi_grid = NULL;
@@ -35,9 +41,9 @@ void ifd_clear(insp_frame_data *fd) {
int ifd_inspect(insp_frame_data *fd, void *decoder) {
struct AV1Decoder *pbi = (struct AV1Decoder *)decoder;
AV1_COMMON *const cm = &pbi->common;
- // TODO(negge): Should this function just call ifd_clear() and ifd_init()?
if (fd->mi_rows != cm->mi_rows || fd->mi_cols != cm->mi_cols) {
- return 0;
+ ifd_clear(fd);
+ ifd_init_mi_rc(fd, cm->mi_rows, cm->mi_cols);
}
fd->show_frame = cm->show_frame;
fd->frame_type = cm->frame_type;
@@ -85,26 +91,26 @@ int ifd_inspect(insp_frame_data *fd, void *decoder) {
// Skip Flag
mi->skip = mbmi->skip;
#if CONFIG_DUAL_FILTER
- mi->filter[0] = mbmi->interp_filter[0];
- mi->filter[1] = mbmi->interp_filter[1];
+ mi->filter[0] = av1_extract_interp_filter(mbmi->interp_filters, 0);
+ mi->filter[1] = av1_extract_interp_filter(mbmi->interp_filters, 1);
#else
- mi->filter = mbmi->interp_filter;
+ mi->filter = av1_extract_interp_filter(mbmi->interp_filters, 0);
#endif
// Transform
mi->tx_type = mbmi->tx_type;
mi->tx_size = mbmi->tx_size;
#if CONFIG_CDEF
- mi->cdef_level = cm->cdef_strengths[mbmi->cdef_strength] / CLPF_STRENGTHS;
+ mi->cdef_level =
+ cm->cdef_strengths[mbmi->cdef_strength] / CDEF_SEC_STRENGTHS;
mi->cdef_strength =
- cm->cdef_strengths[mbmi->cdef_strength] % CLPF_STRENGTHS;
+ cm->cdef_strengths[mbmi->cdef_strength] % CDEF_SEC_STRENGTHS;
mi->cdef_strength += mi->cdef_strength == 3;
#endif
#if CONFIG_CFL
- if (mbmi->uv_mode == UV_DC_PRED) {
+ if (mbmi->uv_mode == UV_CFL_PRED) {
mi->cfl_alpha_idx = mbmi->cfl_alpha_idx;
- mi->cfl_alpha_sign = (mbmi->cfl_alpha_signs[CFL_PRED_V] << CFL_PRED_V) +
- mbmi->cfl_alpha_signs[CFL_PRED_U];
+ mi->cfl_alpha_sign = mbmi->cfl_alpha_signs;
} else {
mi->cfl_alpha_idx = 0;
mi->cfl_alpha_sign = 0;
diff --git a/third_party/aom/av1/decoder/symbolrate.h b/third_party/aom/av1/decoder/symbolrate.h
new file mode 100644
index 000000000..023287732
--- /dev/null
+++ b/third_party/aom/av1/decoder/symbolrate.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "aom_dsp/bitreader.h"
+
+#ifndef AV1_DECODER_SYMBOLRATE_H_
+#define AV1_DECODER_SYMBOLRATE_H_
+
+#if CONFIG_SYMBOLRATE
+static INLINE void av1_dump_symbol_rate(struct AV1Common *cm) {
+ const FRAME_COUNTS *counts = &cm->counts;
+ printf("%d %d %d %d\n", counts->coeff_num[0], counts->coeff_num[1],
+ counts->symbol_num[0], counts->symbol_num[1]);
+}
+static INLINE int av1_read_record_symbol(FRAME_COUNTS *counts, aom_reader *r,
+ aom_cdf_prob *cdf, int nsymbs,
+ const char *str) {
+ (void)str;
+ if (counts) ++counts->symbol_num[0];
+ return aom_read_symbol(r, cdf, nsymbs, str);
+}
+
+#if CONFIG_LV_MAP
+static INLINE int av1_read_record_bin(FRAME_COUNTS *counts, aom_reader *r,
+ aom_cdf_prob *cdf, int nsymbs,
+ const char *str) {
+ (void)str;
+ if (counts) ++counts->symbol_num[0];
+ return aom_read_bin(r, cdf, nsymbs, str);
+}
+#endif
+
+static INLINE int av1_read_record(FRAME_COUNTS *counts, aom_reader *r, int prob,
+ const char *str) {
+ (void)str;
+ if (counts) ++counts->symbol_num[0];
+ return aom_read(r, prob, str);
+}
+
+static INLINE int av1_read_record_cdf(FRAME_COUNTS *counts, aom_reader *r,
+ const aom_cdf_prob *cdf, int nsymbs,
+ const char *str) {
+ (void)str;
+ if (counts) ++counts->symbol_num[0];
+ return aom_read_cdf(r, cdf, nsymbs, str);
+}
+
+static INLINE int av1_read_record_bit(FRAME_COUNTS *counts, aom_reader *r,
+ const char *str) {
+ (void)str;
+ if (counts) ++counts->symbol_num[1];
+ return aom_read_bit(r, str);
+}
+
+static INLINE void av1_record_coeff(FRAME_COUNTS *counts, tran_low_t qcoeff) {
+ assert(qcoeff >= 0);
+ if (counts) ++counts->coeff_num[qcoeff != 0];
+}
+#else // CONFIG_SYMBOLRATE
+
+#define av1_read_record_symbol(counts, r, cdf, nsymbs, ACCT_STR_NAME) \
+ aom_read_symbol(r, cdf, nsymbs, ACCT_STR_NAME)
+
+#if CONFIG_LV_MAP
+#define av1_read_record_bin(counts, r, cdf, nsymbs, ACCT_STR_NAME) \
+ aom_read_bin(r, cdf, nsymbs, ACCT_STR_NAME)
+#endif
+
+#define av1_read_record(counts, r, prob, ACCT_STR_NAME) \
+ aom_read(r, prob, ACCT_STR_NAME)
+
+#define av1_read_record_cdf(counts, r, cdf, nsymbs, ACCT_STR_NAME) \
+ aom_read_cdf(r, cdf, nsymbs, ACCT_STR_NAME)
+
+#define av1_read_record_bit(counts, r, ACCT_STR_NAME) \
+ aom_read_bit(r, ACCT_STR_NAME)
+
+#endif // CONFIG_SYMBOLRATE
+
+#endif // AV1_DECODER_SYMBOLRATE_H_
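/* Hypothetical usage of the wrappers above: they are drop-in
 * replacements for the corresponding aom_read_* calls with a
 * FRAME_COUNTS pointer (which may be NULL) threaded in front, e.g.
 *   int skip = av1_read_record_bit(xd->counts, r, ACCT_STR);
 *   int sym  = av1_read_record_symbol(xd->counts, r, cdf, n, ACCT_STR);
 * With CONFIG_SYMBOLRATE off they compile away to the plain readers. */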
diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.c b/third_party/aom/av1/encoder/aq_cyclicrefresh.c
index 05aa28c9f..8f61c7eb8 100644
--- a/third_party/aom/av1/encoder/aq_cyclicrefresh.c
+++ b/third_party/aom/av1/encoder/aq_cyclicrefresh.c
@@ -39,7 +39,7 @@ struct CYCLIC_REFRESH {
// RD mult. parameters for segment 1.
int rdmult;
// Cyclic refresh map.
- signed char *map;
+ int8_t *map;
// Map of the last q a block was coded at.
uint8_t *last_coded_q_map;
// Thresholds applied to the projected rate/distortion of the coding block,
@@ -397,6 +397,7 @@ static void cyclic_refresh_update_map(AV1_COMP *const cpi) {
// Set the segmentation map: cycle through the superblocks, starting at
 // cr->sb_index, and stopping when either block_count blocks have been found
// to be refreshed, or we have passed through whole frame.
+ if (cr->sb_index >= sbs_in_frame) cr->sb_index = 0;
assert(cr->sb_index < sbs_in_frame);
i = cr->sb_index;
cr->target_num_seg_blocks = 0;
diff --git a/third_party/aom/av1/encoder/aq_variance.c b/third_party/aom/av1/encoder/aq_variance.c
index ab9b3790b..84d967215 100644
--- a/third_party/aom/av1/encoder/aq_variance.c
+++ b/third_party/aom/av1/encoder/aq_variance.c
@@ -151,8 +151,8 @@ static unsigned int block_variance(const AV1_COMP *const cpi, MACROBLOCK *x,
(xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
if (right_overflow || bottom_overflow) {
- const int bw = 8 * mi_size_wide[bs] - right_overflow;
- const int bh = 8 * mi_size_high[bs] - bottom_overflow;
+ const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow;
+ const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow;
int avg;
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
diff --git a/third_party/aom/av1/encoder/arm/neon/dct_neon.c b/third_party/aom/av1/encoder/arm/neon/dct_neon.c
deleted file mode 100644
index f6ce24a3d..000000000
--- a/third_party/aom/av1/encoder/arm/neon/dct_neon.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <arm_neon.h>
-
-#include "./av1_rtcd.h"
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
-
-#include "av1/common/blockd.h"
-#include "aom_dsp/txfm_common.h"
-
-void av1_fdct8x8_quant_neon(const int16_t *input, int stride,
- int16_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
- int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
- int16_t temp_buffer[64];
- (void)coeff_ptr;
-
- aom_fdct8x8_neon(input, temp_buffer, stride);
- av1_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, zbin_ptr, round_ptr,
- quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan_ptr, iscan_ptr);
-}
diff --git a/third_party/aom/av1/encoder/av1_quantize.c b/third_party/aom/av1/encoder/av1_quantize.c
index dd53d4223..033b4ba1a 100644
--- a/third_party/aom/av1/encoder/av1_quantize.c
+++ b/third_party/aom/av1/encoder/av1_quantize.c
@@ -443,11 +443,8 @@ static void quantize_fp_helper_c(
const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan,
-#if CONFIG_AOM_QM
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
- int log_scale) {
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, int log_scale) {
int i, eob = -1;
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
@@ -464,35 +461,22 @@ static void quantize_fp_helper_c(
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
- const qm_val_t iwt = iqm_ptr[rc];
+ const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
-#endif
const int coeff_sign = (coeff >> 31);
- int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp32 = 0;
-#if CONFIG_AOM_QM
if (abs_coeff * wt >=
(dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) {
-#else
- if (abs_coeff >= (dequant_ptr[rc != 0] >> (1 + log_scale))) {
-#endif
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale);
- abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
-#if CONFIG_AOM_QM
+ abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX);
tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >>
- ((16 - log_scale) + AOM_QM_BITS));
+ (16 - log_scale + AOM_QM_BITS));
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / (1 << log_scale);
-#else
- tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] =
- qcoeff_ptr[rc] * dequant_ptr[rc != 0] / (1 << log_scale);
-#endif
}
if (tmp32) eob = i;
@@ -501,25 +485,60 @@ static void quantize_fp_helper_c(
*eob_ptr = eob + 1;
}
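/* Worked example of the weighted forward quant above, assuming
 * AOM_QM_BITS == 5 so the identity weight is 1 << 5 = 32 (numbers
 * illustrative): with abs_coeff = 100 after rounding, wt = 32,
 * quant_ptr[rc != 0] = 22000 and log_scale = 0,
 * tmp32 = (100 * 32 * 22000) >> (16 - 0 + 5) = 70400000 >> 21 = 33. */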
+static void highbd_quantize_fp_helper_c(
+ const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, int log_scale) {
+ int i;
+ int eob = -1;
+ const int scale = 1 << log_scale;
+ const int shift = 16 - log_scale;
+ // TODO(jingning) Decide the need of these arguments after the
+ // quantization process is completed.
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Quantization pass: All coefficients with index >= zero_flag are
+ // skippable. Note: zero_flag can be zero.
+ for (i = 0; i < count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale);
+ const int abs_qcoeff =
+ (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale;
+ if (abs_qcoeff) eob = i;
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
+ const int16_t *iscan) {
quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 0);
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 0);
}
void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -528,19 +547,10 @@ void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
+ const int16_t *scan, const int16_t *iscan) {
quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 1);
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 1);
}
#if CONFIG_TX64X64
@@ -550,19 +560,10 @@ void av1_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
+ const int16_t *scan, const int16_t *iscan) {
quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, zbin_ptr, round_ptr,
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
- dequant_ptr, eob_ptr, scan, iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- 2);
+ dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, 2);
}
#endif // CONFIG_TX64X64
@@ -576,58 +577,47 @@ void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
-#endif // CONFIG_AOM_QM
-
- switch (qparam->log_scale) {
- case 0:
- if (n_coeffs < 16) {
- // TODO(jingning): Need SIMD implementation for smaller block size
- // quantization.
- quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- qparam->log_scale);
- } else {
- av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- }
- break;
- case 1:
- av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
+ if (qm_ptr != NULL && iqm_ptr != NULL) {
+ quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr,
+ iqm_ptr, qparam->log_scale);
+ } else {
#endif
- );
- break;
+ switch (qparam->log_scale) {
+ case 0:
+ if (n_coeffs < 16) {
+ // TODO(jingning): Need SIMD implementation for smaller block size
+ // quantization.
+ quantize_fp_helper_c(
+ coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant,
+ eob_ptr, sc->scan, sc->iscan, NULL, NULL, qparam->log_scale);
+ } else {
+ av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+ }
+ break;
+ case 1:
+ av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round_fp, p->quant_fp, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+ sc->scan, sc->iscan);
+ break;
#if CONFIG_TX64X64
- case 2:
- av1_quantize_fp_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
+ case 2:
+ av1_quantize_fp_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round_fp, p->quant_fp, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+ sc->scan, sc->iscan);
+ break;
#endif // CONFIG_TX64X64
- default: assert(0);
+ default: assert(0);
+ }
+#if CONFIG_AOM_QM
}
+#endif
}
void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -640,43 +630,69 @@ void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
+ if (qm_ptr != NULL && iqm_ptr != NULL) {
+ quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr,
+ iqm_ptr, qparam->log_scale);
+ } else {
#endif // CONFIG_AOM_QM
- switch (qparam->log_scale) {
- case 0:
- aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
- case 1:
- aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
+ switch (qparam->log_scale) {
+ case 0:
+ aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+ break;
+ case 1:
+ aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+ break;
#if CONFIG_TX64X64
- case 2:
- aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
- p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan
+ case 2:
+ aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+ pd->dequant, eob_ptr, sc->scan, sc->iscan);
+ break;
+#endif // CONFIG_TX64X64
+ default: assert(0);
+ }
#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
+ }
#endif
- );
- break;
-#endif // CONFIG_TX64X64
- default: assert(0);
+}
+
+static void quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t quant, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
+ uint16_t *eob_ptr, const qm_val_t *qm_ptr,
+ const qm_val_t *iqm_ptr, const int log_scale) {
+ const int rc = 0;
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int64_t tmp;
+ int eob = -1;
+ int32_t tmp32;
+ int dequant;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ const int wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS);
+ const int iwt = iqm_ptr != NULL ? iqm_ptr[rc] : (1 << AOM_QM_BITS);
+ tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale),
+ INT16_MIN, INT16_MAX);
+ tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS));
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+ dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+ dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / (1 << log_scale);
+ if (tmp32) eob = 0;
}
+ *eob_ptr = eob + 1;
}
void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -686,45 +702,18 @@ void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
// obsolete skip_block
const int skip_block = 0;
+ (void)sc;
+ assert(qparam->log_scale >= 0 && qparam->log_scale < (2 + CONFIG_TX64X64));
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
-#endif // CONFIG_AOM_QM
-
- (void)sc;
-
- switch (qparam->log_scale) {
- case 0:
- aom_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round,
- p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0],
- eob_ptr
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
- case 1:
- aom_quantize_dc_32x32(coeff_ptr, skip_block, p->round, p->quant_fp[0],
- qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
-#if CONFIG_TX64X64
- aom_quantize_dc_64x64(coeff_ptr, skip_block, p->round, p->quant_fp[0],
- qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
+#else
+ const qm_val_t *qm_ptr = NULL;
+ const qm_val_t *iqm_ptr = NULL;
#endif
- );
- case 2: break;
-#endif // CONFIG_TX64X64
- default: assert(0);
- }
+ quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round, p->quant_fp[0],
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr, qm_ptr, iqm_ptr,
+ qparam->log_scale);
}
#if CONFIG_NEW_QUANT
@@ -857,29 +846,31 @@ void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
+ if (qm_ptr != NULL && iqm_ptr != NULL) {
+ highbd_quantize_fp_helper_c(
+ coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, p->quant_fp,
+ p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+ sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
+ } else {
#endif // CONFIG_AOM_QM
- if (n_coeffs < 16) {
- // TODO(jingning): Need SIMD implementation for smaller block size
- // quantization.
- av1_highbd_quantize_fp_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- qparam->log_scale);
- return;
- }
+ if (n_coeffs < 16) {
+ // TODO(jingning): Need SIMD implementation for smaller block size
+ // quantization.
+ av1_highbd_quantize_fp_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round_fp, p->quant_fp, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
+ sc->scan, sc->iscan, qparam->log_scale);
+ return;
+ }
- av1_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
- p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
- pd->dequant, eob_ptr, sc->scan, sc->iscan,
+ av1_highbd_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round_fp, p->quant_fp, p->quant_shift, qcoeff_ptr,
+ dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+ sc->iscan, qparam->log_scale);
#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
+ }
#endif
- qparam->log_scale);
}
void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
@@ -894,86 +885,76 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
+ if (qm_ptr != NULL && iqm_ptr != NULL) {
+ highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round, p->quant, p->quant_shift, qcoeff_ptr,
+ dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+ sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
+ } else {
#endif // CONFIG_AOM_QM
- switch (qparam->log_scale) {
- case 0:
- if (LIKELY(n_coeffs >= 8)) {
- aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round, p->quant, p->quant_shift, qcoeff_ptr,
- dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
- sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- } else {
- // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
- // quantization
- aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ switch (qparam->log_scale) {
+ case 0:
+ if (LIKELY(n_coeffs >= 8)) {
+ aom_highbd_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin,
p->round, p->quant, p->quant_shift, qcoeff_ptr,
dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
- sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- }
- break;
- case 1:
- aom_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ sc->iscan);
+ } else {
+ // TODO(luoyi): Need SIMD (e.g. sse2) for smaller block size
+ // quantization
+ aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
p->round, p->quant, p->quant_shift,
qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
+ sc->scan, sc->iscan);
+ }
+ break;
+ case 1:
+ aom_highbd_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round, p->quant, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant,
+ eob_ptr, sc->scan, sc->iscan);
+ break;
#if CONFIG_TX64X64
- case 2:
- aom_highbd_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
- p->round, p->quant, p->quant_shift,
- qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr,
- sc->scan, sc->iscan
-#if CONFIG_AOM_QM
- ,
- qm_ptr, iqm_ptr
-#endif
- );
- break;
+ case 2:
+ aom_highbd_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin,
+ p->round, p->quant, p->quant_shift,
+ qcoeff_ptr, dqcoeff_ptr, pd->dequant,
+ eob_ptr, sc->scan, sc->iscan);
+ break;
#endif // CONFIG_TX64X64
- default: assert(0);
+ default: assert(0);
+ }
+#if CONFIG_AOM_QM
}
+#endif
}
static INLINE void highbd_quantize_dc(
const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr,
-#if CONFIG_AOM_QM
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
- const int log_scale) {
+ const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale) {
int eob = -1;
memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-#if CONFIG_AOM_QM
- (void)qm_ptr;
- (void)iqm_ptr;
-#endif
+
if (!skip_block) {
+ const qm_val_t wt = qm_ptr != NULL ? qm_ptr[0] : (1 << AOM_QM_BITS);
+ const qm_val_t iwt = iqm_ptr != NULL ? iqm_ptr[0] : (1 << AOM_QM_BITS);
const int coeff = coeff_ptr[0];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + round_ptr[0];
- const int abs_qcoeff = (int)((tmp * quant) >> (16 - log_scale));
+ const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
+ const int64_t tmpw = tmp * wt;
+ const int abs_qcoeff =
+ (int)((tmpw * quant) >> (16 - log_scale + AOM_QM_BITS));
qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / (1 << log_scale);
+ const int dequant =
+ (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+
+ dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / (1 << log_scale);
if (abs_qcoeff) eob = 0;
}
*eob_ptr = eob + 1;
@@ -991,17 +972,16 @@ void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr,
#if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
+#else
+ const qm_val_t *qm_ptr = NULL;
+ const qm_val_t *iqm_ptr = NULL;
#endif // CONFIG_AOM_QM
(void)sc;
highbd_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round,
p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0],
- eob_ptr,
-#if CONFIG_AOM_QM
- qm_ptr, iqm_ptr,
-#endif
- qparam->log_scale);
+ eob_ptr, qm_ptr, iqm_ptr, qparam->log_scale);
}
#if CONFIG_NEW_QUANT
@@ -1517,61 +1497,16 @@ void av1_highbd_quantize_dc_nuq_facade(
}
#endif // CONFIG_NEW_QUANT
-void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan,
-#if CONFIG_AOM_QM
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
-#endif
- int log_scale) {
- int i;
- int eob = -1;
- const int scale = 1 << log_scale;
- const int shift = 16 - log_scale;
- // TODO(jingning) Decide the need of these arguments after the
- // quantization process is completed.
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
- const qm_val_t iwt = iqm_ptr[rc];
- const int dequant =
- (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
-#endif
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + (round_ptr[rc != 0] >> log_scale);
-#if CONFIG_AOM_QM
- const int abs_qcoeff =
- (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale;
-#else
- const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> shift);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / scale;
-#endif
- if (abs_qcoeff) eob = i;
- }
- }
- *eob_ptr = eob + 1;
+void av1_highbd_quantize_fp_c(
+ const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, int log_scale) {
+ highbd_quantize_fp_helper_c(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan,
+ NULL, NULL, log_scale);
}
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
@@ -1682,22 +1617,19 @@ void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *const xd = &x->e_mbd;
const QUANTS *const quants = &cpi->quants;
-#if CONFIG_DELTA_Q
#if CONFIG_EXT_DELTA_Q
- int current_q_index = AOMMAX(
- 0, AOMMIN(QINDEX_RANGE - 1, cpi->oxcf.deltaq_mode != NO_DELTA_Q
- ? cm->base_qindex + xd->delta_qindex
- : cm->base_qindex));
+ int current_q_index =
+ AOMMAX(0, AOMMIN(QINDEX_RANGE - 1,
+ cpi->oxcf.deltaq_mode != NO_DELTA_Q
+ ? cm->base_qindex + xd->delta_qindex
+ : cm->base_qindex));
#else
int current_q_index = AOMMAX(
- 0, AOMMIN(QINDEX_RANGE - 1, cm->delta_q_present_flag
- ? cm->base_qindex + xd->delta_qindex
- : cm->base_qindex));
+ 0, AOMMIN(QINDEX_RANGE - 1,
+ cm->delta_q_present_flag ? cm->base_qindex + xd->delta_qindex
+ : cm->base_qindex));
#endif
const int qindex = av1_get_qindex(&cm->seg, segment_id, current_q_index);
-#else
- const int qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
-#endif
const int rdmult = av1_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
int i;
#if CONFIG_AOM_QM
diff --git a/third_party/aom/av1/encoder/bgsprite.c b/third_party/aom/av1/encoder/bgsprite.c
index 64deade06..ae2cb1d40 100644
--- a/third_party/aom/av1/encoder/bgsprite.c
+++ b/third_party/aom/av1/encoder/bgsprite.c
@@ -34,13 +34,28 @@
*/
#define BGSPRITE_BLENDING_MODE 1
+// Enable removal of outliers from mean blending mode.
+#if BGSPRITE_BLENDING_MODE == 1
+#define BGSPRITE_MEAN_REMOVE_OUTLIERS 0
+#endif // BGSPRITE_BLENDING_MODE == 1
+
/* Interpolation for panorama alignment sampling:
* 0 = Nearest neighbor
* 1 = Bilinear
*/
#define BGSPRITE_INTERPOLATION 0
-#define TRANSFORM_MAT_DIM 3
+// Enable turning off bgsprite from firstpass metrics in define_gf_group.
+#define BGSPRITE_ENABLE_METRICS 1
+
+// Enable foreground/background segmentation combined with the temporal filter.
+#define BGSPRITE_ENABLE_SEGMENTATION 1
+
+// Enable alignment using global motion.
+#define BGSPRITE_ENABLE_GME 0
+
+// Block size for foreground mask.
+#define BGSPRITE_MASK_BLOCK_SIZE 4
typedef struct {
#if CONFIG_HIGHBITDEPTH
@@ -52,8 +67,29 @@ typedef struct {
uint8_t u;
uint8_t v;
#endif // CONFIG_HIGHBITDEPTH
+ uint8_t exists;
} YuvPixel;
+typedef struct {
+ int curr_model;
+ double mean[2];
+ double var[2];
+ int age[2];
+ double u_mean[2];
+ double v_mean[2];
+
+#if CONFIG_HIGHBITDEPTH
+ uint16_t y;
+ uint16_t u;
+ uint16_t v;
+#else
+ uint8_t y;
+ uint8_t u;
+ uint8_t v;
+#endif // CONFIG_HIGHBITDEPTH
+ double final_var;
+} YuvPixelGaussian;
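/* A hedged sketch of how a per-pixel model of this shape is commonly
 * updated (the actual bgsprite update rules live further down in this
 * file and may differ): fold a new luma sample y into the currently
 * selected model with a running mean and population variance. */
static void gaussian_update_sketch(YuvPixelGaussian *p, double y) {
  const int m = p->curr_model;
  const int n = ++p->age[m];
  const double delta = y - p->mean[m];
  p->mean[m] += delta / n;                                  /* running mean */
  p->var[m] += (delta * (y - p->mean[m]) - p->var[m]) / n;  /* M2 / n */
}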
+
// Maps to convert from matrix form to param vector form.
static const int params_to_matrix_map[] = { 2, 3, 0, 4, 5, 1, 6, 7 };
static const int matrix_to_params_map[] = { 2, 5, 0, 1, 3, 4, 6, 7 };
@@ -75,6 +111,8 @@ static void matrix_to_params(const double *const matrix, double *target) {
}
}
+#define TRANSFORM_MAT_DIM 3
+
// Do matrix multiplication on params.
static void multiply_params(double *const m1, double *const m2,
double *target) {
@@ -124,20 +162,20 @@ static void find_frame_limit(int width, int height,
*y_max = (int)ceil(uv_matrix[1]);
*y_min = (int)floor(uv_matrix[1]);
- xy_matrix[0] = width;
+ xy_matrix[0] = width - 1;
xy_matrix[1] = 0;
multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
TRANSFORM_MAT_DIM, 1);
UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max);
- xy_matrix[0] = width;
- xy_matrix[1] = height;
+ xy_matrix[0] = width - 1;
+ xy_matrix[1] = height - 1;
multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
TRANSFORM_MAT_DIM, 1);
UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max);
xy_matrix[0] = 0;
- xy_matrix[1] = height;
+ xy_matrix[1] = height - 1;
multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
TRANSFORM_MAT_DIM, 1);
UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max);
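The hunk above changes the sampled far corners from (width, height) to (width - 1, height - 1), i.e. the last valid pixel coordinates rather than one past them. A self-contained sketch of the bound computation under that convention, not part of the patch (assumes an affine transform whose third row is 0 0 1, matching multiply_mat and UPDATELIMITS in this file):

#include <math.h>

// Illustrative sketch, not part of the patch: project the four corner
// pixels of a WxH frame and take the integer bounding box.
static void frame_limits_sketch(const double m[9], int w, int h,
                                int *x_min, int *x_max,
                                int *y_min, int *y_max) {
  const double cx[4] = { 0, w - 1, w - 1, 0 };
  const double cy[4] = { 0, 0, h - 1, h - 1 };
  for (int c = 0; c < 4; ++c) {
    const double u = m[0] * cx[c] + m[1] * cy[c] + m[2];
    const double v = m[3] * cx[c] + m[4] * cy[c] + m[5];
    if (c == 0 || (int)floor(u) < *x_min) *x_min = (int)floor(u);
    if (c == 0 || (int)ceil(u) > *x_max) *x_max = (int)ceil(u);
    if (c == 0 || (int)floor(v) < *y_min) *y_min = (int)floor(v);
    if (c == 0 || (int)ceil(v) > *y_max) *y_max = (int)ceil(v);
  }
}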
@@ -198,79 +236,13 @@ static void invert_params(const double *const params, double *target) {
matrix_to_params(inverse, target);
}
-#if BGSPRITE_BLENDING_MODE == 0
-// swaps two YuvPixels.
-static void swap_yuv(YuvPixel *a, YuvPixel *b) {
- const YuvPixel temp = *b;
- *b = *a;
- *a = temp;
-}
-
-// Partitions array to find pivot index in qselect.
-static int partition(YuvPixel arr[], int left, int right, int pivot_idx) {
- YuvPixel pivot = arr[pivot_idx];
-
- // Move pivot to the end.
- swap_yuv(&arr[pivot_idx], &arr[right]);
-
- int p_idx = left;
- for (int i = left; i < right; ++i) {
- if (arr[i].y <= pivot.y) {
- swap_yuv(&arr[i], &arr[p_idx]);
- p_idx++;
- }
- }
-
- swap_yuv(&arr[p_idx], &arr[right]);
-
- return p_idx;
-}
-
-// Returns the kth element in array, partially sorted in place (quickselect).
-static YuvPixel qselect(YuvPixel arr[], int left, int right, int k) {
- if (left >= right) {
- return arr[left];
- }
- unsigned int seed = (int)time(NULL);
- int pivot_idx = left + rand_r(&seed) % (right - left + 1);
- pivot_idx = partition(arr, left, right, pivot_idx);
-
- if (k == pivot_idx) {
- return arr[k];
- } else if (k < pivot_idx) {
- return qselect(arr, left, pivot_idx - 1, k);
- } else {
- return qselect(arr, pivot_idx + 1, right, k);
- }
-}
-#endif // BGSPRITE_BLENDING_MODE == 0
-
-// Stitches images together to create ARF and stores it in 'panorama'.
-static void stitch_images(YV12_BUFFER_CONFIG **const frames,
- const int num_frames, const int center_idx,
- const double **const params, const int *const x_min,
- const int *const x_max, const int *const y_min,
- const int *const y_max, int pano_x_min,
- int pano_x_max, int pano_y_min, int pano_y_max,
- YV12_BUFFER_CONFIG *panorama) {
- const int width = pano_x_max - pano_x_min + 1;
- const int height = pano_y_max - pano_y_min + 1;
-
- // Create temp_pano[y][x][num_frames] stack of pixel values
- YuvPixel ***temp_pano = aom_malloc(height * sizeof(*temp_pano));
- for (int i = 0; i < height; ++i) {
- temp_pano[i] = aom_malloc(width * sizeof(**temp_pano));
- for (int j = 0; j < width; ++j) {
- temp_pano[i][j] = aom_malloc(num_frames * sizeof(***temp_pano));
- }
- }
- // Create count[y][x] to count how many values in stack for median filtering
- int **count = aom_malloc(height * sizeof(*count));
- for (int i = 0; i < height; ++i) {
- count[i] = aom_calloc(width, sizeof(**count)); // counts initialized to 0
- }
-
- // Re-sample images onto panorama (pre-median filtering).
+static void build_image_stack(YV12_BUFFER_CONFIG **const frames,
+ const int num_frames, const double **const params,
+ const int *const x_min, const int *const x_max,
+ const int *const y_min, const int *const y_max,
+ int pano_x_min, int pano_y_min,
+ YuvPixel ***img_stack) {
+ // Re-sample images onto panorama (pre-filtering).
const int x_offset = -pano_x_min;
const int y_offset = -pano_y_min;
const int frame_width = frames[0]->y_width;
@@ -376,24 +348,19 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
#if CONFIG_HIGHBITDEPTH
if (frames[i]->flags & YV12_FLAG_HIGHBITDEPTH) {
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
- (uint16_t)interpolated_yvalue;
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
- (uint16_t)interpolated_uvalue;
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
- (uint16_t)interpolated_vvalue;
+ img_stack[pano_y][pano_x][i].y = (uint16_t)interpolated_yvalue;
+ img_stack[pano_y][pano_x][i].u = (uint16_t)interpolated_uvalue;
+ img_stack[pano_y][pano_x][i].v = (uint16_t)interpolated_vvalue;
+ img_stack[pano_y][pano_x][i].exists = 1;
} else {
#endif // CONFIG_HIGHBITDEPTH
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
- (uint8_t)interpolated_yvalue;
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
- (uint8_t)interpolated_uvalue;
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
- (uint8_t)interpolated_vvalue;
+ img_stack[pano_y][pano_x][i].y = (uint8_t)interpolated_yvalue;
+ img_stack[pano_y][pano_x][i].u = (uint8_t)interpolated_uvalue;
+ img_stack[pano_y][pano_x][i].v = (uint8_t)interpolated_vvalue;
+ img_stack[pano_y][pano_x][i].exists = 1;
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
- ++count[pano_y][pano_x];
} else if (image_x >= 0 && image_x < frame_width && image_y >= 0 &&
image_y < frame_height) {
// Place in panorama stack.
@@ -406,104 +373,405 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
(image_x >> frames[i]->subsampling_x);
#if CONFIG_HIGHBITDEPTH
if (frames[i]->flags & YV12_FLAG_HIGHBITDEPTH) {
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
- y_buffer16[ychannel_idx];
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
- u_buffer16[uvchannel_idx];
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
- v_buffer16[uvchannel_idx];
+ img_stack[pano_y][pano_x][i].y = y_buffer16[ychannel_idx];
+ img_stack[pano_y][pano_x][i].u = u_buffer16[uvchannel_idx];
+ img_stack[pano_y][pano_x][i].v = v_buffer16[uvchannel_idx];
+ img_stack[pano_y][pano_x][i].exists = 1;
} else {
#endif // CONFIG_HIGHBITDEPTH
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
- frames[i]->y_buffer[ychannel_idx];
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
- frames[i]->u_buffer[uvchannel_idx];
- temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
- frames[i]->v_buffer[uvchannel_idx];
+ img_stack[pano_y][pano_x][i].y = frames[i]->y_buffer[ychannel_idx];
+ img_stack[pano_y][pano_x][i].u = frames[i]->u_buffer[uvchannel_idx];
+ img_stack[pano_y][pano_x][i].v = frames[i]->v_buffer[uvchannel_idx];
+ img_stack[pano_y][pano_x][i].exists = 1;
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
- ++count[pano_y][pano_x];
}
}
}
}
+}
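Splitting the stacking out into build_image_stack also changes the bookkeeping: instead of appending samples and tracking count[y][x], frame i always writes slot i and marks it with the new exists flag, so consumers re-derive the count by scanning the slots. A sketch of the reader side, not part of the patch:

// Illustrative sketch, not part of the patch: the per-pixel sample
// count that used to live in count[y][x] is now implicit.
static int valid_samples_sketch(const YuvPixel *slots, int num_frames) {
  int n = 0;
  for (int i = 0; i < num_frames; ++i) n += slots[i].exists ? 1 : 0;
  return n;
}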
-#if BGSPRITE_BLENDING_MODE == 1
- // Apply mean filtering and store result in temp_pano[y][x][0].
+#if BGSPRITE_BLENDING_MODE == 0
+// swaps two YuvPixels.
+static void swap_yuv(YuvPixel *a, YuvPixel *b) {
+ const YuvPixel temp = *b;
+ *b = *a;
+ *a = temp;
+}
+
+// Partitions array to find pivot index in qselect.
+static int partition(YuvPixel arr[], int left, int right, int pivot_idx) {
+ YuvPixel pivot = arr[pivot_idx];
+
+ // Move pivot to the end.
+ swap_yuv(&arr[pivot_idx], &arr[right]);
+
+ int p_idx = left;
+ for (int i = left; i < right; ++i) {
+ if (arr[i].y <= pivot.y) {
+ swap_yuv(&arr[i], &arr[p_idx]);
+ p_idx++;
+ }
+ }
+
+ swap_yuv(&arr[p_idx], &arr[right]);
+
+ return p_idx;
+}
+
+// Returns the kth element in array, partially sorted in place (quickselect).
+static YuvPixel qselect(YuvPixel arr[], int left, int right, int k) {
+ if (left >= right) {
+ return arr[left];
+ }
+  unsigned int seed = (unsigned int)time(NULL);
+ int pivot_idx = left + rand_r(&seed) % (right - left + 1);
+ pivot_idx = partition(arr, left, right, pivot_idx);
+
+ if (k == pivot_idx) {
+ return arr[k];
+ } else if (k < pivot_idx) {
+ return qselect(arr, left, pivot_idx - 1, k);
+ } else {
+ return qselect(arr, pivot_idx + 1, right, k);
+ }
+}
+
+// Blends image stack together using a temporal median.
+static void blend_median(const int width, const int height,
+ const int num_frames, const YuvPixel ***image_stack,
+ YuvPixel **blended_img) {
+ // Allocate stack of pixels
+ YuvPixel *pixel_stack = aom_calloc(num_frames, sizeof(*pixel_stack));
+
+ // Apply median filtering using quickselect.
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
- if (count[y][x] == 0) {
+ int count = 0;
+ for (int i = 0; i < num_frames; ++i) {
+ if (image_stack[y][x][i].exists) {
+ pixel_stack[count] = image_stack[y][x][i];
+ ++count;
+ }
+ }
+ if (count == 0) {
// Just make the pixel black.
// TODO(toddnguyen): Color the pixel with nearest neighbor
+ blended_img[y][x].exists = 0;
} else {
- // Find
- uint32_t y_sum = 0;
- uint32_t u_sum = 0;
- uint32_t v_sum = 0;
- for (int i = 0; i < count[y][x]; ++i) {
- y_sum += temp_pano[y][x][i].y;
- u_sum += temp_pano[y][x][i].u;
- v_sum += temp_pano[y][x][i].v;
+ const int median_idx = (int)floor(count / 2);
+ YuvPixel median = qselect(pixel_stack, 0, count - 1, median_idx);
+
+        // Store the median value for UV subsampling later.
+ blended_img[y][x] = median;
+ blended_img[y][x].exists = 1;
+ }
+ }
+ }
+
+ aom_free(pixel_stack);
+}
+#endif // BGSPRITE_BLENDING_MODE == 0
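blend_median compacts the valid slots into pixel_stack and takes the element at index count / 2 with quickselect, which only partially orders the array; that is all a median lookup needs. A per-pixel usage sketch, not part of the patch (the fixed-size buffer is an assumption for brevity):

// Illustrative sketch, not part of the patch (assumes num_frames <= 16).
YuvPixel stack[16];
int count = 0;
for (int i = 0; i < num_frames; ++i)
  if (image_stack[y][x][i].exists) stack[count++] = image_stack[y][x][i];
if (count > 0) {
  const YuvPixel median = qselect(stack, 0, count - 1, count / 2);
  blended_img[y][x] = median;  // y/u/v carried together, as above
}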
+
+#if BGSPRITE_BLENDING_MODE == 1
+// Blends image stack together using a temporal mean.
+static void blend_mean(const int width, const int height, const int num_frames,
+ const YuvPixel ***image_stack, YuvPixel **blended_img,
+ int highbitdepth) {
+ for (int y = 0; y < height; ++y) {
+ for (int x = 0; x < width; ++x) {
+      // Sum the valid samples at this pixel across frames.
+ uint32_t y_sum = 0;
+ uint32_t u_sum = 0;
+ uint32_t v_sum = 0;
+ uint32_t count = 0;
+ for (int i = 0; i < num_frames; ++i) {
+ if (image_stack[y][x][i].exists) {
+ y_sum += image_stack[y][x][i].y;
+ u_sum += image_stack[y][x][i].u;
+ v_sum += image_stack[y][x][i].v;
+ ++count;
}
+ }
- const uint32_t unsigned_count = (uint32_t)count[y][x];
+#if BGSPRITE_MEAN_REMOVE_OUTLIERS
+ if (count > 1) {
+ double stdev = 0;
+ double y_mean = (double)y_sum / count;
+ for (int i = 0; i < num_frames; ++i) {
+ if (image_stack[y][x][i].exists) {
+ stdev += pow(y_mean - image_stack[y][x][i].y, 2);
+ }
+ }
+ stdev = sqrt(stdev / count);
+
+ uint32_t inlier_y_sum = 0;
+ uint32_t inlier_u_sum = 0;
+ uint32_t inlier_v_sum = 0;
+ uint32_t inlier_count = 0;
+ for (int i = 0; i < num_frames; ++i) {
+ if (image_stack[y][x][i].exists &&
+ fabs(image_stack[y][x][i].y - y_mean) <= 1.5 * stdev) {
+ inlier_y_sum += image_stack[y][x][i].y;
+ inlier_u_sum += image_stack[y][x][i].u;
+ inlier_v_sum += image_stack[y][x][i].v;
+ ++inlier_count;
+ }
+ }
+ count = inlier_count;
+ y_sum = inlier_y_sum;
+ u_sum = inlier_u_sum;
+ v_sum = inlier_v_sum;
+ }
+#endif // BGSPRITE_MEAN_REMOVE_OUTLIERS
+ if (count != 0) {
+ blended_img[y][x].exists = 1;
#if CONFIG_HIGHBITDEPTH
- if (panorama->flags & YV12_FLAG_HIGHBITDEPTH) {
- temp_pano[y][x][0].y = (uint16_t)OD_DIVU(y_sum, unsigned_count);
- temp_pano[y][x][0].u = (uint16_t)OD_DIVU(u_sum, unsigned_count);
- temp_pano[y][x][0].v = (uint16_t)OD_DIVU(v_sum, unsigned_count);
+ if (highbitdepth) {
+ blended_img[y][x].y = (uint16_t)OD_DIVU(y_sum, count);
+ blended_img[y][x].u = (uint16_t)OD_DIVU(u_sum, count);
+ blended_img[y][x].v = (uint16_t)OD_DIVU(v_sum, count);
} else {
#endif // CONFIG_HIGHBITDEPTH
- temp_pano[y][x][0].y = (uint8_t)OD_DIVU(y_sum, unsigned_count);
- temp_pano[y][x][0].u = (uint8_t)OD_DIVU(u_sum, unsigned_count);
- temp_pano[y][x][0].v = (uint8_t)OD_DIVU(v_sum, unsigned_count);
+ (void)highbitdepth;
+ blended_img[y][x].y = (uint8_t)OD_DIVU(y_sum, count);
+ blended_img[y][x].u = (uint8_t)OD_DIVU(u_sum, count);
+ blended_img[y][x].v = (uint8_t)OD_DIVU(v_sum, count);
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
+ } else {
+ blended_img[y][x].exists = 0;
}
}
}
-#else
- // Apply median filtering using quickselect.
- for (int y = 0; y < height; ++y) {
- for (int x = 0; x < width; ++x) {
- if (count[y][x] == 0) {
- // Just make the pixel black.
- // TODO(toddnguyen): Color the pixel with nearest neighbor
+}
+#endif // BGSPRITE_BLENDING_MODE == 1
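With BGSPRITE_MEAN_REMOVE_OUTLIERS enabled, blend_mean discards luma samples more than 1.5 standard deviations from the per-pixel mean before averaging. A worked example of the numbers, not part of the patch:

// Illustrative worked example, not part of the patch: luma samples
// {100, 102, 98, 180} give mean 120, variance 1202, stdev ~= 34.7.
// The inlier window is 120 +/- 1.5 * 34.7 = [68.0, 172.0], so 180 is
// rejected and the blended luma becomes (100 + 102 + 98) / 3 = 100.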
+
+#if BGSPRITE_ENABLE_SEGMENTATION
+// Builds dual-mode single gaussian model from image stack.
+static void build_gaussian(const YuvPixel ***image_stack, const int num_frames,
+ const int width, const int height,
+ const int x_block_width, const int y_block_height,
+ const int block_size, YuvPixelGaussian **gauss) {
+ const double initial_variance = 10.0;
+ const double s_theta = 2.0;
+
+ // Add images to dual-mode single gaussian model
+ for (int y_block = 0; y_block < y_block_height; ++y_block) {
+ for (int x_block = 0; x_block < x_block_width; ++x_block) {
+ // Process all blocks.
+ YuvPixelGaussian *model = &gauss[y_block][x_block];
+
+ // Process all frames.
+ for (int i = 0; i < num_frames; ++i) {
+ // Add block to the Gaussian model.
+ double max_variance[2] = { 0.0, 0.0 };
+ double temp_y_mean = 0.0;
+ double temp_u_mean = 0.0;
+ double temp_v_mean = 0.0;
+
+ // Find mean/variance of a block of pixels.
+ int temp_count = 0;
+ for (int sub_y = 0; sub_y < block_size; ++sub_y) {
+ for (int sub_x = 0; sub_x < block_size; ++sub_x) {
+ const int y = y_block * block_size + sub_y;
+ const int x = x_block * block_size + sub_x;
+ if (y < height && x < width && image_stack[y][x][i].exists) {
+ ++temp_count;
+ temp_y_mean += (double)image_stack[y][x][i].y;
+ temp_u_mean += (double)image_stack[y][x][i].u;
+ temp_v_mean += (double)image_stack[y][x][i].v;
+
+ const double variance_0 =
+ pow((double)image_stack[y][x][i].y - model->mean[0], 2);
+ const double variance_1 =
+ pow((double)image_stack[y][x][i].y - model->mean[1], 2);
+
+ if (variance_0 > max_variance[0]) {
+ max_variance[0] = variance_0;
+ }
+ if (variance_1 > max_variance[1]) {
+ max_variance[1] = variance_1;
+ }
+ }
+ }
+ }
+
+ // If pixels exist in the block, add to the model.
+ if (temp_count > 0) {
+ assert(temp_count <= block_size * block_size);
+ temp_y_mean /= temp_count;
+ temp_u_mean /= temp_count;
+ temp_v_mean /= temp_count;
+
+ // Switch the background model to the oldest model.
+ if (model->age[0] > model->age[1]) {
+ model->curr_model = 0;
+ } else if (model->age[1] > model->age[0]) {
+ model->curr_model = 1;
+ }
+
+ // If model is empty, initialize model.
+ if (model->age[model->curr_model] == 0) {
+ model->mean[model->curr_model] = temp_y_mean;
+ model->u_mean[model->curr_model] = temp_u_mean;
+ model->v_mean[model->curr_model] = temp_v_mean;
+ model->var[model->curr_model] = initial_variance;
+ model->age[model->curr_model] = 1;
+ } else {
+ // Constants for current model and foreground model (0 or 1).
+ const int opposite = 1 - model->curr_model;
+ const int current = model->curr_model;
+ const double j = i;
+
+ // Put block into the appropriate model.
+ if (pow(temp_y_mean - model->mean[current], 2) <
+ s_theta * model->var[current]) {
+ // Add block to the current background model
+ model->age[current] += 1;
+ const double prev_weight = 1 / j;
+ const double curr_weight = (j - 1) / j;
+ model->mean[current] = prev_weight * model->mean[current] +
+ curr_weight * temp_y_mean;
+ model->u_mean[current] = prev_weight * model->u_mean[current] +
+ curr_weight * temp_u_mean;
+ model->v_mean[current] = prev_weight * model->v_mean[current] +
+ curr_weight * temp_v_mean;
+ model->var[current] = prev_weight * model->var[current] +
+ curr_weight * max_variance[current];
+ } else {
+ // Block does not fit into current background candidate. Add to
+ // foreground candidate and reinitialize if necessary.
+ const double var_fg = pow(temp_y_mean - model->mean[opposite], 2);
+
+ if (var_fg <= s_theta * model->var[opposite]) {
+ model->age[opposite] += 1;
+ const double prev_weight = 1 / j;
+ const double curr_weight = (j - 1) / j;
+ model->mean[opposite] = prev_weight * model->mean[opposite] +
+ curr_weight * temp_y_mean;
+ model->u_mean[opposite] =
+ prev_weight * model->u_mean[opposite] +
+ curr_weight * temp_u_mean;
+ model->v_mean[opposite] =
+ prev_weight * model->v_mean[opposite] +
+ curr_weight * temp_v_mean;
+ model->var[opposite] = prev_weight * model->var[opposite] +
+ curr_weight * max_variance[opposite];
+ } else if (model->age[opposite] == 0 ||
+ var_fg > s_theta * model->var[opposite]) {
+ model->mean[opposite] = temp_y_mean;
+ model->u_mean[opposite] = temp_u_mean;
+ model->v_mean[opposite] = temp_v_mean;
+ model->var[opposite] = initial_variance;
+ model->age[opposite] = 1;
+ } else {
+ // This case should never happen.
+ assert(0);
+ }
+ }
+ }
+ }
+ }
+
+ // Select the oldest candidate as the background model.
+ if (model->age[0] == 0 && model->age[1] == 0) {
+ model->y = 0;
+ model->u = 0;
+ model->v = 0;
+ model->final_var = 0;
+ } else if (model->age[0] > model->age[1]) {
+ model->y = (uint8_t)model->mean[0];
+ model->u = (uint8_t)model->u_mean[0];
+ model->v = (uint8_t)model->v_mean[0];
+ model->final_var = model->var[0];
} else {
- // Find
- const int median_idx = (int)floor(count[y][x] / 2);
- YuvPixel median =
- qselect(temp_pano[y][x], 0, count[y][x] - 1, median_idx);
+ model->y = (uint8_t)model->mean[1];
+ model->u = (uint8_t)model->u_mean[1];
+ model->v = (uint8_t)model->v_mean[1];
+ model->final_var = model->var[1];
+ }
+ }
+ }
+}
- // Make the median value the 0th index for UV subsampling later
- temp_pano[y][x][0] = median;
- assert(median.y == temp_pano[y][x][0].y &&
- median.u == temp_pano[y][x][0].u &&
- median.v == temp_pano[y][x][0].v);
+// Builds foreground mask based on reference image and gaussian model.
+// In mask[][], 1 is foreground and 0 is background.
+static void build_mask(const int x_min, const int y_min, const int x_offset,
+ const int y_offset, const int x_block_width,
+ const int y_block_height, const int block_size,
+ const YuvPixelGaussian **gauss,
+ YV12_BUFFER_CONFIG *const reference,
+ YV12_BUFFER_CONFIG *const panorama, uint8_t **mask) {
+ const int crop_x_offset = x_min + x_offset;
+ const int crop_y_offset = y_min + y_offset;
+ const double d_theta = 4.0;
+
+ for (int y_block = 0; y_block < y_block_height; ++y_block) {
+ for (int x_block = 0; x_block < x_block_width; ++x_block) {
+      // Create mask to determine if ARF is background or foreground.
+ const YuvPixelGaussian *model = &gauss[y_block][x_block];
+ double temp_y_mean = 0.0;
+ int temp_count = 0;
+
+ for (int sub_y = 0; sub_y < block_size; ++sub_y) {
+ for (int sub_x = 0; sub_x < block_size; ++sub_x) {
+ // x and y are panorama coordinates.
+ const int y = y_block * block_size + sub_y;
+ const int x = x_block * block_size + sub_x;
+
+ const int arf_y = y - crop_y_offset;
+ const int arf_x = x - crop_x_offset;
+
+ if (arf_y >= 0 && arf_y < panorama->y_height && arf_x >= 0 &&
+ arf_x < panorama->y_width) {
+ ++temp_count;
+ const int ychannel_idx = arf_y * panorama->y_stride + arf_x;
+ temp_y_mean += (double)reference->y_buffer[ychannel_idx];
+ }
+ }
+ }
+ if (temp_count > 0) {
+ assert(temp_count <= block_size * block_size);
+ temp_y_mean /= temp_count;
+
+ if (pow(temp_y_mean - model->y, 2) > model->final_var * d_theta) {
+ // Mark block as foreground.
+ mask[y_block][x_block] = 1;
+ }
}
}
}
-#endif // BGSPRITE_BLENDING_MODE == 1
+}
+#endif // BGSPRITE_ENABLE_SEGMENTATION
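Both halves of the segmentation use the same variance-scaled squared-distance test: build_gaussian matches a block to a candidate when the squared mean difference is below s_theta (2.0) times the candidate variance, and build_mask flags foreground when it exceeds d_theta (4.0) times the selected background variance. The two predicates as a sketch, not part of the patch:

#include <math.h>

// Illustrative sketch, not part of the patch.
static int fits_candidate_sketch(double block_mean, double cand_mean,
                                 double cand_var) {
  const double s_theta = 2.0;
  return pow(block_mean - cand_mean, 2) < s_theta * cand_var;
}

static int is_foreground_sketch(double block_mean, double bg_mean,
                                double bg_var) {
  const double d_theta = 4.0;
  return pow(block_mean - bg_mean, 2) > d_theta * bg_var;
}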
- // NOTE(toddnguyen): Right now the ARF in the cpi struct is fixed size at
- // the same size as the frames. For now, we crop the generated panorama.
- // assert(panorama->y_width < width && panorama->y_height < height);
+// Resamples blended_img into panorama, including UV subsampling.
+static void resample_panorama(YuvPixel **blended_img, const int center_idx,
+ const int *const x_min, const int *const y_min,
+ int pano_x_min, int pano_x_max, int pano_y_min,
+ int pano_y_max, YV12_BUFFER_CONFIG *panorama) {
+ const int width = pano_x_max - pano_x_min + 1;
+ const int height = pano_y_max - pano_y_min + 1;
+ const int x_offset = -pano_x_min;
+ const int y_offset = -pano_y_min;
const int crop_x_offset = x_min[center_idx] + x_offset;
const int crop_y_offset = y_min[center_idx] + y_offset;
-
#if CONFIG_HIGHBITDEPTH
if (panorama->flags & YV12_FLAG_HIGHBITDEPTH) {
-    // Use median Y value.
+    // Use blended Y value.
uint16_t *pano_y_buffer16 = CONVERT_TO_SHORTPTR(panorama->y_buffer);
+ uint16_t *pano_u_buffer16 = CONVERT_TO_SHORTPTR(panorama->u_buffer);
+ uint16_t *pano_v_buffer16 = CONVERT_TO_SHORTPTR(panorama->v_buffer);
+
for (int y = 0; y < panorama->y_height; ++y) {
for (int x = 0; x < panorama->y_width; ++x) {
const int ychannel_idx = y * panorama->y_stride + x;
- if (count[y + crop_y_offset][x + crop_x_offset] > 0) {
+ if (blended_img[y + crop_y_offset][x + crop_x_offset].exists) {
pano_y_buffer16[ychannel_idx] =
- temp_pano[y + crop_y_offset][x + crop_x_offset][0].y;
+ blended_img[y + crop_y_offset][x + crop_x_offset].y;
} else {
pano_y_buffer16[ychannel_idx] = 0;
}
@@ -511,9 +779,6 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
}
-    // UV subsampling with median UV values
+    // UV subsampling with blended UV values.
- uint16_t *pano_u_buffer16 = CONVERT_TO_SHORTPTR(panorama->u_buffer);
- uint16_t *pano_v_buffer16 = CONVERT_TO_SHORTPTR(panorama->v_buffer);
-
for (int y = 0; y < panorama->uv_height; ++y) {
for (int x = 0; x < panorama->uv_width; ++x) {
uint32_t avg_count = 0;
@@ -526,9 +791,9 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
int y_sample = crop_y_offset + (y << panorama->subsampling_y) + s_y;
int x_sample = crop_x_offset + (x << panorama->subsampling_x) + s_x;
if (y_sample > 0 && y_sample < height && x_sample > 0 &&
- x_sample < width && count[y_sample][x_sample] > 0) {
- u_sum += temp_pano[y_sample][x_sample][0].u;
- v_sum += temp_pano[y_sample][x_sample][0].v;
+ x_sample < width && blended_img[y_sample][x_sample].exists) {
+ u_sum += blended_img[y_sample][x_sample].u;
+ v_sum += blended_img[y_sample][x_sample].v;
avg_count++;
}
}
@@ -546,35 +811,36 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
}
} else {
#endif // CONFIG_HIGHBITDEPTH
- // Use median Y value.
+ // Use blended Y value.
for (int y = 0; y < panorama->y_height; ++y) {
for (int x = 0; x < panorama->y_width; ++x) {
const int ychannel_idx = y * panorama->y_stride + x;
- if (count[y + crop_y_offset][x + crop_x_offset] > 0) {
+ // Use filtered background.
+ if (blended_img[y + crop_y_offset][x + crop_x_offset].exists) {
panorama->y_buffer[ychannel_idx] =
- temp_pano[y + crop_y_offset][x + crop_x_offset][0].y;
+ blended_img[y + crop_y_offset][x + crop_x_offset].y;
} else {
panorama->y_buffer[ychannel_idx] = 0;
}
}
}
- // UV subsampling with median UV values
+ // UV subsampling with blended UV values.
for (int y = 0; y < panorama->uv_height; ++y) {
for (int x = 0; x < panorama->uv_width; ++x) {
uint16_t avg_count = 0;
uint16_t u_sum = 0;
uint16_t v_sum = 0;
- // Look at surrounding pixels for subsampling
+ // Look at surrounding pixels for subsampling.
for (int s_x = 0; s_x < panorama->subsampling_x + 1; ++s_x) {
for (int s_y = 0; s_y < panorama->subsampling_y + 1; ++s_y) {
int y_sample = crop_y_offset + (y << panorama->subsampling_y) + s_y;
int x_sample = crop_x_offset + (x << panorama->subsampling_x) + s_x;
if (y_sample > 0 && y_sample < height && x_sample > 0 &&
- x_sample < width && count[y_sample][x_sample] > 0) {
- u_sum += temp_pano[y_sample][x_sample][0].u;
- v_sum += temp_pano[y_sample][x_sample][0].v;
+ x_sample < width && blended_img[y_sample][x_sample].exists) {
+ u_sum += blended_img[y_sample][x_sample].u;
+ v_sum += blended_img[y_sample][x_sample].v;
avg_count++;
}
}
@@ -595,19 +861,266 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
+}
+#if BGSPRITE_ENABLE_SEGMENTATION
+// Combines temporal filter output and bgsprite output to make final ARF output
+static void combine_arf(YV12_BUFFER_CONFIG *const temporal_arf,
+ YV12_BUFFER_CONFIG *const bgsprite,
+ uint8_t **const mask, const int block_size,
+ const int x_offset, const int y_offset,
+ YV12_BUFFER_CONFIG *target) {
+ const int height = temporal_arf->y_height;
+ const int width = temporal_arf->y_width;
+
+ YuvPixel **blended_img = aom_malloc(height * sizeof(*blended_img));
for (int i = 0; i < height; ++i) {
+ blended_img[i] = aom_malloc(width * sizeof(**blended_img));
+ }
+
+ const int block_2_height = (height / BGSPRITE_MASK_BLOCK_SIZE) +
+ (height % BGSPRITE_MASK_BLOCK_SIZE != 0 ? 1 : 0);
+ const int block_2_width = (width / BGSPRITE_MASK_BLOCK_SIZE) +
+ (width % BGSPRITE_MASK_BLOCK_SIZE != 0 ? 1 : 0);
+
+ for (int block_y = 0; block_y < block_2_height; ++block_y) {
+ for (int block_x = 0; block_x < block_2_width; ++block_x) {
+ int count = 0;
+ int total = 0;
+ for (int sub_y = 0; sub_y < BGSPRITE_MASK_BLOCK_SIZE; ++sub_y) {
+ for (int sub_x = 0; sub_x < BGSPRITE_MASK_BLOCK_SIZE; ++sub_x) {
+ const int img_y = block_y * BGSPRITE_MASK_BLOCK_SIZE + sub_y;
+ const int img_x = block_x * BGSPRITE_MASK_BLOCK_SIZE + sub_x;
+ const int mask_y = (y_offset + img_y) / block_size;
+ const int mask_x = (x_offset + img_x) / block_size;
+
+ if (img_y < height && img_x < width) {
+ if (mask[mask_y][mask_x]) {
+ ++count;
+ }
+ ++total;
+ }
+ }
+ }
+
+ const double threshold = 0.30;
+ const int amount = (int)(threshold * total);
+ for (int sub_y = 0; sub_y < BGSPRITE_MASK_BLOCK_SIZE; ++sub_y) {
+ for (int sub_x = 0; sub_x < BGSPRITE_MASK_BLOCK_SIZE; ++sub_x) {
+ const int y = block_y * BGSPRITE_MASK_BLOCK_SIZE + sub_y;
+ const int x = block_x * BGSPRITE_MASK_BLOCK_SIZE + sub_x;
+ if (y < height && x < width) {
+ blended_img[y][x].exists = 1;
+ const int ychannel_idx = y * temporal_arf->y_stride + x;
+ const int uvchannel_idx =
+ (y >> temporal_arf->subsampling_y) * temporal_arf->uv_stride +
+ (x >> temporal_arf->subsampling_x);
+
+ if (count > amount) {
+// Foreground; use temporal arf.
+#if CONFIG_HIGHBITDEPTH
+ if (temporal_arf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *pano_y_buffer16 =
+ CONVERT_TO_SHORTPTR(temporal_arf->y_buffer);
+ uint16_t *pano_u_buffer16 =
+ CONVERT_TO_SHORTPTR(temporal_arf->u_buffer);
+ uint16_t *pano_v_buffer16 =
+ CONVERT_TO_SHORTPTR(temporal_arf->v_buffer);
+ blended_img[y][x].y = pano_y_buffer16[ychannel_idx];
+ blended_img[y][x].u = pano_u_buffer16[uvchannel_idx];
+ blended_img[y][x].v = pano_v_buffer16[uvchannel_idx];
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ blended_img[y][x].y = temporal_arf->y_buffer[ychannel_idx];
+ blended_img[y][x].u = temporal_arf->u_buffer[uvchannel_idx];
+ blended_img[y][x].v = temporal_arf->v_buffer[uvchannel_idx];
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ } else {
+// Background; use bgsprite arf.
+#if CONFIG_HIGHBITDEPTH
+ if (bgsprite->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *pano_y_buffer16 =
+ CONVERT_TO_SHORTPTR(bgsprite->y_buffer);
+ uint16_t *pano_u_buffer16 =
+ CONVERT_TO_SHORTPTR(bgsprite->u_buffer);
+ uint16_t *pano_v_buffer16 =
+ CONVERT_TO_SHORTPTR(bgsprite->v_buffer);
+ blended_img[y][x].y = pano_y_buffer16[ychannel_idx];
+ blended_img[y][x].u = pano_u_buffer16[uvchannel_idx];
+ blended_img[y][x].v = pano_v_buffer16[uvchannel_idx];
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ blended_img[y][x].y = bgsprite->y_buffer[ychannel_idx];
+ blended_img[y][x].u = bgsprite->u_buffer[uvchannel_idx];
+ blended_img[y][x].v = bgsprite->v_buffer[uvchannel_idx];
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ }
+ }
+ }
+ }
+ }
+ }
+
+ const int x_min = 0;
+ const int y_min = 0;
+ resample_panorama(blended_img, 0, &x_min, &y_min, 0, width - 1, 0, height - 1,
+ target);
+
+ for (int i = 0; i < height; ++i) {
+ aom_free(blended_img[i]);
+ }
+ aom_free(blended_img);
+}
+#endif // BGSPRITE_ENABLE_SEGMENTATION
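combine_arf decides per BGSPRITE_MASK_BLOCK_SIZE block: if more than 30% of its pixels land in foreground mask blocks, the temporally filtered ARF is used, otherwise the bgsprite ARF. A sketch of the decision, not part of the patch:

// Illustrative sketch, not part of the patch: for a full 4x4 block,
// total = 16 and amount = (int)(0.30 * 16) = 4, so 5 or more masked
// pixels select the temporal ARF.
static int use_temporal_arf_sketch(int masked_count, int total) {
  const double threshold = 0.30;
  return masked_count > (int)(threshold * total);
}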
+
+// Stitches images together to create ARF and stores it in 'panorama'.
+static void stitch_images(AV1_COMP *cpi, YV12_BUFFER_CONFIG **const frames,
+ const int num_frames, const int distance,
+ const int center_idx, const double **const params,
+ const int *const x_min, const int *const x_max,
+ const int *const y_min, const int *const y_max,
+ int pano_x_min, int pano_x_max, int pano_y_min,
+ int pano_y_max, YV12_BUFFER_CONFIG *panorama) {
+ const int width = pano_x_max - pano_x_min + 1;
+ const int height = pano_y_max - pano_y_min + 1;
+
+ // Create pano_stack[y][x][num_frames] stack of pixel values
+ YuvPixel ***pano_stack = aom_malloc(height * sizeof(*pano_stack));
+ for (int i = 0; i < height; ++i) {
+ pano_stack[i] = aom_malloc(width * sizeof(**pano_stack));
for (int j = 0; j < width; ++j) {
- aom_free(temp_pano[i][j]);
+ pano_stack[i][j] = aom_calloc(num_frames, sizeof(***pano_stack));
}
- aom_free(temp_pano[i]);
- aom_free(count[i]);
}
- aom_free(count);
- aom_free(temp_pano);
+
+ build_image_stack(frames, num_frames, params, x_min, x_max, y_min, y_max,
+ pano_x_min, pano_y_min, pano_stack);
+
+ // Create blended_img[y][x] of combined panorama pixel values.
+ YuvPixel **blended_img = aom_malloc(height * sizeof(*blended_img));
+ for (int i = 0; i < height; ++i) {
+ blended_img[i] = aom_malloc(width * sizeof(**blended_img));
+ }
+
+// Blend frames and store the result in blended_img.
+#if BGSPRITE_BLENDING_MODE == 1
+ blend_mean(width, height, num_frames, (const YuvPixel ***)pano_stack,
+ blended_img, panorama->flags & YV12_FLAG_HIGHBITDEPTH);
+#else // BGSPRITE_BLENDING_MODE != 1
+ blend_median(width, height, num_frames, (const YuvPixel ***)pano_stack,
+ blended_img);
+#endif // BGSPRITE_BLENDING_MODE == 1
+
+ // NOTE(toddnguyen): Right now the ARF in the cpi struct is fixed size at
+ // the same size as the frames. For now, we crop the generated panorama.
+ assert(panorama->y_width <= width && panorama->y_height <= height);
+
+ // Resamples the blended_img into the panorama buffer.
+ YV12_BUFFER_CONFIG bgsprite;
+ memset(&bgsprite, 0, sizeof(bgsprite));
+ aom_alloc_frame_buffer(&bgsprite, frames[0]->y_width, frames[0]->y_height,
+ frames[0]->subsampling_x, frames[0]->subsampling_y,
+#if CONFIG_HIGHBITDEPTH
+ frames[0]->flags & YV12_FLAG_HIGHBITDEPTH,
+#endif
+ frames[0]->border, 0);
+ aom_yv12_copy_frame(frames[0], &bgsprite);
+ bgsprite.bit_depth = frames[0]->bit_depth;
+ resample_panorama(blended_img, center_idx, x_min, y_min, pano_x_min,
+ pano_x_max, pano_y_min, pano_y_max, &bgsprite);
+
+#if BGSPRITE_ENABLE_SEGMENTATION
+ YV12_BUFFER_CONFIG temporal_bgsprite;
+ memset(&temporal_bgsprite, 0, sizeof(temporal_bgsprite));
+ aom_alloc_frame_buffer(&temporal_bgsprite, frames[0]->y_width,
+ frames[0]->y_height, frames[0]->subsampling_x,
+ frames[0]->subsampling_y,
+#if CONFIG_HIGHBITDEPTH
+ frames[0]->flags & YV12_FLAG_HIGHBITDEPTH,
+#endif
+ frames[0]->border, 0);
+ aom_yv12_copy_frame(frames[0], &temporal_bgsprite);
+ temporal_bgsprite.bit_depth = frames[0]->bit_depth;
+
+ av1_temporal_filter(cpi, &bgsprite, &temporal_bgsprite, distance);
+
+ // Block size constants for gaussian model.
+ const int N_1 = 2;
+ const int y_block_height = (height / N_1) + (height % N_1 != 0 ? 1 : 0);
+  const int x_block_width = (width / N_1) + (width % N_1 != 0 ? 1 : 0);
+ YuvPixelGaussian **gauss = aom_malloc(y_block_height * sizeof(*gauss));
+ for (int i = 0; i < y_block_height; ++i) {
+ gauss[i] = aom_calloc(x_block_width, sizeof(**gauss));
+ }
+
+ // Build Gaussian model.
+ build_gaussian((const YuvPixel ***)pano_stack, num_frames, width, height,
+ x_block_width, y_block_height, N_1, gauss);
+
+ // Select background model and build foreground mask.
+ uint8_t **mask = aom_malloc(y_block_height * sizeof(*mask));
+ for (int i = 0; i < y_block_height; ++i) {
+ mask[i] = aom_calloc(x_block_width, sizeof(**mask));
+ }
+
+ const int x_offset = -pano_x_min;
+ const int y_offset = -pano_y_min;
+ build_mask(x_min[center_idx], y_min[center_idx], x_offset, y_offset,
+ x_block_width, y_block_height, N_1,
+ (const YuvPixelGaussian **)gauss,
+ (YV12_BUFFER_CONFIG * const) frames[center_idx], panorama, mask);
+
+ YV12_BUFFER_CONFIG temporal_arf;
+ memset(&temporal_arf, 0, sizeof(temporal_arf));
+ aom_alloc_frame_buffer(&temporal_arf, frames[0]->y_width, frames[0]->y_height,
+ frames[0]->subsampling_x, frames[0]->subsampling_y,
+#if CONFIG_HIGHBITDEPTH
+ frames[0]->flags & YV12_FLAG_HIGHBITDEPTH,
+#endif
+ frames[0]->border, 0);
+ aom_yv12_copy_frame(frames[0], &temporal_arf);
+ temporal_arf.bit_depth = frames[0]->bit_depth;
+ av1_temporal_filter(cpi, NULL, &temporal_arf, distance);
+
+ combine_arf(&temporal_arf, &temporal_bgsprite, mask, N_1, x_offset, y_offset,
+ panorama);
+
+ aom_free_frame_buffer(&temporal_arf);
+ aom_free_frame_buffer(&temporal_bgsprite);
+ for (int i = 0; i < y_block_height; ++i) {
+ aom_free(gauss[i]);
+ aom_free(mask[i]);
+ }
+ aom_free(gauss);
+ aom_free(mask);
+#else // !BGSPRITE_ENABLE_SEGMENTATION
+ av1_temporal_filter(cpi, &bgsprite, panorama, distance);
+#endif // BGSPRITE_ENABLE_SEGMENTATION
+
+ aom_free_frame_buffer(&bgsprite);
+ for (int i = 0; i < height; ++i) {
+ for (int j = 0; j < width; ++j) {
+ aom_free(pano_stack[i][j]);
+ }
+ aom_free(pano_stack[i]);
+ aom_free(blended_img[i]);
+ }
+ aom_free(pano_stack);
+ aom_free(blended_img);
}
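After this refactor, stitch_images runs the whole bgsprite pipeline rather than just stacking plus blending. The call order with segmentation enabled, as an outline (not part of the patch):

// Illustrative outline, not part of the patch:
//   build_image_stack(...);                // warp frames into pano_stack
//   blend_mean(...) / blend_median(...);   // per BGSPRITE_BLENDING_MODE
//   resample_panorama(..., &bgsprite);     // crop the blend to frame size
//   av1_temporal_filter(cpi, &bgsprite, &temporal_bgsprite, distance);
//   build_gaussian(...); build_mask(...);  // background model + FG mask
//   av1_temporal_filter(cpi, NULL, &temporal_arf, distance);
//   combine_arf(...);                      // mask-select into the panorama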
int av1_background_sprite(AV1_COMP *cpi, int distance) {
+#if BGSPRITE_ENABLE_METRICS
+  // Fall back to temporal filtering if firstpass stats disable bgsprite.
+ if (!cpi->bgsprite_allowed) {
+ return 1;
+ }
+#endif // BGSPRITE_ENABLE_METRICS
+
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
static const double identity_params[MAX_PARAMDIM - 1] = {
0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
@@ -626,7 +1139,6 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
#if CONFIG_EXT_REFS
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW) {
- cpi->alt_ref_buffer = av1_lookahead_peek(cpi->lookahead, distance)->img;
cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 1;
frames_fwd = 0;
frames_bwd = 0;
@@ -646,17 +1158,6 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
frames[frames_to_stitch - 1 - frame] = &buf->img;
}
- YV12_BUFFER_CONFIG temp_bg;
- memset(&temp_bg, 0, sizeof(temp_bg));
- aom_alloc_frame_buffer(&temp_bg, frames[0]->y_width, frames[0]->y_height,
- frames[0]->subsampling_x, frames[0]->subsampling_y,
-#if CONFIG_HIGHBITDEPTH
- frames[0]->flags & YV12_FLAG_HIGHBITDEPTH,
-#endif
- frames[0]->border, 0);
- aom_yv12_copy_frame(frames[0], &temp_bg);
- temp_bg.bit_depth = frames[0]->bit_depth;
-
// Allocate empty arrays for parameters between frames.
double **params = aom_malloc(frames_to_stitch * sizeof(*params));
for (int i = 0; i < frames_to_stitch; ++i) {
@@ -664,9 +1165,10 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
memcpy(params[i], identity_params, sizeof(identity_params));
}
- // Use global motion to find affine transformations between frames.
- // params[i] will have the transform from frame[i] to frame[i-1].
- // params[0] will have the identity matrix because it has no previous frame.
+// Use global motion to find affine transformations between frames.
+// params[i] will have the transform from frame[i] to frame[i-1].
+// params[0] will have the identity matrix (has no previous frame).
+#if BGSPRITE_ENABLE_GME
TransformationType model = AFFINE;
int inliers_by_motion[RANSAC_NUM_MOTIONS];
for (int frame = 0; frame < frames_to_stitch - 1; ++frame) {
@@ -686,6 +1188,7 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
return 1;
}
}
+#endif // BGSPRITE_ENABLE_GME
// Compound the transformation parameters.
for (int i = 1; i < frames_to_stitch; ++i) {
@@ -702,7 +1205,7 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
int *y_max = aom_malloc(frames_to_stitch * sizeof(*y_max));
int *y_min = aom_malloc(frames_to_stitch * sizeof(*y_min));
- find_limits(cpi->initial_width, cpi->initial_height,
+ find_limits(frames[0]->y_width, frames[0]->y_height,
(const double **const)params, frames_to_stitch, x_min, x_max,
y_min, y_max, &pano_x_min, &pano_x_max, &pano_y_min, &pano_y_max);
@@ -721,20 +1224,17 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
}
// Recompute frame limits for new adjusted center.
- find_limits(cpi->initial_width, cpi->initial_height,
+ find_limits(frames[0]->y_width, frames[0]->y_height,
(const double **const)params, frames_to_stitch, x_min, x_max,
y_min, y_max, &pano_x_min, &pano_x_max, &pano_y_min, &pano_y_max);
- // Stitch Images.
- stitch_images(frames, frames_to_stitch, center_idx,
+ // Stitch Images and apply bgsprite filter.
+ stitch_images(cpi, frames, frames_to_stitch, distance, center_idx,
(const double **const)params, x_min, x_max, y_min, y_max,
- pano_x_min, pano_x_max, pano_y_min, pano_y_max, &temp_bg);
-
- // Apply temporal filter.
- av1_temporal_filter(cpi, &temp_bg, distance);
+ pano_x_min, pano_x_max, pano_y_min, pano_y_max,
+ &cpi->alt_ref_buffer);
// Free memory.
- aom_free_frame_buffer(&temp_bg);
for (int i = 0; i < frames_to_stitch; ++i) {
aom_free(params[i]);
}
@@ -746,3 +1246,12 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
return 0;
}
+
+#undef _POSIX_C_SOURCE
+#undef BGSPRITE_BLENDING_MODE
+#undef BGSPRITE_INTERPOLATION
+#undef BGSPRITE_ENABLE_METRICS
+#undef BGSPRITE_ENABLE_SEGMENTATION
+#undef BGSPRITE_ENABLE_GME
+#undef BGSPRITE_MASK_BLOCK_SIZE
+#undef TRANSFORM_MAT_DIM
diff --git a/third_party/aom/av1/encoder/bitstream.c b/third_party/aom/av1/encoder/bitstream.c
index 2e0abc186..08f605f10 100644
--- a/third_party/aom/av1/encoder/bitstream.c
+++ b/third_party/aom/av1/encoder/bitstream.c
@@ -14,9 +14,9 @@
#include <stdio.h>
#include "aom/aom_encoder.h"
-#include "aom_dsp/bitwriter_buffer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
+#include "aom_dsp/bitwriter_buffer.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem_ops.h"
#include "aom_ports/system_state.h"
@@ -40,9 +40,6 @@
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
-#if CONFIG_ANS
-#include "aom_dsp/buf_ans.h"
-#endif // CONFIG_ANS
#if CONFIG_LV_MAP
#include "av1/encoder/encodetxb.h"
#endif // CONFIG_LV_MAP
@@ -50,9 +47,9 @@
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/mcomp.h"
-#if CONFIG_PALETTE && CONFIG_PALETTE_DELTA_ENCODING
+#if CONFIG_PALETTE_DELTA_ENCODING
#include "av1/encoder/palette.h"
-#endif // CONFIG_PALETTE && CONFIG_PALETTE_DELTA_ENCODING
+#endif // CONFIG_PALETTE_DELTA_ENCODING
#include "av1/encoder/segmentation.h"
#include "av1/encoder/subexp.h"
#include "av1/encoder/tokenize.h"
@@ -62,12 +59,13 @@
#define ENC_MISMATCH_DEBUG 0
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
static struct av1_token
inter_singleref_comp_mode_encodings[INTER_SINGLEREF_COMP_MODES];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
+// TODO(anybody): remove this flag when PVQ supports palette coding tool
+#if !CONFIG_PVQ || CONFIG_EXT_INTRA
static INLINE void write_uniform(aom_writer *w, int n, int v) {
const int l = get_unsigned_bits(n);
const int m = (1 << l) - n;
@@ -79,63 +77,47 @@ static INLINE void write_uniform(aom_writer *w, int n, int v) {
aom_write_literal(w, (v - m) & 1, 1);
}
}
-#endif // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
+#endif // !CONFIG_PVQ || CONFIG_EXT_INTRA
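write_uniform above is a quasi-uniform code: with l = get_unsigned_bits(n) and m = (1 << l) - n, the first m symbols take l - 1 bits and the remaining n - m take l bits (an (l - 1)-bit prefix plus one balancing bit). A worked example, not part of the patch:

// Illustrative worked example, not part of the patch: n = 9 gives
// l = 4 and m = 16 - 9 = 7.  Symbols 0..6 are written in 3 bits;
// 7 and 8 are written as the 3-bit prefix 7 plus one extra bit,
// 4 bits total, so no symbol exceeds ceil(log2(9)) = 4 bits.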
-#if CONFIG_EXT_TX
-static struct av1_token ext_tx_inter_encodings[EXT_TX_SETS_INTER][TX_TYPES];
-static struct av1_token ext_tx_intra_encodings[EXT_TX_SETS_INTRA][TX_TYPES];
-#else
-static struct av1_token ext_tx_encodings[TX_TYPES];
-#endif // CONFIG_EXT_TX
#if CONFIG_EXT_INTRA
#if CONFIG_INTRA_INTERP
static struct av1_token intra_filter_encodings[INTRA_FILTERS];
#endif // CONFIG_INTRA_INTERP
#endif // CONFIG_EXT_INTRA
-#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
static struct av1_token interintra_mode_encodings[INTERINTRA_MODES];
#endif
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
static struct av1_token compound_type_encodings[COMPOUND_TYPES];
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
-#endif // CONFIG_EXT_INTER
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-static struct av1_token ncobmc_mode_encodings[MAX_NCOBMC_MODES];
-#endif
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_LOOP_RESTORATION
static struct av1_token switchable_restore_encodings[RESTORE_SWITCHABLE_TYPES];
+static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm,
+ MACROBLOCKD *xd,
+ aom_writer *const w, int plane,
+ int rtile_idx);
#endif // CONFIG_LOOP_RESTORATION
-static void write_uncompressed_header(AV1_COMP *cpi,
- struct aom_write_bit_buffer *wb);
+#if CONFIG_OBU
+static void write_uncompressed_header_obu(AV1_COMP *cpi,
+ struct aom_write_bit_buffer *wb);
+#else
+static void write_uncompressed_header_frame(AV1_COMP *cpi,
+ struct aom_write_bit_buffer *wb);
+#endif
+
static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data);
+
+#if !CONFIG_OBU || CONFIG_EXT_TILE
static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst,
const uint32_t data_size, const uint32_t max_tile_size,
const uint32_t max_tile_col_size,
int *const tile_size_bytes,
int *const tile_col_size_bytes);
-
+#endif
void av1_encode_token_init(void) {
-#if CONFIG_EXT_TX
- int s;
-#endif // CONFIG_EXT_TX
-#if CONFIG_EXT_TX
- for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
- av1_tokens_from_tree(ext_tx_inter_encodings[s], av1_ext_tx_inter_tree[s]);
- }
- for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
- av1_tokens_from_tree(ext_tx_intra_encodings[s], av1_ext_tx_intra_tree[s]);
- }
-#else
- av1_tokens_from_tree(ext_tx_encodings, av1_ext_tx_tree);
-#endif // CONFIG_EXT_TX
-
#if CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
av1_tokens_from_tree(intra_filter_encodings, av1_intra_filter_tree);
#endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP
-#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
av1_tokens_from_tree(interintra_mode_encodings, av1_interintra_mode_tree);
#endif // CONFIG_INTERINTRA
@@ -146,35 +128,10 @@ void av1_encode_token_init(void) {
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
av1_tokens_from_tree(compound_type_encodings, av1_compound_type_tree);
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
-#endif // CONFIG_EXT_INTER
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- av1_tokens_from_tree(ncobmc_mode_encodings, av1_ncobmc_mode_tree);
-#endif
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_LOOP_RESTORATION
av1_tokens_from_tree(switchable_restore_encodings,
av1_switchable_restore_tree);
#endif // CONFIG_LOOP_RESTORATION
-
- /* This hack is necessary when CONFIG_DUAL_FILTER is enabled because the five
- SWITCHABLE_FILTERS are not consecutive, e.g., 0, 1, 2, 3, 4, when doing
- an in-order traversal of the av1_switchable_interp_tree structure. */
- av1_indices_from_tree(av1_switchable_interp_ind, av1_switchable_interp_inv,
- av1_switchable_interp_tree);
-/* This hack is necessary because the four TX_TYPES are not consecutive,
- e.g., 0, 1, 2, 3, when doing an in-order traversal of the av1_ext_tx_tree
- structure. */
-#if CONFIG_EXT_TX
- for (s = 1; s < EXT_TX_SETS_INTRA; ++s)
- av1_indices_from_tree(av1_ext_tx_intra_ind[s], av1_ext_tx_intra_inv[s],
- av1_ext_tx_intra_tree[s]);
- for (s = 1; s < EXT_TX_SETS_INTER; ++s)
- av1_indices_from_tree(av1_ext_tx_inter_ind[s], av1_ext_tx_inter_inv[s],
- av1_ext_tx_inter_tree[s]);
-#else
- av1_indices_from_tree(av1_ext_tx_ind, av1_ext_tx_inv, av1_ext_tx_tree);
-#endif
}
static void write_intra_mode_kf(const AV1_COMMON *cm, FRAME_CONTEXT *frame_ctx,
@@ -184,7 +141,7 @@ static void write_intra_mode_kf(const AV1_COMMON *cm, FRAME_CONTEXT *frame_ctx,
#if CONFIG_INTRABC
assert(!is_intrabc_block(&mi->mbmi));
#endif // CONFIG_INTRABC
- aom_write_symbol(w, av1_intra_mode_ind[mode],
+ aom_write_symbol(w, mode,
get_y_mode_cdf(frame_ctx, mi, above_mi, left_mi, block),
INTRA_MODES);
(void)cm;
@@ -234,16 +191,12 @@ static void write_drl_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi,
assert(mbmi->ref_mv_idx < 3);
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV ||
mbmi->mode == SR_NEW_NEWMV) {
#else // !CONFIG_COMPOUND_SINGLEREF
if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
#endif // CONFIG_COMPOUND_SINGLEREF
-#else // !CONFIG_EXT_INTER
- if (mbmi->mode == NEWMV) {
-#endif // CONFIG_EXT_INTER
int idx;
for (idx = 0; idx < 2; ++idx) {
if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
@@ -282,7 +235,6 @@ static void write_drl_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi,
}
}
-#if CONFIG_EXT_INTER
static void write_inter_compound_mode(AV1_COMMON *cm, MACROBLOCKD *xd,
aom_writer *w, PREDICTION_MODE mode,
const int16_t mode_ctx) {
@@ -305,30 +257,12 @@ static void write_inter_singleref_comp_mode(MACROBLOCKD *xd, aom_writer *w,
inter_singleref_comp_cdf, INTER_SINGLEREF_COMP_MODES);
}
#endif // CONFIG_COMPOUND_SINGLEREF
-#endif // CONFIG_EXT_INTER
static void encode_unsigned_max(struct aom_write_bit_buffer *wb, int data,
int max) {
aom_wb_write_literal(wb, data, get_unsigned_bits(max));
}
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
-static void prob_diff_update(const aom_tree_index *tree,
- aom_prob probs[/*n - 1*/],
- const unsigned int counts[/* n */], int n,
- int probwt, aom_writer *w) {
- int i;
- unsigned int branch_ct[32][2];
-
- // Assuming max number of probabilities <= 32
- assert(n <= 32);
-
- av1_tree_probs_from_distribution(tree, branch_ct, counts);
- for (i = 0; i < n - 1; ++i)
- av1_cond_prob_diff_update(w, &probs[i], branch_ct[i], probwt);
-}
-#endif
-
#if CONFIG_VAR_TX
static void write_tx_size_vartx(const AV1_COMMON *cm, MACROBLOCKD *xd,
const MB_MODE_INFO *mbmi, TX_SIZE tx_size,
@@ -381,7 +315,7 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, MACROBLOCKD *xd,
aom_write(w, 1, cm->fc->txfm_partition_prob[ctx]);
#endif
- if (tx_size == TX_8X8) {
+ if (sub_txs == TX_4X4) {
txfm_partition_update(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row, sub_txs, tx_size);
return;
@@ -406,7 +340,7 @@ static void update_txfm_partition_probs(AV1_COMMON *cm, aom_writer *w,
counts->txfm_partition[k], probwt);
}
#endif // CONFIG_NEW_MULTISYMBOL
-#endif
+#endif // CONFIG_VAR_TX
static void write_selected_tx_size(const AV1_COMMON *cm, const MACROBLOCKD *xd,
aom_writer *w) {
@@ -414,17 +348,12 @@ static void write_selected_tx_size(const AV1_COMMON *cm, const MACROBLOCKD *xd,
const BLOCK_SIZE bsize = mbmi->sb_type;
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
-// For sub8x8 blocks the tx_size symbol does not need to be sent
-#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_EXT_TX) && CONFIG_RECT_TX
- if (bsize > BLOCK_4X4) {
-#else
- if (bsize >= BLOCK_8X8) {
-#endif
+ if (block_signals_txsize(bsize)) {
const TX_SIZE tx_size = mbmi->tx_size;
const int is_inter = is_inter_block(mbmi);
const int tx_size_ctx = get_tx_size_context(xd);
- const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
- : intra_tx_size_cat_lookup[bsize];
+ const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
+ : intra_tx_size_cat_lookup[bsize];
const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
const int depth = tx_size_to_depth(coded_tx_size);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -435,9 +364,14 @@ static void write_selected_tx_size(const AV1_COMMON *cm, const MACROBLOCKD *xd,
tx_size_cat + 2);
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, tx_size == quarter_txsize_lookup[bsize],
+ cm->fc->quarter_tx_size_cdf, 2);
+#else
aom_write(w, tx_size == quarter_txsize_lookup[bsize],
cm->fc->quarter_tx_size_prob);
#endif
+#endif
}
}
@@ -496,14 +430,12 @@ static void write_motion_mode(const AV1_COMMON *cm, MACROBLOCKD *xd,
const MODE_INFO *mi, aom_writer *w) {
const MB_MODE_INFO *mbmi = &mi->mbmi;
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- MOTION_MODE last_motion_mode_allowed =
- motion_mode_allowed_wrapper(0,
-#if CONFIG_GLOBAL_MOTION
- 0, cm->global_motion,
-#endif // CONFIG_GLOBAL_MOTION
- mi);
-#else
+#if !CONFIG_GLOBAL_MOTION
+ // The cm parameter is only used with global_motion or with
+ // motion_var and warped_motion. In other cases, explicitly ignore
+ // it to avoid a compiler warning.
+ (void)cm;
+#endif
MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
#if CONFIG_GLOBAL_MOTION
0, cm->global_motion,
@@ -512,9 +444,18 @@ static void write_motion_mode(const AV1_COMMON *cm, MACROBLOCKD *xd,
xd,
#endif
mi);
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
if (last_motion_mode_allowed == SIMPLE_TRANSLATION) return;
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ if (last_motion_mode_allowed == NCOBMC_ADAPT_WEIGHT) {
+ aom_write_symbol(w, mbmi->motion_mode,
+ xd->tile_ctx->ncobmc_cdf[mbmi->sb_type],
+ OBMC_FAMILY_MODES);
+ } else if (last_motion_mode_allowed == OBMC_CAUSAL) {
+ aom_write_symbol(w, mbmi->motion_mode == OBMC_CAUSAL,
+ xd->tile_ctx->obmc_cdf[mbmi->sb_type], 2);
+ } else {
+#else
if (last_motion_mode_allowed == OBMC_CAUSAL) {
#if CONFIG_NEW_MULTISYMBOL
aom_write_symbol(w, mbmi->motion_mode == OBMC_CAUSAL,
@@ -524,6 +465,7 @@ static void write_motion_mode(const AV1_COMMON *cm, MACROBLOCKD *xd,
cm->fc->obmc_prob[mbmi->sb_type]);
#endif
} else {
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
aom_write_symbol(w, mbmi->motion_mode,
xd->tile_ctx->motion_mode_cdf[mbmi->sb_type],
@@ -540,30 +482,16 @@ static void write_ncobmc_mode(MACROBLOCKD *xd, const MODE_INFO *mi,
ADAPT_OVERLAP_BLOCK ao_block = adapt_overlap_block_lookup[mbmi->sb_type];
if (mbmi->motion_mode != NCOBMC_ADAPT_WEIGHT) return;
-#ifndef TRAINING_WEIGHTS
aom_write_symbol(w, mbmi->ncobmc_mode[0],
xd->tile_ctx->ncobmc_mode_cdf[ao_block], MAX_NCOBMC_MODES);
if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) {
aom_write_symbol(w, mbmi->ncobmc_mode[1],
xd->tile_ctx->ncobmc_mode_cdf[ao_block], MAX_NCOBMC_MODES);
}
-#else
- int block;
- for (block = 0; block < 4; ++block)
- aom_write_symbol(w, mbmi->ncobmc_mode[0][block],
- xd->tile_ctx->ncobmc_mode_cdf[ao_block], MAX_NCOBMC_MODES);
- if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) {
- for (block = 0; block < 4; ++block)
- aom_write_symbol(w, mbmi->ncobmc_mode[1][block],
- xd->tile_ctx->ncobmc_mode_cdf[ao_block],
- MAX_NCOBMC_MODES);
- }
-#endif
}
#endif
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_DELTA_Q
static void write_delta_qindex(const AV1_COMMON *cm, const MACROBLOCKD *xd,
int delta_qindex, aom_writer *w) {
int sign = delta_qindex < 0;
@@ -579,7 +507,7 @@ static void write_delta_qindex(const AV1_COMMON *cm, const MACROBLOCKD *xd,
if (!smallval) {
rem_bits = OD_ILOG_NZ(abs - 1) - 1;
thr = (1 << rem_bits) + 1;
- aom_write_literal(w, rem_bits, 3);
+ aom_write_literal(w, rem_bits - 1, 3);
aom_write_literal(w, abs - thr, rem_bits);
}
if (abs > 0) {
@@ -589,6 +517,9 @@ static void write_delta_qindex(const AV1_COMMON *cm, const MACROBLOCKD *xd,
#if CONFIG_EXT_DELTA_Q
static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+#if CONFIG_LOOPFILTER_LEVEL
+ int lf_id,
+#endif
int delta_lflevel, aom_writer *w) {
int sign = delta_lflevel < 0;
int abs = sign ? -delta_lflevel : delta_lflevel;
@@ -597,13 +528,24 @@ static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd,
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
+#if CONFIG_LOOPFILTER_LEVEL
+ if (cm->delta_lf_multi) {
+ assert(lf_id >= 0 && lf_id < FRAME_LF_COUNT);
+ aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL),
+ ec_ctx->delta_lf_multi_cdf[lf_id], DELTA_LF_PROBS + 1);
+ } else {
+ aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL), ec_ctx->delta_lf_cdf,
+ DELTA_LF_PROBS + 1);
+ }
+#else
aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL), ec_ctx->delta_lf_cdf,
DELTA_LF_PROBS + 1);
+#endif // CONFIG_LOOPFILTER_LEVEL
if (!smallval) {
rem_bits = OD_ILOG_NZ(abs - 1) - 1;
thr = (1 << rem_bits) + 1;
- aom_write_literal(w, rem_bits, 3);
+ aom_write_literal(w, rem_bits - 1, 3);
aom_write_literal(w, abs - thr, rem_bits);
}
if (abs > 0) {
@@ -611,7 +553,6 @@ static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd,
}
}
#endif // CONFIG_EXT_DELTA_Q
-#endif // CONFIG_DELTA_Q
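write_delta_qindex and write_delta_lflevel share the escape scheme the rem_bits - 1 fixes above touch: for a large magnitude, rem_bits = OD_ILOG_NZ(abs - 1) - 1, then a 3-bit length header and rem_bits extra bits. Writing rem_bits - 1 lets the full 3-bit header span rem_bits 1..8, presumably mirrored by a + 1 on the decoder side. A worked example, not part of the patch:

// Illustrative worked example, not part of the patch: abs = 10.
// OD_ILOG_NZ(10 - 1) = 4, so rem_bits = 3 and thr = (1 << 3) + 1 = 9.
// The escape writes the header rem_bits - 1 = 2 in 3 bits, then
// abs - thr = 1 in 3 bits; this header value covers abs in [9, 16].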
#if !CONFIG_NEW_MULTISYMBOL
static void update_skip_probs(AV1_COMMON *cm, aom_writer *w,
@@ -625,20 +566,21 @@ static void update_skip_probs(AV1_COMMON *cm, aom_writer *w,
}
#endif
-#if CONFIG_PALETTE
-static void pack_palette_tokens(aom_writer *w, const TOKENEXTRA **tp, int n,
- int num) {
+// TODO(anybody): remove this flag when PVQ supports palette coding tool
+#if !CONFIG_PVQ
+static void pack_map_tokens(aom_writer *w, const TOKENEXTRA **tp, int n,
+ int num) {
const TOKENEXTRA *p = *tp;
write_uniform(w, n, p->token); // The first color index.
++p;
--num;
for (int i = 0; i < num; ++i) {
- aom_write_symbol(w, p->token, p->palette_cdf, n);
+ aom_write_symbol(w, p->token, p->color_map_cdf, n);
++p;
}
*tp = p;
}
-#endif // CONFIG_PALETTE
+#endif // !CONFIG_PVQ
#if !CONFIG_PVQ
#if CONFIG_SUPERTX
@@ -667,6 +609,7 @@ static void update_supertx_probs(AV1_COMMON *cm, int probwt, aom_writer *w) {
}
#endif // CONFIG_SUPERTX
+#if !CONFIG_LV_MAP
#if CONFIG_NEW_MULTISYMBOL
static INLINE void write_coeff_extra(const aom_cdf_prob *const *cdf, int val,
int n, aom_writer *w) {
@@ -693,12 +636,14 @@ static INLINE void write_coeff_extra(const aom_prob *pb, int value,
aom_write_record(w, bb, pb[index], token_stats);
}
}
-#endif
+#endif // CONFIG_NEW_MULTISYMBOL
-#if !CONFIG_LV_MAP
static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp,
const TOKENEXTRA *const stop,
aom_bit_depth_t bit_depth, const TX_SIZE tx_size,
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ TX_TYPE tx_type, int is_inter,
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
TOKEN_STATS *token_stats) {
const TOKENEXTRA *p = *tp;
#if CONFIG_VAR_TX
@@ -706,6 +651,17 @@ static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp,
const int seg_eob = tx_size_2d[tx_size];
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ if (tx_type == MRC_DCT && ((is_inter && SIGNAL_MRC_MASK_INTER) ||
+ (!is_inter && SIGNAL_MRC_MASK_INTRA))) {
+ int rows = tx_size_high[tx_size];
+ int cols = tx_size_wide[tx_size];
+ assert(tx_size == TX_32X32);
+ assert(p < stop);
+ pack_map_tokens(w, &p, 2, rows * cols);
+ }
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+
while (p < stop && p->token != EOSB_TOKEN) {
const int token = p->token;
const int eob_val = p->eob_val;
@@ -949,6 +905,10 @@ static void pack_txb_tokens(aom_writer *w, const TOKENEXTRA **tp,
TX_SIZE plane_tx_size;
const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ TX_TYPE tx_type = av1_get_tx_type(plane ? PLANE_TYPE_UV : PLANE_TYPE_Y, xd,
+ blk_row, blk_col, block, tx_size);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
@@ -960,7 +920,11 @@ static void pack_txb_tokens(aom_writer *w, const TOKENEXTRA **tp,
TOKEN_STATS tmp_token_stats;
init_token_stats(&tmp_token_stats);
#if !CONFIG_PVQ
- pack_mb_tokens(w, tp, tok_end, bit_depth, tx_size, &tmp_token_stats);
+ pack_mb_tokens(w, tp, tok_end, bit_depth, tx_size,
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ tx_type, is_inter_block(mbmi),
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ &tmp_token_stats);
#else
pack_pvq_tokens(w, x, xd, plane, bsize, tx_size);
#endif
@@ -1020,9 +984,13 @@ static void write_segment_id(aom_writer *w, const struct segmentation *seg,
#if CONFIG_NEW_MULTISYMBOL
#define WRITE_REF_BIT(bname, pname) \
aom_write_symbol(w, bname, av1_get_pred_cdf_##pname(cm, xd), 2)
+#define WRITE_REF_BIT2(bname, pname) \
+ aom_write_symbol(w, bname, av1_get_pred_cdf_##pname(xd), 2)
#else
#define WRITE_REF_BIT(bname, pname) \
aom_write(w, bname, av1_get_pred_prob_##pname(cm, xd))
+#define WRITE_REF_BIT2(bname, pname) \
+ aom_write(w, bname, av1_get_pred_prob_##pname(cm, xd))
#endif
// This function encodes the reference frame
@@ -1042,14 +1010,12 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
// does the feature use compound prediction or not
// (if not specified at the frame/segment level)
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
-#if !SUB8X8_COMP_REF
- if (mbmi->sb_type != BLOCK_4X4)
-#endif
+ if (is_comp_ref_allowed(mbmi->sb_type))
#if CONFIG_NEW_MULTISYMBOL
aom_write_symbol(w, is_compound, av1_get_reference_mode_cdf(cm, xd), 2);
#else
- aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd));
-#endif
+ aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd));
+#endif // CONFIG_NEW_MULTISYMBOL
} else {
assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE));
}
@@ -1064,7 +1030,12 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm))
if (L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm) || BWD_AND_ALT(cm))
#endif // CONFIG_VAR_REFS
- aom_write(w, comp_ref_type, av1_get_comp_reference_type_prob(cm, xd));
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, comp_ref_type,
+ av1_get_comp_reference_type_cdf(xd), 2);
+#else
+ aom_write(w, comp_ref_type, av1_get_comp_reference_type_prob(cm, xd));
+#endif
#if CONFIG_VAR_REFS
else
assert(comp_ref_type == BIDIR_COMP_REFERENCE);
@@ -1081,7 +1052,7 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
#if CONFIG_VAR_REFS
if ((L_AND_L2(cm) || L_AND_L3(cm) || L_AND_G(cm)) && BWD_AND_ALT(cm))
#endif // CONFIG_VAR_REFS
- aom_write(w, bit, av1_get_pred_prob_uni_comp_ref_p(cm, xd));
+ WRITE_REF_BIT2(bit, uni_comp_ref_p);
if (!bit) {
assert(mbmi->ref_frame[0] == LAST_FRAME);
@@ -1090,14 +1061,13 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
#endif // CONFIG_VAR_REFS
const int bit1 = mbmi->ref_frame[1] == LAST3_FRAME ||
mbmi->ref_frame[1] == GOLDEN_FRAME;
- aom_write(w, bit1, av1_get_pred_prob_uni_comp_ref_p1(cm, xd));
-
+ WRITE_REF_BIT2(bit1, uni_comp_ref_p1);
if (bit1) {
#if CONFIG_VAR_REFS
if (L_AND_L3(cm) && L_AND_G(cm)) {
#endif // CONFIG_VAR_REFS
const int bit2 = mbmi->ref_frame[1] == GOLDEN_FRAME;
- aom_write(w, bit2, av1_get_pred_prob_uni_comp_ref_p2(cm, xd));
+ WRITE_REF_BIT2(bit2, uni_comp_ref_p2);
#if CONFIG_VAR_REFS
}
#endif // CONFIG_VAR_REFS
@@ -1147,11 +1117,20 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
}
#if CONFIG_VAR_REFS
- // Test need to explicitly code (BWD) vs (ALT) branch node in tree
- if (BWD_AND_ALT(cm)) {
+ // Test whether the (BWD,ALT2) vs (ALT) branch node in the tree needs to
+ // be coded explicitly
+ if (BWD_OR_ALT2(cm) && ALTREF_IS_VALID(cm)) {
#endif // CONFIG_VAR_REFS
const int bit_bwd = mbmi->ref_frame[1] == ALTREF_FRAME;
WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
+
+ if (!bit_bwd) {
+#if CONFIG_VAR_REFS
+ // Test whether the (BWD) vs (ALT2) branch node in the tree needs to be
+ // coded explicitly
+ if (BWD_AND_ALT2(cm))
+#endif // CONFIG_VAR_REFS
+ WRITE_REF_BIT(mbmi->ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1);
+ }
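+ // With ALTREF2 the backward side is a two-level tree: comp_bwdref_p first
+ // separates ALTREF from {BWDREF, ALTREF2}, then comp_bwdref_p1 resolves
+ // ALTREF2 vs BWDREF.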
#if CONFIG_VAR_REFS
}
#endif // CONFIG_VAR_REFS
@@ -1162,22 +1141,31 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
#endif // CONFIG_EXT_REFS
} else {
#if CONFIG_EXT_REFS
- const int bit0 = (mbmi->ref_frame[0] == ALTREF_FRAME ||
- mbmi->ref_frame[0] == BWDREF_FRAME);
+ const int bit0 = (mbmi->ref_frame[0] <= ALTREF_FRAME &&
+ mbmi->ref_frame[0] >= BWDREF_FRAME);
#if CONFIG_VAR_REFS
- // Test need to explicitly code (L,L2,L3,G) vs (BWD,ALT) branch node in
- // tree
- if ((L_OR_L2(cm) || L3_OR_G(cm)) && BWD_OR_ALT(cm))
+ // Test whether the (L,L2,L3,G) vs (BWD,ALT2,ALT) branch node in the tree
+ // needs to be coded explicitly
+ if ((L_OR_L2(cm) || L3_OR_G(cm)) &&
+ (BWD_OR_ALT2(cm) || ALTREF_IS_VALID(cm)))
#endif // CONFIG_VAR_REFS
WRITE_REF_BIT(bit0, single_ref_p1);
if (bit0) {
#if CONFIG_VAR_REFS
- // Test need to explicitly code (BWD) vs (ALT) branch node in tree
- if (BWD_AND_ALT(cm)) {
+ // Test whether the (BWD,ALT2) vs (ALT) branch node in the tree needs to
+ // be coded explicitly
+ if (BWD_OR_ALT2(cm) && ALTREF_IS_VALID(cm)) {
#endif // CONFIG_VAR_REFS
const int bit1 = mbmi->ref_frame[0] == ALTREF_FRAME;
WRITE_REF_BIT(bit1, single_ref_p2);
+
+ if (!bit1) {
+#if CONFIG_VAR_REFS
+ // Test whether the (BWD) vs (ALT2) branch node in the tree needs to be
+ // coded explicitly
+ if (BWD_AND_ALT2(cm))
+#endif // CONFIG_VAR_REFS
+ WRITE_REF_BIT(mbmi->ref_frame[0] == ALTREF2_FRAME, single_ref_p6);
+ }
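+ // Same two-level shape on the single-reference side: single_ref_p2 splits
+ // ALTREF from {BWDREF, ALTREF2}, and single_ref_p6 resolves ALTREF2 vs
+ // BWDREF.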
#if CONFIG_VAR_REFS
}
#endif // CONFIG_VAR_REFS
@@ -1231,11 +1219,7 @@ static void write_filter_intra_mode_info(const AV1_COMMON *const cm,
const MB_MODE_INFO *const mbmi,
int mi_row, int mi_col,
aom_writer *w) {
- if (mbmi->mode == DC_PRED
-#if CONFIG_PALETTE
- && mbmi->palette_mode_info.palette_size[0] == 0
-#endif // CONFIG_PALETTE
- ) {
+ if (mbmi->mode == DC_PRED && mbmi->palette_mode_info.palette_size[0] == 0) {
aom_write(w, mbmi->filter_intra_mode_info.use_filter_intra_mode[0],
cm->fc->filter_intra_probs[0]);
if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
@@ -1256,11 +1240,8 @@ static void write_filter_intra_mode_info(const AV1_COMMON *const cm,
(void)mi_col;
#endif // CONFIG_CB4X4
- if (mbmi->uv_mode == UV_DC_PRED
-#if CONFIG_PALETTE
- && mbmi->palette_mode_info.palette_size[1] == 0
-#endif // CONFIG_PALETTE
- ) {
+ if (mbmi->uv_mode == UV_DC_PRED &&
+ mbmi->palette_mode_info.palette_size[1] == 0) {
aom_write(w, mbmi->filter_intra_mode_info.use_filter_intra_mode[1],
cm->fc->filter_intra_probs[1]);
if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1]) {
@@ -1312,16 +1293,9 @@ static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd,
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
if (!av1_is_interp_needed(xd)) {
-#if CONFIG_DUAL_FILTER
- for (int i = 0; i < 4; ++i)
- assert(mbmi->interp_filter[i] == (cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter));
-#else
- assert(mbmi->interp_filter == (cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter));
-#endif // CONFIG_DUAL_FILTER
+ assert(mbmi->interp_filters ==
+ av1_broadcast_interp_filter(
+ av1_unswitchable_filter(cm->interp_filter)));
return;
}
if (cm->interp_filter == SWITCHABLE) {
@@ -1332,26 +1306,28 @@ static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd,
(mbmi->ref_frame[1] > INTRA_FRAME &&
has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
- aom_write_symbol(w, av1_switchable_interp_ind[mbmi->interp_filter[dir]],
- ec_ctx->switchable_interp_cdf[ctx],
+ InterpFilter filter =
+ av1_extract_interp_filter(mbmi->interp_filters, dir);
+ aom_write_symbol(w, filter, ec_ctx->switchable_interp_cdf[ctx],
SWITCHABLE_FILTERS);
- ++cpi->interp_filter_selected[0][mbmi->interp_filter[dir]];
+ ++cpi->interp_filter_selected[0][filter];
} else {
- assert(mbmi->interp_filter[dir] == EIGHTTAP_REGULAR);
+ assert(av1_extract_interp_filter(mbmi->interp_filters, dir) ==
+ EIGHTTAP_REGULAR);
}
}
#else
{
const int ctx = av1_get_pred_context_switchable_interp(xd);
- aom_write_symbol(w, av1_switchable_interp_ind[mbmi->interp_filter],
- ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS);
- ++cpi->interp_filter_selected[0][mbmi->interp_filter];
+ InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, 0);
+ aom_write_symbol(w, filter, ec_ctx->switchable_interp_cdf[ctx],
+ SWITCHABLE_FILTERS);
+ ++cpi->interp_filter_selected[0][filter];
}
#endif // CONFIG_DUAL_FILTER
}
}
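+// Interp filters are now stored packed in mbmi->interp_filters:
+// av1_extract_interp_filter() unpacks one direction, and the
+// !av1_is_interp_needed() case only asserts the broadcast of the
+// frame-level (or regular) filter instead of re-checking per direction.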
-#if CONFIG_PALETTE
#if CONFIG_PALETTE_DELTA_ENCODING
// Transmit color values with delta encoding. Write the first value as
// literal, and the deltas between each value and the previous one. "min_val" is
@@ -1392,10 +1368,8 @@ static void write_palette_colors_y(const MACROBLOCKD *const xd,
const PALETTE_MODE_INFO *const pmi,
int bit_depth, aom_writer *w) {
const int n = pmi->palette_size[0];
- const MODE_INFO *const above_mi = xd->above_mi;
- const MODE_INFO *const left_mi = xd->left_mi;
uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- const int n_cache = av1_get_palette_cache(above_mi, left_mi, 0, color_cache);
+ const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
int out_cache_colors[PALETTE_MAX_SIZE];
uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
const int n_out_cache =
@@ -1421,10 +1395,8 @@ static void write_palette_colors_uv(const MACROBLOCKD *const xd,
const uint16_t *colors_u = pmi->palette_colors + PALETTE_MAX_SIZE;
const uint16_t *colors_v = pmi->palette_colors + 2 * PALETTE_MAX_SIZE;
// U channel colors.
- const MODE_INFO *const above_mi = xd->above_mi;
- const MODE_INFO *const left_mi = xd->left_mi;
uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache);
+ const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
int out_cache_colors[PALETTE_MAX_SIZE];
uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
const int n_out_cache = av1_index_color_cache(
@@ -1484,6 +1456,9 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
const BLOCK_SIZE bsize = mbmi->sb_type;
const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_LARGEST);
+ const int block_palette_idx = bsize - BLOCK_8X8;
+
if (mbmi->mode == DC_PRED) {
const int n = pmi->palette_size[0];
int palette_y_mode_ctx = 0;
@@ -1495,12 +1470,19 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
palette_y_mode_ctx +=
(left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
}
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(
+ w, n > 0,
+ xd->tile_ctx->palette_y_mode_cdf[block_palette_idx][palette_y_mode_ctx],
+ 2);
+#else
aom_write(
w, n > 0,
- av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_y_mode_ctx]);
+ av1_default_palette_y_mode_prob[block_palette_idx][palette_y_mode_ctx]);
+#endif
if (n > 0) {
aom_write_symbol(w, n - PALETTE_MIN_SIZE,
- xd->tile_ctx->palette_y_size_cdf[bsize - BLOCK_8X8],
+ xd->tile_ctx->palette_y_size_cdf[block_palette_idx],
PALETTE_SIZES);
#if CONFIG_PALETTE_DELTA_ENCODING
write_palette_colors_y(xd, pmi, cm->bit_depth, w);
@@ -1516,10 +1498,15 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
if (mbmi->uv_mode == UV_DC_PRED) {
const int n = pmi->palette_size[1];
const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, n > 0,
+ xd->tile_ctx->palette_uv_mode_cdf[palette_uv_mode_ctx], 2);
+#else
aom_write(w, n > 0, av1_default_palette_uv_mode_prob[palette_uv_mode_ctx]);
+#endif
if (n > 0) {
aom_write_symbol(w, n - PALETTE_MIN_SIZE,
- xd->tile_ctx->palette_uv_size_cdf[bsize - BLOCK_8X8],
+ xd->tile_ctx->palette_uv_size_cdf[block_palette_idx],
PALETTE_SIZES);
#if CONFIG_PALETTE_DELTA_ENCODING
write_palette_colors_uv(xd, pmi, cm->bit_depth, w);
@@ -1538,7 +1525,6 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
}
}
}
-#endif // CONFIG_PALETTE
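+// Palette signaling order: a palette-present flag (the Y context comes from
+// the neighbors' palette use, the UV context from whether Y uses a palette),
+// then the palette size as a multisymbol, then the color values (delta-coded
+// under CONFIG_PALETTE_DELTA_ENCODING).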
void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
#if CONFIG_SUPERTX
@@ -1583,25 +1569,64 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
!supertx_enabled &&
#endif // CONFIG_SUPERTX
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+#if CONFIG_MRC_TX
+ if (tx_type == MRC_DCT)
+ assert(mbmi->valid_mrc_mask && "Invalid MRC mask");
+#endif // CONFIG_MRC_TX
+ const TxSetType tx_set_type = get_ext_tx_set_type(
+ tx_size, bsize, is_inter, cm->reduced_tx_set_used);
const int eset =
get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
// eset == 0 should correspond to a set with only DCT_DCT and there
// is no need to send the tx_type
assert(eset > 0);
+ assert(av1_ext_tx_used[tx_set_type][tx_type]);
+#if !CONFIG_LGT_FROM_PRED
if (is_inter) {
- assert(ext_tx_used_inter[eset][tx_type]);
- aom_write_symbol(w, av1_ext_tx_inter_ind[eset][tx_type],
+ aom_write_symbol(w, av1_ext_tx_ind[tx_set_type][tx_type],
ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
- ext_tx_cnt_inter[eset]);
+ av1_num_ext_tx_set[tx_set_type]);
} else if (ALLOW_INTRA_EXT_TX) {
- assert(ext_tx_used_intra[eset][tx_type]);
aom_write_symbol(
- w, av1_ext_tx_intra_ind[eset][tx_type],
+ w, av1_ext_tx_ind[tx_set_type][tx_type],
ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode],
- ext_tx_cnt_intra[eset]);
+ av1_num_ext_tx_set[tx_set_type]);
}
- }
#else
+ // Only signal tx_type when LGT is not allowed or not selected.
+ if (is_inter) {
+ if (LGT_FROM_PRED_INTER) {
+ if (is_lgt_allowed(mbmi->mode, tx_size) && !cm->reduced_tx_set_used)
+ aom_write(w, mbmi->use_lgt, ec_ctx->inter_lgt_prob[square_tx_size]);
+ if (!mbmi->use_lgt)
+ aom_write_symbol(w, av1_ext_tx_ind[tx_set_type][tx_type],
+ ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
+ av1_num_ext_tx_set[tx_set_type]);
+ } else {
+ aom_write_symbol(w, av1_ext_tx_ind[tx_set_type][tx_type],
+ ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
+ av1_num_ext_tx_set[tx_set_type]);
+ }
+ } else if (ALLOW_INTRA_EXT_TX) {
+ if (LGT_FROM_PRED_INTRA) {
+ if (is_lgt_allowed(mbmi->mode, tx_size) && !cm->reduced_tx_set_used)
+ aom_write(w, mbmi->use_lgt,
+ ec_ctx->intra_lgt_prob[square_tx_size][mbmi->mode]);
+ if (!mbmi->use_lgt)
+ aom_write_symbol(
+ w, av1_ext_tx_ind[tx_set_type][tx_type],
+ ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode],
+ av1_num_ext_tx_set[tx_set_type]);
+ } else {
+ aom_write_symbol(
+ w, av1_ext_tx_ind[tx_set_type][tx_type],
+ ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode],
+ av1_num_ext_tx_set[tx_set_type]);
+ }
+ }
+#endif // CONFIG_LGT_FROM_PRED
+ }
+#else // CONFIG_EXT_TX
if (tx_size < TX_32X32 &&
((!cm->seg.enabled && cm->base_qindex > 0) ||
(cm->seg.enabled && xd->qindex[mbmi->segment_id] > 0)) &&
@@ -1627,36 +1652,32 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
static void write_intra_mode(FRAME_CONTEXT *frame_ctx, BLOCK_SIZE bsize,
PREDICTION_MODE mode, aom_writer *w) {
- aom_write_symbol(w, av1_intra_mode_ind[mode],
- frame_ctx->y_mode_cdf[size_group_lookup[bsize]],
+ aom_write_symbol(w, mode, frame_ctx->y_mode_cdf[size_group_lookup[bsize]],
INTRA_MODES);
}
static void write_intra_uv_mode(FRAME_CONTEXT *frame_ctx,
UV_PREDICTION_MODE uv_mode,
PREDICTION_MODE y_mode, aom_writer *w) {
- aom_write_symbol(w, av1_intra_mode_ind[get_uv_mode(uv_mode)],
- frame_ctx->uv_mode_cdf[y_mode], UV_INTRA_MODES);
+#if !CONFIG_CFL
+ uv_mode = get_uv_mode(uv_mode);
+#endif
+ aom_write_symbol(w, uv_mode, frame_ctx->uv_mode_cdf[y_mode], UV_INTRA_MODES);
}
#if CONFIG_CFL
-static void write_cfl_alphas(FRAME_CONTEXT *const frame_ctx, int ind,
- const CFL_SIGN_TYPE signs[CFL_SIGNS],
- aom_writer *w) {
- // Check for uninitialized signs
- if (cfl_alpha_codes[ind][CFL_PRED_U] == 0)
- assert(signs[CFL_PRED_U] == CFL_SIGN_POS);
- if (cfl_alpha_codes[ind][CFL_PRED_V] == 0)
- assert(signs[CFL_PRED_V] == CFL_SIGN_POS);
-
- // Write a symbol representing a combination of alpha Cb and alpha Cr.
- aom_write_symbol(w, ind, frame_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE);
-
- // Signs are only signaled for nonzero codes.
- if (cfl_alpha_codes[ind][CFL_PRED_U] != 0)
- aom_write_bit(w, signs[CFL_PRED_U]);
- if (cfl_alpha_codes[ind][CFL_PRED_V] != 0)
- aom_write_bit(w, signs[CFL_PRED_V]);
+static void write_cfl_alphas(FRAME_CONTEXT *const ec_ctx, int idx,
+ int joint_sign, aom_writer *w) {
+ aom_write_symbol(w, joint_sign, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS);
+ // Magnitudes are only signaled when the corresponding sign is nonzero.
+ if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
+ aom_cdf_prob *cdf_u = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
+ aom_write_symbol(w, CFL_IDX_U(idx), cdf_u, CFL_ALPHABET_SIZE);
+ }
+ if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
+ aom_cdf_prob *cdf_v = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
+ aom_write_symbol(w, CFL_IDX_V(idx), cdf_v, CFL_ALPHABET_SIZE);
+ }
}
#endif
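+// CFL signaling: one joint_sign symbol encodes the (U, V) sign pair; each
+// alpha magnitude is then coded with a CDF conditioned on that joint sign,
+// and is skipped entirely when its sign is zero.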
@@ -1715,7 +1736,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#else
skip = write_skip(cm, xd, segment_id, mi, w);
#endif // CONFIG_SUPERTX
-#if CONFIG_DELTA_Q
if (cm->delta_q_present_flag) {
int super_block_upper_left =
((mi_row & MAX_MIB_MASK) == 0) && ((mi_col & MAX_MIB_MASK) == 0);
@@ -1726,6 +1746,25 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
write_delta_qindex(cm, xd, reduced_delta_qindex, w);
xd->prev_qindex = mbmi->current_q_index;
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ if (cm->delta_lf_present_flag) {
+ if (cm->delta_lf_multi) {
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id) {
+ int reduced_delta_lflevel =
+ (mbmi->curr_delta_lf[lf_id] - xd->prev_delta_lf[lf_id]) /
+ cm->delta_lf_res;
+ write_delta_lflevel(cm, xd, lf_id, reduced_delta_lflevel, w);
+ xd->prev_delta_lf[lf_id] = mbmi->curr_delta_lf[lf_id];
+ }
+ } else {
+ int reduced_delta_lflevel =
+ (mbmi->current_delta_lf_from_base - xd->prev_delta_lf_from_base) /
+ cm->delta_lf_res;
+ write_delta_lflevel(cm, xd, -1, reduced_delta_lflevel, w);
+ xd->prev_delta_lf_from_base = mbmi->current_delta_lf_from_base;
+ }
+ }
+#else
if (cm->delta_lf_present_flag) {
int reduced_delta_lflevel =
(mbmi->current_delta_lf_from_base - xd->prev_delta_lf_from_base) /
@@ -1733,10 +1772,10 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
write_delta_lflevel(cm, xd, reduced_delta_lflevel, w);
xd->prev_delta_lf_from_base = mbmi->current_delta_lf_from_base;
}
+#endif // CONFIG_LOOPFILTER_LEVEL
#endif // CONFIG_EXT_DELTA_Q
}
}
-#endif
#if CONFIG_SUPERTX
if (!supertx_enabled)
@@ -1744,14 +1783,10 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
write_is_inter(cm, xd, mbmi->segment_id, w, is_inter);
if (cm->tx_mode == TX_MODE_SELECT &&
-#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX)
-#if CONFIG_RECT_TX
- bsize > BLOCK_4X4 &&
-#else
+#if CONFIG_CB4X4 && CONFIG_VAR_TX && !CONFIG_RECT_TX
(bsize >= BLOCK_8X8 || (bsize > BLOCK_4X4 && is_inter)) &&
-#endif // CONFIG_RECT_TX
#else
- bsize >= BLOCK_8X8 &&
+ block_signals_txsize(bsize) &&
#endif
#if CONFIG_SUPERTX
!supertx_enabled &&
@@ -1759,23 +1794,30 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
!(is_inter && skip) && !xd->lossless[segment_id]) {
#if CONFIG_VAR_TX
if (is_inter) { // This implies skip flag is 0.
- const TX_SIZE max_tx_size = get_vartx_max_txsize(mbmi, bsize);
+ const TX_SIZE max_tx_size = get_vartx_max_txsize(mbmi, bsize, 0);
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
const int height = block_size_high[bsize] >> tx_size_wide_log2[0];
+ int init_depth =
+ (height != width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bw)
- write_tx_size_vartx(cm, xd, mbmi, max_tx_size, height != width, idy,
- idx, w);
+ write_tx_size_vartx(cm, xd, mbmi, max_tx_size, init_depth, idy, idx,
+ w);
#if CONFIG_RECT_TX_EXT
if (is_quarter_tx_allowed(xd, mbmi, is_inter_block(mbmi)) &&
quarter_txsize_lookup[bsize] != max_tx_size &&
(mbmi->tx_size == quarter_txsize_lookup[bsize] ||
mbmi->tx_size == max_tx_size)) {
+#if CONFIG_NEW_MULTISYMBOL
+ aom_write_symbol(w, mbmi->tx_size != max_tx_size,
+ cm->fc->quarter_tx_size_cdf, 2);
+#else
aom_write(w, mbmi->tx_size != max_tx_size,
cm->fc->quarter_tx_size_prob);
+#endif
}
#endif
} else {
@@ -1812,7 +1854,7 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // CONFIG_CB4X4
#if CONFIG_CFL
- if (mbmi->uv_mode == UV_DC_PRED) {
+ if (mbmi->uv_mode == UV_CFL_PRED) {
write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, w);
}
#endif
@@ -1824,10 +1866,8 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#if CONFIG_EXT_INTRA
write_intra_angle_info(xd, ec_ctx, w);
#endif // CONFIG_EXT_INTRA
-#if CONFIG_PALETTE
- if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools)
+ if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
write_palette_mode_info(cm, xd, mi, w);
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
if (bsize >= BLOCK_8X8 || unify_bsize)
write_filter_intra_mode_info(cm, xd, mbmi, mi_row, mi_col, w);
@@ -1836,16 +1876,15 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
int16_t mode_ctx;
write_ref_frames(cm, xd, w);
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
// NOTE: Handle single ref comp mode
if (!is_compound)
aom_write(w, is_inter_singleref_comp_mode(mode),
av1_get_inter_mode_prob(cm, xd));
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
if (is_compound || is_inter_singleref_comp_mode(mode))
#else // !CONFIG_COMPOUND_SINGLEREF
@@ -1853,7 +1892,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // CONFIG_COMPOUND_SINGLEREF
mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
else
-#endif // CONFIG_EXT_INTER
mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
mbmi->ref_frame, bsize, -1);
@@ -1861,7 +1899,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
// If segment skip is not enabled code the mode.
if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
if (bsize >= BLOCK_8X8 || unify_bsize) {
-#if CONFIG_EXT_INTER
if (is_inter_compound_mode(mode))
write_inter_compound_mode(cm, xd, w, mode, mode_ctx);
#if CONFIG_COMPOUND_SINGLEREF
@@ -1869,18 +1906,13 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
write_inter_singleref_comp_mode(xd, w, mode, mode_ctx);
#endif // CONFIG_COMPOUND_SINGLEREF
else if (is_inter_singleref_mode(mode))
-#endif // CONFIG_EXT_INTER
write_inter_mode(w, mode, ec_ctx, mode_ctx);
-#if CONFIG_EXT_INTER
if (mode == NEWMV || mode == NEW_NEWMV ||
#if CONFIG_COMPOUND_SINGLEREF
mbmi->mode == SR_NEW_NEWMV ||
#endif // CONFIG_COMPOUND_SINGLEREF
have_nearmv_in_inter_mode(mode))
-#else // !CONFIG_EXT_INTER
- if (mode == NEARMV || mode == NEWMV)
-#endif // CONFIG_EXT_INTER
write_drl_idx(ec_ctx, mbmi, mbmi_ext, w);
else
assert(mbmi->ref_mv_idx == 0);
@@ -1903,23 +1935,15 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int j = idy * 2 + idx;
const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
-#if CONFIG_EXT_INTER
if (!is_compound)
-#endif // CONFIG_EXT_INTER
mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
mbmi->ref_frame, bsize, j);
-#if CONFIG_EXT_INTER
if (is_inter_compound_mode(b_mode))
write_inter_compound_mode(cm, xd, w, b_mode, mode_ctx);
else if (is_inter_singleref_mode(b_mode))
-#endif // CONFIG_EXT_INTER
write_inter_mode(w, b_mode, ec_ctx, mode_ctx);
-#if CONFIG_EXT_INTER
if (b_mode == NEWMV || b_mode == NEW_NEWMV) {
-#else
- if (b_mode == NEWMV) {
-#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + is_compound; ++ref) {
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
@@ -1927,16 +1951,9 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
mbmi->ref_mv_idx);
nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx];
av1_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
-#if CONFIG_EXT_INTER
- &mi->bmi[j].ref_mv[ref].as_mv,
-#else
- &mi->bmi[j].pred_mv[ref].as_mv,
-#endif // CONFIG_EXT_INTER
- nmvc, allow_hp);
+ &mi->bmi[j].ref_mv[ref].as_mv, nmvc, allow_hp);
}
- }
-#if CONFIG_EXT_INTER
- else if (b_mode == NEAREST_NEWMV || b_mode == NEAR_NEWMV) {
+ } else if (b_mode == NEAREST_NEWMV || b_mode == NEAR_NEWMV) {
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 1,
@@ -1953,15 +1970,10 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
av1_encode_mv(cpi, w, &mi->bmi[j].as_mv[0].as_mv,
&mi->bmi[j].ref_mv[0].as_mv, nmvc, allow_hp);
}
-#endif // CONFIG_EXT_INTER
}
}
} else {
-#if CONFIG_EXT_INTER
if (mode == NEWMV || mode == NEW_NEWMV) {
-#else
- if (mode == NEWMV) {
-#endif // CONFIG_EXT_INTER
int_mv ref_mv;
for (ref = 0; ref < 1 + is_compound; ++ref) {
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
@@ -1973,7 +1985,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
av1_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, &ref_mv.as_mv, nmvc,
allow_hp);
}
-#if CONFIG_EXT_INTER
} else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
int nmv_ctx =
@@ -2008,11 +2019,10 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
av1_encode_mv(cpi, w, &mbmi->mv[1].as_mv, &ref_mv.as_mv, nmvc,
allow_hp);
#endif // CONFIG_COMPOUND_SINGLEREF
-#endif // CONFIG_EXT_INTER
}
}
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_INTERINTRA
if (cpi->common.reference_mode != COMPOUND_REFERENCE &&
#if CONFIG_SUPERTX
!supertx_enabled &&
@@ -2045,22 +2055,18 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
}
}
}
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_SUPERTX
if (!supertx_enabled)
#endif // CONFIG_SUPERTX
-#if CONFIG_EXT_INTER
- if (mbmi->ref_frame[1] != INTRA_FRAME)
-#endif // CONFIG_EXT_INTER
- write_motion_mode(cm, xd, mi, w);
+ if (mbmi->ref_frame[1] != INTRA_FRAME) write_motion_mode(cm, xd, mi, w);
#if CONFIG_NCOBMC_ADAPT_WEIGHT
write_ncobmc_mode(xd, mi, w);
#endif
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_EXT_INTER
if (
#if CONFIG_COMPOUND_SINGLEREF
is_inter_anyref_comp_mode(mbmi->mode) &&
@@ -2074,10 +2080,16 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
is_any_masked_compound_used(bsize)) {
#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
if (cm->allow_masked_compound) {
- aom_write_symbol(w, mbmi->interinter_compound_type,
- ec_ctx->compound_type_cdf[bsize], COMPOUND_TYPES);
+#if CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
+ if (!is_interinter_compound_used(COMPOUND_WEDGE, bsize))
+ aom_write_bit(w, mbmi->interinter_compound_type == COMPOUND_AVERAGE);
+ else
+#endif // CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
+ aom_write_symbol(w, mbmi->interinter_compound_type,
+ ec_ctx->compound_type_cdf[bsize], COMPOUND_TYPES);
#if CONFIG_WEDGE
- if (mbmi->interinter_compound_type == COMPOUND_WEDGE) {
+ if (is_interinter_compound_used(COMPOUND_WEDGE, bsize) &&
+ mbmi->interinter_compound_type == COMPOUND_WEDGE) {
aom_write_literal(w, mbmi->wedge_index, get_wedge_bits_lookup(bsize));
aom_write_bit(w, mbmi->wedge_sign);
}
@@ -2090,7 +2102,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
}
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
}
-#endif // CONFIG_EXT_INTER
#if CONFIG_DUAL_FILTER || CONFIG_WARPED_MOTION || CONFIG_GLOBAL_MOTION
write_mb_interp_filter(cpi, xd, w);
@@ -2106,12 +2117,7 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
#endif // !CONFIG_TXK_SEL
}
-static void write_mb_modes_kf(AV1_COMMON *cm,
-#if CONFIG_DELTA_Q
- MACROBLOCKD *xd,
-#else
- const MACROBLOCKD *xd,
-#endif // CONFIG_DELTA_Q
+static void write_mb_modes_kf(AV1_COMMON *cm, MACROBLOCKD *xd,
#if CONFIG_INTRABC
const MB_MODE_INFO_EXT *mbmi_ext,
#endif // CONFIG_INTRABC
@@ -2135,7 +2141,6 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
if (seg->update_map) write_segment_id(w, seg, segp, mbmi->segment_id);
-#if CONFIG_DELTA_Q
const int skip = write_skip(cm, xd, mbmi->segment_id, mi, w);
if (cm->delta_q_present_flag) {
int super_block_upper_left =
@@ -2147,6 +2152,25 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
write_delta_qindex(cm, xd, reduced_delta_qindex, w);
xd->prev_qindex = mbmi->current_q_index;
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ if (cm->delta_lf_present_flag) {
+ if (cm->delta_lf_multi) {
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id) {
+ int reduced_delta_lflevel =
+ (mbmi->curr_delta_lf[lf_id] - xd->prev_delta_lf[lf_id]) /
+ cm->delta_lf_res;
+ write_delta_lflevel(cm, xd, lf_id, reduced_delta_lflevel, w);
+ xd->prev_delta_lf[lf_id] = mbmi->curr_delta_lf[lf_id];
+ }
+ } else {
+ int reduced_delta_lflevel =
+ (mbmi->current_delta_lf_from_base - xd->prev_delta_lf_from_base) /
+ cm->delta_lf_res;
+ write_delta_lflevel(cm, xd, -1, reduced_delta_lflevel, w);
+ xd->prev_delta_lf_from_base = mbmi->current_delta_lf_from_base;
+ }
+ }
+#else
if (cm->delta_lf_present_flag) {
int reduced_delta_lflevel =
(mbmi->current_delta_lf_from_base - xd->prev_delta_lf_from_base) /
@@ -2154,29 +2178,19 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
write_delta_lflevel(cm, xd, reduced_delta_lflevel, w);
xd->prev_delta_lf_from_base = mbmi->current_delta_lf_from_base;
}
+#endif // CONFIG_LOOPFILTER_LEVEL
#endif // CONFIG_EXT_DELTA_Q
}
}
-#else
- write_skip(cm, xd, mbmi->segment_id, mi, w);
-#endif
int enable_tx_size = cm->tx_mode == TX_MODE_SELECT &&
-#if CONFIG_CB4X4 && (CONFIG_VAR_TX || CONFIG_RECT_TX)
-#if CONFIG_RECT_TX
- bsize > BLOCK_4X4 &&
-#else
- bsize >= BLOCK_8X8 &&
-#endif // CONFIG_RECT_TX
-#else
- bsize >= BLOCK_8X8 &&
-#endif
+ block_signals_txsize(bsize) &&
!xd->lossless[mbmi->segment_id];
#if CONFIG_INTRABC
- if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools) {
+ if (av1_allow_intrabc(bsize, cm)) {
int use_intrabc = is_intrabc_block(mbmi);
- aom_write(w, use_intrabc, ec_ctx->intrabc_prob);
+ aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2);
if (use_intrabc) {
assert(mbmi->mode == DC_PRED);
assert(mbmi->uv_mode == UV_DC_PRED);
@@ -2221,7 +2235,7 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
#endif // CONFIG_CB4X4
#if CONFIG_CFL
- if (mbmi->uv_mode == UV_DC_PRED) {
+ if (mbmi->uv_mode == UV_CFL_PRED) {
write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, w);
}
#endif
@@ -2232,10 +2246,8 @@ static void write_mb_modes_kf(AV1_COMMON *cm,
#if CONFIG_EXT_INTRA
write_intra_angle_info(xd, ec_ctx, w);
#endif // CONFIG_EXT_INTRA
-#if CONFIG_PALETTE
- if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools)
+ if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
write_palette_mode_info(cm, xd, mi, w);
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
if (bsize >= BLOCK_8X8 || unify_bsize)
write_filter_intra_mode_info(cm, xd, mbmi, mi_row, mi_col, w);
@@ -2312,7 +2324,7 @@ static void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) {
m = xd->mi[0];
if (is_inter_block(&m->mbmi)) {
#define FRAME_TO_CHECK 1
- if (cm->current_video_frame == FRAME_TO_CHECK /* && cm->show_frame == 1*/) {
+ if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 1) {
const MB_MODE_INFO *const mbmi = &m->mbmi;
const BLOCK_SIZE bsize = mbmi->sb_type;
@@ -2331,21 +2343,6 @@ static void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) {
#endif // CONFIG_COMPOUND_SINGLEREF
mv[1].as_int = 0;
}
- int interp_ctx[2] = { -1 };
- int interp_filter[2] = { cm->interp_filter };
- if (cm->interp_filter == SWITCHABLE) {
- int dir;
- for (dir = 0; dir < 2; ++dir) {
- if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
- (mbmi->ref_frame[1] > INTRA_FRAME &&
- has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
- interp_ctx[dir] = av1_get_pred_context_switchable_interp(xd, dir);
- interp_filter[dir] = mbmi->interp_filter[dir];
- } else {
- interp_filter[dir] = EIGHTTAP_REGULAR;
- }
- }
- }
MACROBLOCK *const x = &cpi->td.mb;
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
@@ -2373,13 +2370,11 @@ static void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) {
"Frame=%d, (mi_row,mi_col)=(%d,%d), mode=%d, bsize=%d, "
"show_frame=%d, mv[0]=(%d,%d), mv[1]=(%d,%d), ref[0]=%d, "
"ref[1]=%d, motion_mode=%d, inter_mode_ctx=%d, mode_ctx=%d, "
- "interp_ctx=(%d,%d), interp_filter=(%d,%d), newmv_ctx=%d, "
- "zeromv_ctx=%d, refmv_ctx=%d\n",
+ "newmv_ctx=%d, zeromv_ctx=%d, refmv_ctx=%d\n",
cm->current_video_frame, mi_row, mi_col, mbmi->mode, bsize,
cm->show_frame, mv[0].as_mv.row, mv[0].as_mv.col, mv[1].as_mv.row,
mv[1].as_mv.col, mbmi->ref_frame[0], mbmi->ref_frame[1],
mbmi->motion_mode, mbmi_ext->mode_context[ref_frame_type], mode_ctx,
- interp_ctx[0], interp_ctx[1], interp_filter[0], interp_filter[1],
newmv_ctx, zeromv_ctx, refmv_ctx);
}
}
@@ -2400,7 +2395,7 @@ static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile,
m = xd->mi[0];
assert(m->mbmi.sb_type <= cm->sb_size ||
- (m->mbmi.sb_type >= BLOCK_4X16 && m->mbmi.sb_type <= BLOCK_32X8));
+ (m->mbmi.sb_type >= BLOCK_SIZES && m->mbmi.sb_type < BLOCK_SIZES_ALL));
bh = mi_size_high[m->mbmi.sb_type];
bw = mi_size_wide[m->mbmi.sb_type];
@@ -2431,14 +2426,13 @@ static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile,
// up if they are scaled. has_subpel_mv_component is in turn needed by
// write_switchable_interp_filter, which is called by pack_inter_mode_mvs.
set_ref_ptrs(cm, xd, m->mbmi.ref_frame[0], m->mbmi.ref_frame[1]);
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(&m->mbmi) && is_inter_singleref_comp_mode(m->mbmi.mode))
xd->block_refs[1] = xd->block_refs[0];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
#endif // CONFIG_DUAL_FILTER || CONFIG_WARPED_MOTION
#if ENC_MISMATCH_DEBUG
- // NOTE(zoeliu): For debug
enc_dump_logs(cpi, mi_row, mi_col);
#endif // ENC_MISMATCH_DEBUG
@@ -2469,7 +2463,7 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
xd->mi = cm->mi_grid_visible + mi_offset;
assert(mbmi->sb_type <= cm->sb_size ||
- (mbmi->sb_type >= BLOCK_4X16 && mbmi->sb_type <= BLOCK_32X8));
+ (mbmi->sb_type >= BLOCK_SIZES && mbmi->sb_type < BLOCK_SIZES_ALL));
bh = mi_size_high[mbmi->sb_type];
bw = mi_size_wide[mbmi->sb_type];
@@ -2481,7 +2475,8 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
#endif // CONFIG_DEPENDENT_HORZTILES
cm->mi_rows, cm->mi_cols);
-#if CONFIG_PALETTE
+// TODO(anybody): remove this flag when PVQ supports the palette coding tool
+#if !CONFIG_PVQ
for (plane = 0; plane <= 1; ++plane) {
const uint8_t palette_size_plane =
mbmi->palette_mode_info.palette_size[plane];
@@ -2494,11 +2489,13 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
av1_get_block_dimensions(mbmi->sb_type, plane, xd, NULL, NULL, &rows,
&cols);
assert(*tok < tok_end);
- pack_palette_tokens(w, tok, palette_size_plane, rows * cols);
+ pack_map_tokens(w, tok, palette_size_plane, rows * cols);
+#if !CONFIG_LV_MAP
assert(*tok < tok_end + mbmi->skip);
+#endif // !CONFIG_LV_MAP
}
}
-#endif // CONFIG_PALETTE
+#endif // !CONFIG_PVQ
#if CONFIG_COEF_INTERLEAVE
if (!mbmi->skip) {
@@ -2585,7 +2582,9 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
if (!is_chroma_reference(mi_row, mi_col, mbmi->sb_type,
xd->plane[plane].subsampling_x,
xd->plane[plane].subsampling_y)) {
+#if !CONFIG_LV_MAP
(*tok)++;
+#endif // !CONFIG_LV_MAP
continue;
}
#endif
@@ -2620,12 +2619,15 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
mu_blocks_high = AOMMIN(num_4x4_h, mu_blocks_high);
if (is_inter_block(mbmi)) {
- const TX_SIZE max_tx_size = get_vartx_max_txsize(mbmi, plane_bsize);
+ const TX_SIZE max_tx_size = get_vartx_max_txsize(
+ mbmi, plane_bsize, pd->subsampling_x || pd->subsampling_y);
int block = 0;
const int step =
tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
const int bkw = tx_size_wide_unit[max_tx_size];
const int bkh = tx_size_high_unit[max_tx_size];
+ assert(bkw <= mu_blocks_wide);
+ assert(bkh <= mu_blocks_high);
for (row = 0; row < num_4x4_h; row += mu_blocks_high) {
const int unit_height = AOMMIN(mu_blocks_high + row, num_4x4_h);
for (col = 0; col < num_4x4_w; col += mu_blocks_wide) {
@@ -2673,7 +2675,15 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
for (blk_row = row; blk_row < unit_height; blk_row += bkh) {
for (blk_col = col; blk_col < unit_width; blk_col += bkw) {
#if !CONFIG_PVQ
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ TX_TYPE tx_type =
+ av1_get_tx_type(plane ? PLANE_TYPE_UV : PLANE_TYPE_Y, xd,
+ blk_row, blk_col, 0, tx);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx,
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ tx_type, is_inter_block(mbmi),
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
&token_stats);
#else
pack_pvq_tokens(w, x, xd, plane, bsize, tx);
@@ -2692,8 +2702,16 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
#if CONFIG_LV_MAP
(void)tx;
av1_write_coeffs_mb(cm, x, w, plane);
-#else // CONFIG_LV_MAP
- pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx, &token_stats);
+#else // CONFIG_LV_MAP
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ TX_TYPE tx_type = av1_get_tx_type(plane ? PLANE_TYPE_UV : PLANE_TYPE_Y,
+ xd, blk_row, blk_col, 0, tx);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx,
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ tx_type, is_inter_block(mbmi),
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ &token_stats);
#endif // CONFIG_LV_MAP
#else
@@ -2718,7 +2736,7 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile,
#endif // CONFIG_COEF_INTERLEAVE
}
-#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
+#if CONFIG_MOTION_VAR && NC_MODE_INFO
static void write_tokens_sb(AV1_COMP *cpi, const TileInfo *const tile,
aom_writer *w, const TOKENEXTRA **tok,
const TOKENEXTRA *const tok_end, int mi_row,
@@ -2765,6 +2783,9 @@ static void write_tokens_sb(AV1_COMP *cpi, const TileInfo *const tile,
subsize);
break;
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+#error NC_MODE_INFO+MOTION_VAR not yet supported for new HORZ/VERT_AB partitions
+#endif
case PARTITION_HORZ_A:
write_tokens_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
write_tokens_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs);
@@ -2804,7 +2825,8 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
supertx_enabled,
#endif
mi_row, mi_col);
-#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
+
+#if CONFIG_MOTION_VAR && NC_MODE_INFO
(void)tok;
(void)tok_end;
#else
@@ -2829,13 +2851,6 @@ static void write_partition(const AV1_COMMON *const cm,
#endif
bsize)
: 0;
-#if CONFIG_UNPOISON_PARTITION_CTX
- const aom_prob *const probs =
- ctx < PARTITION_CONTEXTS ? cm->fc->partition_prob[ctx] : NULL;
-#else
- const aom_prob *const probs = cm->fc->partition_prob[ctx];
-#endif
-
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
@@ -2843,19 +2858,26 @@ static void write_partition(const AV1_COMMON *const cm,
if (has_rows && has_cols) {
#if CONFIG_EXT_PARTITION_TYPES
- if (bsize <= BLOCK_8X8)
- aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], PARTITION_TYPES);
- else
- aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], EXT_PARTITION_TYPES);
+ const int num_partition_types =
+ (mi_width_log2_lookup[bsize] > mi_width_log2_lookup[BLOCK_8X8])
+ ? EXT_PARTITION_TYPES
+ : PARTITION_TYPES;
#else
- aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], PARTITION_TYPES);
-#endif // CONFIG_EXT_PARTITION_TYPES
+ const int num_partition_types = PARTITION_TYPES;
+#endif
+ aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], num_partition_types);
} else if (!has_rows && has_cols) {
assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
- aom_write(w, p == PARTITION_SPLIT, probs[1]);
+ assert(bsize > BLOCK_8X8);
+ aom_cdf_prob cdf[2];
+ partition_gather_vert_alike(cdf, ec_ctx->partition_cdf[ctx]);
+ aom_write_cdf(w, p == PARTITION_SPLIT, cdf, 2);
} else if (has_rows && !has_cols) {
assert(p == PARTITION_SPLIT || p == PARTITION_VERT);
- aom_write(w, p == PARTITION_SPLIT, probs[2]);
+ assert(bsize > BLOCK_8X8);
+ aom_cdf_prob cdf[2];
+ partition_gather_horz_alike(cdf, ec_ctx->partition_cdf[ctx]);
+ aom_write_cdf(w, p == PARTITION_SPLIT, cdf, 2);
} else {
assert(p == PARTITION_SPLIT);
}
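+  // At frame edges only {HORZ, SPLIT} (no rows) or {VERT, SPLIT} (no cols)
+  // are legal, so the full partition CDF is collapsed into a binary one via
+  // partition_gather_*_alike() and a single aom_write_cdf() call.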
@@ -2885,7 +2907,10 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
#if CONFIG_EXT_PARTITION_TYPES
const int quarter_step = mi_size_wide[bsize] / 4;
int i;
-#endif
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ const int qbs = mi_size_wide[bsize] / 4;
+#endif // CONFIG_EXT_PARTITION_TYPES_AB
+#endif // CONFIG_EXT_PARTITION_TYPES
const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
const BLOCK_SIZE subsize = get_subsize(bsize, partition);
#if CONFIG_CB4X4
@@ -2899,7 +2924,6 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
MB_MODE_INFO *mbmi;
const int pack_token = !supertx_enabled;
TX_SIZE supertx_size;
- int plane;
#endif
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
@@ -2959,6 +2983,42 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
mi_row + hbs, mi_col + hbs, subsize);
break;
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ case PARTITION_HORZ_A:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + qbs, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + hbs, mi_col);
+ break;
+ case PARTITION_HORZ_B:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + hbs, mi_col);
+ if (mi_row + 3 * qbs < cm->mi_rows)
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + 3 * qbs, mi_col);
+ break;
+ case PARTITION_VERT_A:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col + qbs);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col + hbs);
+ break;
+ case PARTITION_VERT_B:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col + hbs);
+ if (mi_col + 3 * qbs < cm->mi_cols)
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col + 3 * qbs);
+ break;
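+      // The AB partitions emit a half block plus two quarters; for the *_B
+      // shapes the trailing quarter is written only when it still lies
+      // inside the frame.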
+#else
case PARTITION_HORZ_A:
write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
mi_row, mi_col);
@@ -2991,6 +3051,7 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
mi_row + hbs, mi_col + hbs);
break;
+#endif
case PARTITION_HORZ_4:
for (i = 0; i < 4; ++i) {
int this_mi_row = mi_row + i * quarter_step;
@@ -3039,10 +3100,12 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
!skip) {
const int eset =
get_ext_tx_set(supertx_size, bsize, 1, cm->reduced_tx_set_used);
+ const int tx_set_type =
+ get_ext_tx_set_type(supertx_size, bsize, 1, cm->reduced_tx_set_used);
if (eset > 0) {
- aom_write_symbol(w, av1_ext_tx_inter_ind[eset][mbmi->tx_type],
+ aom_write_symbol(w, av1_ext_tx_ind[tx_set_type][mbmi->tx_type],
ec_ctx->inter_ext_tx_cdf[eset][supertx_size],
- ext_tx_cnt_inter[eset]);
+ av1_num_ext_tx_set[tx_set_type]);
}
}
#else
@@ -3054,7 +3117,11 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
if (!skip) {
assert(*tok < tok_end);
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ TX_TYPE tx_type = av1_get_tx_type(plane ? PLANE_TYPE_UV : PLANE_TYPE_Y,
+ xd, blk_row, blk_col, block, tx_size);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int mbmi_txb_size = txsize_to_bsize[mbmi->tx_size];
const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi_txb_size, pd);
@@ -3073,7 +3140,11 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
token_stats.cost = 0;
for (row = 0; row < max_blocks_high; row += stepr)
for (col = 0; col < max_blocks_wide; col += stepc)
- pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx, &token_stats);
+ pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx,
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ tx_type, is_inter_block(mbmi),
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ &token_stats);
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
}
@@ -3096,6 +3167,61 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
#endif // CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_LPF_SB
+ // Send the filter level for each superblock (64x64).
+ if (bsize == cm->sb_size) {
+ if (mi_row == 0 && mi_col == 0) {
+ aom_write_literal(w, cm->mi_grid_visible[0]->mbmi.filt_lvl, 6);
+ cm->mi_grid_visible[0]->mbmi.reuse_sb_lvl = 0;
+ cm->mi_grid_visible[0]->mbmi.delta = 0;
+ cm->mi_grid_visible[0]->mbmi.sign = 0;
+ } else {
+ int prev_mi_row, prev_mi_col;
+ if (mi_col - MAX_MIB_SIZE < 0) {
+ prev_mi_row = mi_row - MAX_MIB_SIZE;
+ prev_mi_col = mi_col;
+ } else {
+ prev_mi_row = mi_row;
+ prev_mi_col = mi_col - MAX_MIB_SIZE;
+ }
+ MB_MODE_INFO *curr_mbmi =
+ &cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi;
+ MB_MODE_INFO *prev_mbmi =
+ &cm->mi_grid_visible[prev_mi_row * cm->mi_stride + prev_mi_col]->mbmi;
+
+ const uint8_t curr_lvl = curr_mbmi->filt_lvl;
+ const uint8_t prev_lvl = prev_mbmi->filt_lvl;
+
+ const int reuse_prev_lvl = curr_lvl == prev_lvl;
+ const int reuse_ctx = prev_mbmi->reuse_sb_lvl;
+ curr_mbmi->reuse_sb_lvl = reuse_prev_lvl;
+ aom_write_symbol(w, reuse_prev_lvl,
+ xd->tile_ctx->lpf_reuse_cdf[reuse_ctx], 2);
+
+ if (reuse_prev_lvl) {
+ curr_mbmi->delta = 0;
+ curr_mbmi->sign = 0;
+ } else {
+ const unsigned int delta = abs(curr_lvl - prev_lvl) / LPF_STEP;
+ const int delta_ctx = prev_mbmi->delta;
+ curr_mbmi->delta = delta;
+ aom_write_symbol(w, delta, xd->tile_ctx->lpf_delta_cdf[delta_ctx],
+ DELTA_RANGE);
+
+ if (delta) {
+ const int sign = curr_lvl > prev_lvl;
+ const int sign_ctx = prev_mbmi->sign;
+ curr_mbmi->sign = sign;
+ aom_write_symbol(w, sign,
+ xd->tile_ctx->lpf_sign_cdf[reuse_ctx][sign_ctx], 2);
+ } else {
+ curr_mbmi->sign = 0;
+ }
+ }
+ }
+ }
+#endif
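+  // CONFIG_LPF_SB: the first superblock sends its filter level as a 6-bit
+  // literal; every other superblock predicts from its left neighbor (or the
+  // one above, in the first column) and codes a reuse flag, then a |delta|
+  // quantized by LPF_STEP plus a sign only when the level changes.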
+
#if CONFIG_CDEF
if (bsize == cm->sb_size && cm->cdef_bits != 0 && !cm->all_lossless) {
int width_step = mi_size_wide[BLOCK_64X64];
@@ -3109,14 +3235,30 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
width += width_step) {
if (!sb_all_skip(cm, mi_row + height, mi_col + width))
aom_write_literal(
- w, cm->mi_grid_visible[(mi_row + height) * cm->mi_stride +
- (mi_col + width)]
- ->mbmi.cdef_strength,
+ w,
+ cm->mi_grid_visible[(mi_row + height) * cm->mi_stride +
+ (mi_col + width)]
+ ->mbmi.cdef_strength,
cm->cdef_bits);
}
}
}
#endif
+#if CONFIG_LOOP_RESTORATION
+ for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ int rcol0, rcol1, rrow0, rrow1, nhtiles;
+ if (av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
+ &rcol0, &rcol1, &rrow0, &rrow1,
+ &nhtiles)) {
+ for (int rrow = rrow0; rrow < rrow1; ++rrow) {
+ for (int rcol = rcol0; rcol < rcol1; ++rcol) {
+ int rtile_idx = rcol + rrow * nhtiles;
+ loop_restoration_write_sb_coeffs(cm, xd, w, plane, rtile_idx);
+ }
+ }
+ }
+ }
+#endif
}
static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
@@ -3141,16 +3283,18 @@ static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
#if CONFIG_PVQ
assert(cpi->td.mb.pvq_q->curr_pos == 0);
#endif
-#if CONFIG_DELTA_Q
if (cpi->common.delta_q_present_flag) {
xd->prev_qindex = cpi->common.base_qindex;
#if CONFIG_EXT_DELTA_Q
if (cpi->common.delta_lf_present_flag) {
+#if CONFIG_LOOPFILTER_LEVEL
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id)
+ xd->prev_delta_lf[lf_id] = 0;
+#endif // CONFIG_LOOPFILTER_LEVEL
xd->prev_delta_lf_from_base = 0;
}
#endif // CONFIG_EXT_DELTA_Q
}
-#endif
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += cm->mib_size) {
av1_zero_left_context(xd);
@@ -3158,7 +3302,7 @@ static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += cm->mib_size) {
write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, 0, mi_row, mi_col,
cm->sb_size);
-#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
+#if CONFIG_MOTION_VAR && NC_MODE_INFO
write_tokens_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, cm->sb_size);
#endif
}
@@ -3224,8 +3368,9 @@ static void encode_restoration_mode(AV1_COMMON *cm,
int s = AOMMIN(cm->subsampling_x, cm->subsampling_y);
if (s && (cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[2].frame_restoration_type != RESTORE_NONE)) {
- aom_wb_write_bit(wb, cm->rst_info[1].restoration_tilesize !=
- cm->rst_info[0].restoration_tilesize);
+ aom_wb_write_bit(wb,
+ cm->rst_info[1].restoration_tilesize !=
+ cm->rst_info[0].restoration_tilesize);
assert(cm->rst_info[1].restoration_tilesize ==
cm->rst_info[0].restoration_tilesize ||
cm->rst_info[1].restoration_tilesize ==
@@ -3240,13 +3385,17 @@ static void encode_restoration_mode(AV1_COMMON *cm,
}
}
-static void write_wiener_filter(WienerInfo *wiener_info,
+static void write_wiener_filter(int wiener_win, WienerInfo *wiener_info,
WienerInfo *ref_wiener_info, aom_writer *wb) {
- aom_write_primitive_refsubexpfin(
- wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
- WIENER_FILT_TAP0_SUBEXP_K,
- ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
- wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV);
+ if (wiener_win == WIENER_WIN)
+ aom_write_primitive_refsubexpfin(
+ wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
+ WIENER_FILT_TAP0_SUBEXP_K,
+ ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
+ wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV);
+ else
+ assert(wiener_info->vfilter[0] == 0 &&
+ wiener_info->vfilter[WIENER_WIN - 1] == 0);
aom_write_primitive_refsubexpfin(
wb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
WIENER_FILT_TAP1_SUBEXP_K,
@@ -3257,11 +3406,15 @@ static void write_wiener_filter(WienerInfo *wiener_info,
WIENER_FILT_TAP2_SUBEXP_K,
ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV,
wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV);
- aom_write_primitive_refsubexpfin(
- wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
- WIENER_FILT_TAP0_SUBEXP_K,
- ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
- wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV);
+ if (wiener_win == WIENER_WIN)
+ aom_write_primitive_refsubexpfin(
+ wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
+ WIENER_FILT_TAP0_SUBEXP_K,
+ ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
+ wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV);
+ else
+ assert(wiener_info->hfilter[0] == 0 &&
+ wiener_info->hfilter[WIENER_WIN - 1] == 0);
aom_write_primitive_refsubexpfin(
wb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
WIENER_FILT_TAP1_SUBEXP_K,
@@ -3290,99 +3443,63 @@ static void write_sgrproj_filter(SgrprojInfo *sgrproj_info,
memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
}
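+// For chroma the Wiener window is WIENER_WIN_CHROMA, shorter than the luma
+// WIENER_WIN, so the outermost taps are implicitly zero: they are asserted
+// rather than coded, which is what the new wiener_win parameter controls.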
-static void encode_restoration(AV1_COMMON *cm, aom_writer *wb) {
- int i, p;
-#if CONFIG_FRAME_SUPERRES
- const int width = cm->superres_upscaled_width;
- const int height = cm->superres_upscaled_height;
-#else
- const int width = cm->width;
- const int height = cm->height;
-#endif // CONFIG_FRAME_SUPERRES
- const int ntiles =
- av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize,
- NULL, NULL, NULL, NULL);
- WienerInfo ref_wiener_info;
- SgrprojInfo ref_sgrproj_info;
- set_default_wiener(&ref_wiener_info);
- set_default_sgrproj(&ref_sgrproj_info);
- const int ntiles_uv = av1_get_rest_ntiles(
- ROUND_POWER_OF_TWO(width, cm->subsampling_x),
- ROUND_POWER_OF_TWO(height, cm->subsampling_y),
- cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL, NULL);
- RestorationInfo *rsi = &cm->rst_info[0];
- if (rsi->frame_restoration_type != RESTORE_NONE) {
- if (rsi->frame_restoration_type == RESTORE_SWITCHABLE) {
- // RESTORE_SWITCHABLE
- for (i = 0; i < ntiles; ++i) {
- av1_write_token(
- wb, av1_switchable_restore_tree, cm->fc->switchable_restore_prob,
- &switchable_restore_encodings[rsi->restoration_type[i]]);
- if (rsi->restoration_type[i] == RESTORE_WIENER) {
- write_wiener_filter(&rsi->wiener_info[i], &ref_wiener_info, wb);
- } else if (rsi->restoration_type[i] == RESTORE_SGRPROJ) {
- write_sgrproj_filter(&rsi->sgrproj_info[i], &ref_sgrproj_info, wb);
- }
- }
- } else if (rsi->frame_restoration_type == RESTORE_WIENER) {
- for (i = 0; i < ntiles; ++i) {
- aom_write(wb, rsi->restoration_type[i] != RESTORE_NONE,
- RESTORE_NONE_WIENER_PROB);
- if (rsi->restoration_type[i] != RESTORE_NONE) {
- write_wiener_filter(&rsi->wiener_info[i], &ref_wiener_info, wb);
- }
- }
- } else if (rsi->frame_restoration_type == RESTORE_SGRPROJ) {
- for (i = 0; i < ntiles; ++i) {
- aom_write(wb, rsi->restoration_type[i] != RESTORE_NONE,
- RESTORE_NONE_SGRPROJ_PROB);
- if (rsi->restoration_type[i] != RESTORE_NONE) {
- write_sgrproj_filter(&rsi->sgrproj_info[i], &ref_sgrproj_info, wb);
- }
- }
+static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm,
+ MACROBLOCKD *xd,
+ aom_writer *const w, int plane,
+ int rtile_idx) {
+ const RestorationInfo *rsi = cm->rst_info + plane;
+ if (rsi->frame_restoration_type == RESTORE_NONE) return;
+
+ const int wiener_win = (plane > 0) ? WIENER_WIN_CHROMA : WIENER_WIN;
+ WienerInfo *wiener_info = xd->wiener_info + plane;
+ SgrprojInfo *sgrproj_info = xd->sgrproj_info + plane;
+
+ if (rsi->frame_restoration_type == RESTORE_SWITCHABLE) {
+ assert(plane == 0);
+ av1_write_token(
+ w, av1_switchable_restore_tree, cm->fc->switchable_restore_prob,
+ &switchable_restore_encodings[rsi->restoration_type[rtile_idx]]);
+ if (rsi->restoration_type[rtile_idx] == RESTORE_WIENER) {
+ write_wiener_filter(wiener_win, &rsi->wiener_info[rtile_idx], wiener_info,
+ w);
+ } else if (rsi->restoration_type[rtile_idx] == RESTORE_SGRPROJ) {
+ write_sgrproj_filter(&rsi->sgrproj_info[rtile_idx], sgrproj_info, w);
}
- }
- for (p = 1; p < MAX_MB_PLANE; ++p) {
- set_default_wiener(&ref_wiener_info);
- set_default_sgrproj(&ref_sgrproj_info);
- rsi = &cm->rst_info[p];
- if (rsi->frame_restoration_type == RESTORE_WIENER) {
- for (i = 0; i < ntiles_uv; ++i) {
- if (ntiles_uv > 1)
- aom_write(wb, rsi->restoration_type[i] != RESTORE_NONE,
- RESTORE_NONE_WIENER_PROB);
- if (rsi->restoration_type[i] != RESTORE_NONE) {
- write_wiener_filter(&rsi->wiener_info[i], &ref_wiener_info, wb);
- }
- }
- } else if (rsi->frame_restoration_type == RESTORE_SGRPROJ) {
- for (i = 0; i < ntiles_uv; ++i) {
- if (ntiles_uv > 1)
- aom_write(wb, rsi->restoration_type[i] != RESTORE_NONE,
- RESTORE_NONE_SGRPROJ_PROB);
- if (rsi->restoration_type[i] != RESTORE_NONE) {
- write_sgrproj_filter(&rsi->sgrproj_info[i], &ref_sgrproj_info, wb);
- }
- }
- } else if (rsi->frame_restoration_type != RESTORE_NONE) {
- assert(0);
+ } else if (rsi->frame_restoration_type == RESTORE_WIENER) {
+ aom_write(w, rsi->restoration_type[rtile_idx] != RESTORE_NONE,
+ RESTORE_NONE_WIENER_PROB);
+ if (rsi->restoration_type[rtile_idx] != RESTORE_NONE) {
+ write_wiener_filter(wiener_win, &rsi->wiener_info[rtile_idx], wiener_info,
+ w);
+ }
+ } else if (rsi->frame_restoration_type == RESTORE_SGRPROJ) {
+ aom_write(w, rsi->restoration_type[rtile_idx] != RESTORE_NONE,
+ RESTORE_NONE_SGRPROJ_PROB);
+ if (rsi->restoration_type[rtile_idx] != RESTORE_NONE) {
+ write_sgrproj_filter(&rsi->sgrproj_info[rtile_idx], sgrproj_info, w);
}
}
}
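+// Restoration side-info is now emitted per restoration tile while the
+// covering superblock is written (see the CONFIG_LOOP_RESTORATION hook in
+// write_modes_sb), with the per-plane Wiener/sgrproj predictors carried in
+// xd instead of a separate frame-level pass over all tiles.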
+
#endif // CONFIG_LOOP_RESTORATION
static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
int i;
struct loopfilter *lf = &cm->lf;
- // Encode the loop filter level and type
- aom_wb_write_literal(wb, lf->filter_level, 6);
-#if CONFIG_UV_LVL
- if (lf->filter_level > 0) {
+// Encode the loop filter level and type
+#if !CONFIG_LPF_SB
+#if CONFIG_LOOPFILTER_LEVEL
+ aom_wb_write_literal(wb, lf->filter_level[0], 6);
+ aom_wb_write_literal(wb, lf->filter_level[1], 6);
+ if (lf->filter_level[0] || lf->filter_level[1]) {
aom_wb_write_literal(wb, lf->filter_level_u, 6);
aom_wb_write_literal(wb, lf->filter_level_v, 6);
}
-#endif
+#else
+ aom_wb_write_literal(wb, lf->filter_level, 6);
+#endif // CONFIG_LOOPFILTER_LEVEL
+#endif // CONFIG_LPF_SB
aom_wb_write_literal(wb, lf->sharpness_level, 3);
// Write out loop filter deltas applied at the MB level based on mode or
@@ -3418,12 +3535,18 @@ static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
#if CONFIG_CDEF
static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
int i;
- aom_wb_write_literal(wb, cm->cdef_dering_damping - 5, 1);
- aom_wb_write_literal(wb, cm->cdef_clpf_damping - 3, 2);
+#if CONFIG_CDEF_SINGLEPASS
+ aom_wb_write_literal(wb, cm->cdef_pri_damping - 3, 2);
+ assert(cm->cdef_pri_damping == cm->cdef_sec_damping);
+#else
+ aom_wb_write_literal(wb, cm->cdef_pri_damping - 5, 1);
+ aom_wb_write_literal(wb, cm->cdef_sec_damping - 3, 2);
+#endif
aom_wb_write_literal(wb, cm->cdef_bits, 2);
for (i = 0; i < cm->nb_cdef_strengths; i++) {
aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
- aom_wb_write_literal(wb, cm->cdef_uv_strengths[i], CDEF_STRENGTH_BITS);
+ if (cm->subsampling_x == cm->subsampling_y)
+ aom_wb_write_literal(wb, cm->cdef_uv_strengths[i], CDEF_STRENGTH_BITS);
}
}
#endif
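+// With CONFIG_CDEF_SINGLEPASS a single damping value (2-bit literal, offset
+// by 3) is shared by the primary and secondary filters, and chroma strengths
+// are sent only when the subsampling is symmetric.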
@@ -3564,6 +3687,72 @@ static void fix_interp_filter(AV1_COMMON *cm, FRAME_COUNTS *counts) {
}
}
+#if CONFIG_MAX_TILE
+
+// Same as write_uniform(), but writes to the uncompressed-header bit buffer (wb).
+static void wb_write_uniform(struct aom_write_bit_buffer *wb, int n, int v) {
+ const int l = get_unsigned_bits(n);
+ const int m = (1 << l) - n;
+ if (l == 0) return;
+ if (v < m) {
+ aom_wb_write_literal(wb, v, l - 1);
+ } else {
+ aom_wb_write_literal(wb, m + ((v - m) >> 1), l - 1);
+ aom_wb_write_literal(wb, (v - m) & 1, 1);
+ }
+}
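+// Quasi-uniform code for v in [0, n): with l = floor(log2(n)) + 1 and
+// m = (1 << l) - n, the first m values take l - 1 bits and the remaining
+// n - m values take l bits. E.g. n = 5 gives l = 3, m = 3: values 0..2 use
+// 2 bits, values 3..4 use 3 bits.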
+
+static void write_tile_info_max_tile(const AV1_COMMON *const cm,
+ struct aom_write_bit_buffer *wb) {
+ int width_mi = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+ int height_mi = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+ int width_sb = width_mi >> MAX_MIB_SIZE_LOG2;
+ int height_sb = height_mi >> MAX_MIB_SIZE_LOG2;
+ int size_sb, i;
+
+ aom_wb_write_bit(wb, cm->uniform_tile_spacing_flag);
+
+ if (cm->uniform_tile_spacing_flag) {
+ // Uniform spaced tiles with power-of-two number of rows and columns
+ // tile columns
+ int ones = cm->log2_tile_cols - cm->min_log2_tile_cols;
+ while (ones--) {
+ aom_wb_write_bit(wb, 1);
+ }
+ if (cm->log2_tile_cols < cm->max_log2_tile_cols) {
+ aom_wb_write_bit(wb, 0);
+ }
+
+ // rows
+ ones = cm->log2_tile_rows - cm->min_log2_tile_rows;
+ while (ones--) {
+ aom_wb_write_bit(wb, 1);
+ }
+ if (cm->log2_tile_rows < cm->max_log2_tile_rows) {
+ aom_wb_write_bit(wb, 0);
+ }
+ } else {
+ // Explicit tiles with configurable tile widths and heights
+ // columns
+ for (i = 0; i < cm->tile_cols; i++) {
+ size_sb = cm->tile_col_start_sb[i + 1] - cm->tile_col_start_sb[i];
+ wb_write_uniform(wb, AOMMIN(width_sb, MAX_TILE_WIDTH_SB), size_sb - 1);
+ width_sb -= size_sb;
+ }
+ assert(width_sb == 0);
+
+ // rows
+ for (i = 0; i < cm->tile_rows; i++) {
+ size_sb = cm->tile_row_start_sb[i + 1] - cm->tile_row_start_sb[i];
+ wb_write_uniform(wb, AOMMIN(height_sb, cm->max_tile_height_sb),
+ size_sb - 1);
+ height_sb -= size_sb;
+ }
+ assert(height_sb == 0);
+ }
+}
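// A decode-side sketch (not part of this patch) of the unary code used in
// the uniform-spacing branch above: the encoder emits
// (log2_tile_cols - min_log2_tile_cols) one-bits, terminated by a zero-bit
// unless the maximum is reached; aom_rb_read_bit() is the bit-reader
// counterpart of aom_wb_write_bit().
static int read_log2_tiles_sketch(struct aom_read_bit_buffer *rb,
                                  int min_log2, int max_log2) {
  int log2_val = min_log2;
  while (log2_val < max_log2 && aom_rb_read_bit(rb)) ++log2_val;
  return log2_val;
}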
+#endif
+
static void write_tile_info(const AV1_COMMON *const cm,
struct aom_write_bit_buffer *wb) {
#if CONFIG_EXT_TILE
@@ -3596,20 +3785,25 @@ static void write_tile_info(const AV1_COMMON *const cm,
#endif // CONFIG_EXT_PARTITION
} else {
#endif // CONFIG_EXT_TILE
- int min_log2_tile_cols, max_log2_tile_cols, ones;
- av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
- // columns
- ones = cm->log2_tile_cols - min_log2_tile_cols;
- while (ones--) aom_wb_write_bit(wb, 1);
+#if CONFIG_MAX_TILE
+ write_tile_info_max_tile(cm, wb);
+#else
+ int min_log2_tile_cols, max_log2_tile_cols, ones;
+ av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
- if (cm->log2_tile_cols < max_log2_tile_cols) aom_wb_write_bit(wb, 0);
+ // columns
+ ones = cm->log2_tile_cols - min_log2_tile_cols;
+ while (ones--) aom_wb_write_bit(wb, 1);
- // rows
- aom_wb_write_bit(wb, cm->log2_tile_rows != 0);
- if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->log2_tile_rows != 1);
+ if (cm->log2_tile_cols < max_log2_tile_cols) aom_wb_write_bit(wb, 0);
+
+ // rows
+ aom_wb_write_bit(wb, cm->log2_tile_rows != 0);
+ if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->log2_tile_rows != 1);
+#endif
#if CONFIG_DEPENDENT_HORZTILES
- if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->dependent_horz_tiles);
+ if (cm->tile_rows > 1) aom_wb_write_bit(wb, cm->dependent_horz_tiles);
#endif
#if CONFIG_EXT_TILE
}
@@ -3620,10 +3814,30 @@ static void write_tile_info(const AV1_COMMON *const cm,
#endif // CONFIG_LOOPFILTERING_ACROSS_TILES
}
-static int get_refresh_mask(AV1_COMP *cpi) {
+#if CONFIG_EXT_REFS
+#if USE_GF16_MULTI_LAYER
+static int get_refresh_mask_gf16(AV1_COMP *cpi) {
int refresh_mask = 0;
+ if (cpi->refresh_last_frame || cpi->refresh_golden_frame ||
+ cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame ||
+ cpi->refresh_alt_ref_frame) {
+ assert(cpi->refresh_fb_idx >= 0 && cpi->refresh_fb_idx < REF_FRAMES);
+ refresh_mask |= (1 << cpi->refresh_fb_idx);
+ }
+
+ return refresh_mask;
+}
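// The refresh mask is a bitmask over the REF_FRAMES slots of the reference
// frame map: bit i set means the coded frame overwrites slot i. For example
// (hypothetical values), with refresh_fb_idx == 5 the function above returns
// 1 << 5 == 0x20, i.e. only slot 5 is refreshed.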
+#endif // USE_GF16_MULTI_LAYER
+#endif // CONFIG_EXT_REFS
+
+static int get_refresh_mask(AV1_COMP *cpi) {
+ int refresh_mask = 0;
#if CONFIG_EXT_REFS
+#if USE_GF16_MULTI_LAYER
+ if (cpi->rc.baseline_gf_interval == 16) return get_refresh_mask_gf16(cpi);
+#endif // USE_GF16_MULTI_LAYER
+
// NOTE(zoeliu): When LAST_FRAME is to get refreshed, the decoder will be
// notified to get LAST3_FRAME refreshed and then the virtual indexes for all
// the 3 LAST reference frames will be updated accordingly, i.e.:
@@ -3634,13 +3848,10 @@ static int get_refresh_mask(AV1_COMP *cpi) {
// LAST3_FRAME.
refresh_mask |=
(cpi->refresh_last_frame << cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]);
- if (cpi->rc.is_bwd_ref_frame && cpi->num_extra_arfs) {
- // We have swapped the virtual indices
- refresh_mask |= (cpi->refresh_bwd_ref_frame << cpi->arf_map[0]);
- } else {
- refresh_mask |= (cpi->refresh_bwd_ref_frame << cpi->bwd_fb_idx);
- }
-#else
+
+ refresh_mask |= (cpi->refresh_bwd_ref_frame << cpi->bwd_fb_idx);
+ refresh_mask |= (cpi->refresh_alt2_ref_frame << cpi->alt2_fb_idx);
+#else // !CONFIG_EXT_REFS
refresh_mask |= (cpi->refresh_last_frame << cpi->lst_fb_idx);
#endif // CONFIG_EXT_REFS
@@ -3658,9 +3869,8 @@ static int get_refresh_mask(AV1_COMP *cpi) {
return refresh_mask | (cpi->refresh_golden_frame << cpi->alt_fb_idx);
} else {
#if CONFIG_EXT_REFS
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- int arf_idx = cpi->arf_map[gf_group->arf_update_idx[gf_group->index]];
-#else
+ const int arf_idx = cpi->alt_fb_idx;
+#else // !CONFIG_EXT_REFS
int arf_idx = cpi->alt_fb_idx;
if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
@@ -3725,15 +3935,12 @@ static INLINE int find_identical_tile(
}
#endif // CONFIG_EXT_TILE
+#if !CONFIG_OBU || CONFIG_EXT_TILE
static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
unsigned int *max_tile_size,
unsigned int *max_tile_col_size) {
const AV1_COMMON *const cm = &cpi->common;
-#if CONFIG_ANS
- struct BufAnsCoder *buf_ans = &cpi->buf_ans;
-#else
aom_writer mode_bc;
-#endif // CONFIG_ANS
int tile_row, tile_col;
TOKENEXTRA *(*const tok_buffers)[MAX_TILE_COLS] = cpi->tile_tok;
TileBufferEnc(*const tile_buffers)[MAX_TILE_COLS] = cpi->tile_buffers;
@@ -3744,7 +3951,7 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
const int have_tiles = tile_cols * tile_rows > 1;
struct aom_write_bit_buffer wb = { dst, 0 };
const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols;
- uint32_t comp_hdr_size;
+ uint32_t compressed_hdr_size;
// Fixed size tile groups for the moment
const int num_tg_hdrs = cm->num_tg;
const int tg_size =
@@ -3759,7 +3966,6 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
int tile_size_bytes = 4;
int tile_col_size_bytes;
uint32_t uncompressed_hdr_size = 0;
- struct aom_write_bit_buffer comp_hdr_len_wb;
struct aom_write_bit_buffer tg_params_wb;
struct aom_write_bit_buffer tile_size_bytes_wb;
uint32_t saved_offset;
@@ -3806,19 +4012,14 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
cpi->td.mb.pvq_q = &this_tile->pvq_q;
cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context;
#endif // CONFIG_PVQ
-#if !CONFIG_ANS
+#if CONFIG_ANS
+ mode_bc.size = 1 << cpi->common.ans_window_size_log2;
+#endif
aom_start_encode(&mode_bc, buf->data + data_offset);
write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
assert(tok == tok_end);
aom_stop_encode(&mode_bc);
tile_size = mode_bc.pos;
-#else
- buf_ans_write_init(buf_ans, buf->data + data_offset);
- write_modes(cpi, &tile_info, buf_ans, &tok, tok_end);
- assert(tok == tok_end);
- aom_buf_ans_flush(buf_ans);
- tile_size = buf_ans_write_end(buf_ans);
-#endif // !CONFIG_ANS
#if CONFIG_PVQ
cpi->td.mb.pvq_q = NULL;
#endif
@@ -3866,7 +4067,7 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
}
} else {
#endif // CONFIG_EXT_TILE
- write_uncompressed_header(cpi, &wb);
+ write_uncompressed_header_frame(cpi, &wb);
#if CONFIG_EXT_REFS
if (cm->show_existing_frame) {
@@ -3887,14 +4088,22 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
aom_wb_overwrite_literal(&wb, (1 << n_log2_tiles) - 1, n_log2_tiles);
}
- /* Write a placeholder for the compressed header length */
- comp_hdr_len_wb = wb;
- aom_wb_write_literal(&wb, 0, 16);
+ if (!use_compressed_header(cm)) {
+ uncompressed_hdr_size = aom_wb_bytes_written(&wb);
+ compressed_hdr_size = 0;
+ } else {
+ /* Write a placeholder for the compressed header length */
+ struct aom_write_bit_buffer comp_hdr_len_wb = wb;
+ aom_wb_write_literal(&wb, 0, 16);
+
+ uncompressed_hdr_size = aom_wb_bytes_written(&wb);
+ compressed_hdr_size =
+ write_compressed_header(cpi, dst + uncompressed_hdr_size);
+ aom_wb_overwrite_literal(&comp_hdr_len_wb, (int)(compressed_hdr_size),
+ 16);
+ }
- uncompressed_hdr_size = aom_wb_bytes_written(&wb);
- comp_hdr_size = write_compressed_header(cpi, dst + uncompressed_hdr_size);
- aom_wb_overwrite_literal(&comp_hdr_len_wb, (int)(comp_hdr_size), 16);
- hdr_size = uncompressed_hdr_size + comp_hdr_size;
+ hdr_size = uncompressed_hdr_size + compressed_hdr_size;
total_size += hdr_size;
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
@@ -3938,7 +4147,7 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
// Copy compressed header
memmove(dst + old_total_size + uncompressed_hdr_size,
dst + uncompressed_hdr_size,
- comp_hdr_size * sizeof(uint8_t));
+ compressed_hdr_size * sizeof(uint8_t));
total_size += hdr_size;
tile_count = 1;
curr_tg_data_size = hdr_size + tile_size + 4;
@@ -3957,7 +4166,7 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
// Copy compressed header
memmove(dst + total_size + uncompressed_hdr_size,
dst + uncompressed_hdr_size,
- comp_hdr_size * sizeof(uint8_t));
+ compressed_hdr_size * sizeof(uint8_t));
total_size += hdr_size;
tile_count = 0;
curr_tg_data_size = hdr_size;
@@ -3982,22 +4191,24 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context;
#endif // CONFIG_PVQ
#if CONFIG_ANS
- buf_ans_write_init(buf_ans, dst + total_size);
- write_modes(cpi, &tile_info, buf_ans, &tok, tok_end);
- assert(tok == tok_end);
- aom_buf_ans_flush(buf_ans);
- tile_size = buf_ans_write_end(buf_ans);
-#else
- aom_start_encode(&mode_bc, dst + total_size);
- write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
+ mode_bc.size = 1 << cpi->common.ans_window_size_log2;
+#endif // CONFIG_ANS
+#if CONFIG_LOOP_RESTORATION
+ for (int p = 0; p < MAX_MB_PLANE; ++p) {
+ set_default_wiener(cpi->td.mb.e_mbd.wiener_info + p);
+ set_default_sgrproj(cpi->td.mb.e_mbd.sgrproj_info + p);
+ }
+#endif // CONFIG_LOOP_RESTORATION
+
+ aom_start_encode(&mode_bc, dst + total_size);
+ write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
#if !CONFIG_LV_MAP
#if !CONFIG_PVQ
- assert(tok == tok_end);
+ assert(tok == tok_end);
#endif // !CONFIG_PVQ
#endif // !CONFIG_LV_MAP
- aom_stop_encode(&mode_bc);
- tile_size = mode_bc.pos;
-#endif // CONFIG_ANS
+ aom_stop_encode(&mode_bc);
+ tile_size = mode_bc.pos;
#if CONFIG_PVQ
cpi->td.mb.pvq_q = NULL;
#endif
@@ -4018,18 +4229,20 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
}
// Write the final tile group size
if (n_log2_tiles) {
- aom_wb_overwrite_literal(&tg_params_wb, (1 << n_log2_tiles) - tile_count,
- n_log2_tiles);
+ aom_wb_overwrite_literal(
+ &tg_params_wb, (tile_cols * tile_rows) - tile_count, n_log2_tiles);
aom_wb_overwrite_literal(&tg_params_wb, tile_count - 1, n_log2_tiles);
}
// Remux if possible. TODO (Thomas Davies): do this for more than one tile
// group
if (have_tiles && tg_count == 1) {
- int data_size = total_size - (uncompressed_hdr_size + comp_hdr_size);
- data_size = remux_tiles(cm, dst + uncompressed_hdr_size + comp_hdr_size,
- data_size, *max_tile_size, *max_tile_col_size,
- &tile_size_bytes, &tile_col_size_bytes);
- total_size = data_size + uncompressed_hdr_size + comp_hdr_size;
+ int data_size =
+ total_size - (uncompressed_hdr_size + compressed_hdr_size);
+ data_size =
+ remux_tiles(cm, dst + uncompressed_hdr_size + compressed_hdr_size,
+ data_size, *max_tile_size, *max_tile_col_size,
+ &tile_size_bytes, &tile_col_size_bytes);
+ total_size = data_size + uncompressed_hdr_size + compressed_hdr_size;
aom_wb_overwrite_literal(&tile_size_bytes_wb, tile_size_bytes - 1, 2);
}
@@ -4038,6 +4251,7 @@ static uint32_t write_tiles(AV1_COMP *const cpi, uint8_t *const dst,
#endif // CONFIG_EXT_TILE
return (uint32_t)total_size;
}
+#endif
static void write_render_size(const AV1_COMMON *cm,
struct aom_write_bit_buffer *wb) {
@@ -4053,12 +4267,12 @@ static void write_render_size(const AV1_COMMON *cm,
static void write_superres_scale(const AV1_COMMON *const cm,
struct aom_write_bit_buffer *wb) {
  // First bit is whether to scale or not
- if (cm->superres_scale_numerator == SCALE_DENOMINATOR) {
+ if (cm->superres_scale_denominator == SCALE_NUMERATOR) {
aom_wb_write_bit(wb, 0); // no scaling
} else {
aom_wb_write_bit(wb, 1); // scaling, write scale factor
aom_wb_write_literal(
- wb, cm->superres_scale_numerator - SUPERRES_SCALE_NUMERATOR_MIN,
+ wb, cm->superres_scale_denominator - SUPERRES_SCALE_DENOMINATOR_MIN,
SUPERRES_SCALE_BITS);
}
}
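// A hedged sketch of the matching read side (not part of this patch): one
// bit signals whether scaling is on; if it is, the denominator is coded as
// an offset from SUPERRES_SCALE_DENOMINATOR_MIN in SUPERRES_SCALE_BITS bits.
static int read_superres_denom_sketch(struct aom_read_bit_buffer *rb) {
  if (!aom_rb_read_bit(rb)) return SCALE_NUMERATOR;  // no scaling
  return aom_rb_read_literal(rb, SUPERRES_SCALE_BITS) +
         SUPERRES_SCALE_DENOMINATOR_MIN;
}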
@@ -4109,12 +4323,6 @@ static void write_frame_size_with_refs(AV1_COMP *cpi,
if (!found) write_frame_size(cm, wb);
}
-static void write_sync_code(struct aom_write_bit_buffer *wb) {
- aom_wb_write_literal(wb, AV1_SYNC_CODE_0, 8);
- aom_wb_write_literal(wb, AV1_SYNC_CODE_1, 8);
- aom_wb_write_literal(wb, AV1_SYNC_CODE_2, 8);
-}
-
static void write_profile(BITSTREAM_PROFILE profile,
struct aom_write_bit_buffer *wb) {
switch (profile) {
@@ -4161,11 +4369,9 @@ static void write_bitdepth_colorspace_sampling(
}
#if CONFIG_REFERENCE_BUFFER
-void write_sequence_header(
-#if CONFIG_EXT_TILE
- AV1_COMMON *const cm,
-#endif // CONFIG_EXT_TILE
- SequenceHeader *seq_params) {
+void write_sequence_header(AV1_COMMON *const cm,
+ struct aom_write_bit_buffer *wb) {
+ SequenceHeader *seq_params = &cm->seq_params;
/* Placeholder for actually writing to the bitstream */
seq_params->frame_id_numbers_present_flag =
#if CONFIG_EXT_TILE
@@ -4174,10 +4380,29 @@ void write_sequence_header(
FRAME_ID_NUMBERS_PRESENT_FLAG;
seq_params->frame_id_length_minus7 = FRAME_ID_LENGTH_MINUS7;
seq_params->delta_frame_id_length_minus2 = DELTA_FRAME_ID_LENGTH_MINUS2;
+
+ aom_wb_write_bit(wb, seq_params->frame_id_numbers_present_flag);
+ if (seq_params->frame_id_numbers_present_flag) {
+ aom_wb_write_literal(wb, seq_params->frame_id_length_minus7, 4);
+ aom_wb_write_literal(wb, seq_params->delta_frame_id_length_minus2, 4);
+ }
+}
+#endif // CONFIG_REFERENCE_BUFFER
+
+static void write_sb_size(const AV1_COMMON *cm,
+ struct aom_write_bit_buffer *wb) {
+ (void)cm;
+ (void)wb;
+ assert(cm->mib_size == mi_size_wide[cm->sb_size]);
+ assert(cm->mib_size == 1 << cm->mib_size_log2);
+#if CONFIG_EXT_PARTITION
+ assert(cm->sb_size == BLOCK_128X128 || cm->sb_size == BLOCK_64X64);
+ aom_wb_write_bit(wb, cm->sb_size == BLOCK_128X128 ? 1 : 0);
+#else
+ assert(cm->sb_size == BLOCK_64X64);
+#endif // CONFIG_EXT_PARTITION
}
-#endif
-#if CONFIG_EXT_INTER
static void write_compound_tools(const AV1_COMMON *cm,
struct aom_write_bit_buffer *wb) {
(void)cm;
@@ -4201,22 +4426,129 @@ static void write_compound_tools(const AV1_COMMON *cm,
}
#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
}
-#endif // CONFIG_EXT_INTER
-static void write_uncompressed_header(AV1_COMP *cpi,
- struct aom_write_bit_buffer *wb) {
- AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+#if CONFIG_GLOBAL_MOTION
+static void write_global_motion_params(const WarpedMotionParams *params,
+ const WarpedMotionParams *ref_params,
+ struct aom_write_bit_buffer *wb,
+ int allow_hp) {
+ TransformationType type = params->wmtype;
+ int trans_bits;
+ int trans_prec_diff;
-#if CONFIG_REFERENCE_BUFFER
- /* TODO: Move outside frame loop or inside key-frame branch */
- write_sequence_header(
-#if CONFIG_EXT_TILE
- cm,
-#endif // CONFIG_EXT_TILE
- &cpi->seq_params);
+ aom_wb_write_bit(wb, type != IDENTITY);
+ if (type != IDENTITY) {
+#if GLOBAL_TRANS_TYPES > 4
+ aom_wb_write_literal(wb, type - 1, GLOBAL_TYPE_BITS);
+#else
+ aom_wb_write_bit(wb, type == ROTZOOM);
+ if (type != ROTZOOM) aom_wb_write_bit(wb, type == TRANSLATION);
+#endif // GLOBAL_TRANS_TYPES > 4
+ }
+
+ switch (type) {
+ case HOMOGRAPHY:
+ case HORTRAPEZOID:
+ case VERTRAPEZOID:
+ if (type != HORTRAPEZOID)
+ aom_wb_write_signed_primitive_refsubexpfin(
+ wb, GM_ROW3HOMO_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[6] >> GM_ROW3HOMO_PREC_DIFF),
+ (params->wmmat[6] >> GM_ROW3HOMO_PREC_DIFF));
+ if (type != VERTRAPEZOID)
+ aom_wb_write_signed_primitive_refsubexpfin(
+ wb, GM_ROW3HOMO_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[7] >> GM_ROW3HOMO_PREC_DIFF),
+ (params->wmmat[7] >> GM_ROW3HOMO_PREC_DIFF));
+ // fallthrough intended
+ case AFFINE:
+ case ROTZOOM:
+ aom_wb_write_signed_primitive_refsubexpfin(
+ wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[2] >> GM_ALPHA_PREC_DIFF) -
+ (1 << GM_ALPHA_PREC_BITS),
+ (params->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
+ if (type != VERTRAPEZOID)
+ aom_wb_write_signed_primitive_refsubexpfin(
+ wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[3] >> GM_ALPHA_PREC_DIFF),
+ (params->wmmat[3] >> GM_ALPHA_PREC_DIFF));
+ if (type >= AFFINE) {
+ if (type != HORTRAPEZOID)
+ aom_wb_write_signed_primitive_refsubexpfin(
+ wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[4] >> GM_ALPHA_PREC_DIFF),
+ (params->wmmat[4] >> GM_ALPHA_PREC_DIFF));
+ aom_wb_write_signed_primitive_refsubexpfin(
+ wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
+ (1 << GM_ALPHA_PREC_BITS),
+ (params->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
+ (1 << GM_ALPHA_PREC_BITS));
+ }
+ // fallthrough intended
+ case TRANSLATION:
+ trans_bits = (type == TRANSLATION) ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
+ : GM_ABS_TRANS_BITS;
+ trans_prec_diff = (type == TRANSLATION)
+ ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
+ : GM_TRANS_PREC_DIFF;
+ aom_wb_write_signed_primitive_refsubexpfin(
+ wb, (1 << trans_bits) + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[0] >> trans_prec_diff),
+ (params->wmmat[0] >> trans_prec_diff));
+ aom_wb_write_signed_primitive_refsubexpfin(
+ wb, (1 << trans_bits) + 1, SUBEXPFIN_K,
+ (ref_params->wmmat[1] >> trans_prec_diff),
+ (params->wmmat[1] >> trans_prec_diff));
+ break;
+ case IDENTITY: break;
+ default: assert(0);
+ }
+}
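// For GLOBAL_TRANS_TYPES <= 4, the aom_wb_write_bit() calls at the top of
// the function give the transform type a short prefix code (worked out from
// the conditions above, not a normative table):
//   IDENTITY    -> 0
//   ROTZOOM     -> 1 1
//   TRANSLATION -> 1 0 1
//   AFFINE      -> 1 0 0
// Each applicable wmmat[] parameter is then coded with
// aom_wb_write_signed_primitive_refsubexpfin() as a difference against the
// reference frame's parameters.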
+
+static void write_global_motion(AV1_COMP *cpi,
+ struct aom_write_bit_buffer *wb) {
+ AV1_COMMON *const cm = &cpi->common;
+ int frame;
+ for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
+ const WarpedMotionParams *ref_params =
+ cm->error_resilient_mode ? &default_warp_params
+ : &cm->prev_frame->global_motion[frame];
+ write_global_motion_params(&cm->global_motion[frame], ref_params, wb,
+ cm->allow_high_precision_mv);
+ // TODO(sarahparker, debargha): The logic in the commented out code below
+ // does not work currently and causes mismatches when resize is on.
+ // Fix it before turning the optimization back on.
+ /*
+ YV12_BUFFER_CONFIG *ref_buf = get_ref_frame_buffer(cpi, frame);
+ if (cpi->source->y_crop_width == ref_buf->y_crop_width &&
+ cpi->source->y_crop_height == ref_buf->y_crop_height) {
+ write_global_motion_params(&cm->global_motion[frame],
+ &cm->prev_frame->global_motion[frame], wb,
+ cm->allow_high_precision_mv);
+ } else {
+ assert(cm->global_motion[frame].wmtype == IDENTITY &&
+ "Invalid warp type for frames of different resolutions");
+ }
+ */
+ /*
+ printf("Frame %d/%d: Enc Ref %d: %d %d %d %d\n",
+ cm->current_video_frame, cm->show_frame, frame,
+ cm->global_motion[frame].wmmat[0],
+ cm->global_motion[frame].wmmat[1], cm->global_motion[frame].wmmat[2],
+ cm->global_motion[frame].wmmat[3]);
+ */
+ }
+}
#endif
+#if !CONFIG_OBU
+static void write_uncompressed_header_frame(AV1_COMP *cpi,
+ struct aom_write_bit_buffer *wb) {
+ AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+
aom_wb_write_literal(wb, AOM_FRAME_MARKER, 2);
write_profile(cm->profile, wb);
@@ -4244,8 +4576,8 @@ static void write_uncompressed_header(AV1_COMP *cpi,
aom_wb_write_literal(wb, cpi->existing_fb_idx_to_show, 3);
#if CONFIG_REFERENCE_BUFFER
- if (cpi->seq_params.frame_id_numbers_present_flag) {
- int frame_id_len = cpi->seq_params.frame_id_length_minus7 + 7;
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ int frame_id_len = cm->seq_params.frame_id_length_minus7 + 7;
int display_frame_id = cm->ref_frame_id[cpi->existing_fb_idx_to_show];
aom_wb_write_literal(wb, display_frame_id, frame_id_len);
/* Add a zero byte to prevent emulation of superframe marker */
@@ -4253,7 +4585,7 @@ static void write_uncompressed_header(AV1_COMP *cpi,
      /* Consider having this logic in only one place */
aom_wb_write_literal(wb, 0, 8);
}
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
return;
} else {
@@ -4265,33 +4597,46 @@ static void write_uncompressed_header(AV1_COMP *cpi,
aom_wb_write_bit(wb, cm->frame_type);
aom_wb_write_bit(wb, cm->show_frame);
+ if (cm->frame_type != KEY_FRAME)
+ if (!cm->show_frame) aom_wb_write_bit(wb, cm->intra_only);
aom_wb_write_bit(wb, cm->error_resilient_mode);
+ if (frame_is_intra_only(cm)) {
+#if CONFIG_REFERENCE_BUFFER
+ write_sequence_header(cm, wb);
+#endif // CONFIG_REFERENCE_BUFFER
+ }
#if CONFIG_REFERENCE_BUFFER
cm->invalid_delta_frame_id_minus1 = 0;
- if (cpi->seq_params.frame_id_numbers_present_flag) {
- int frame_id_len = cpi->seq_params.frame_id_length_minus7 + 7;
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ int frame_id_len = cm->seq_params.frame_id_length_minus7 + 7;
aom_wb_write_literal(wb, cm->current_frame_id, frame_id_len);
}
-#endif
-
+#endif // CONFIG_REFERENCE_BUFFER
if (cm->frame_type == KEY_FRAME) {
- write_sync_code(wb);
write_bitdepth_colorspace_sampling(cm, wb);
write_frame_size(cm, wb);
+ write_sb_size(cm, wb);
+
#if CONFIG_ANS && ANS_MAX_SYMBOLS
assert(cpi->common.ans_window_size_log2 >= 8);
assert(cpi->common.ans_window_size_log2 < 24);
aom_wb_write_literal(wb, cpi->common.ans_window_size_log2 - 8, 4);
#endif // CONFIG_ANS && ANS_MAX_SYMBOLS
-#if CONFIG_PALETTE || CONFIG_INTRABC
aom_wb_write_bit(wb, cm->allow_screen_content_tools);
-#endif // CONFIG_PALETTE || CONFIG_INTRABC
+#if CONFIG_AMVR
+ if (cm->allow_screen_content_tools) {
+ if (cm->seq_mv_precision_level == 2) {
+ aom_wb_write_bit(wb, 1);
+ } else {
+ aom_wb_write_bit(wb, 0);
+ aom_wb_write_bit(wb, cm->seq_mv_precision_level == 0);
+ }
+ }
+#endif
} else {
- if (!cm->show_frame) aom_wb_write_bit(wb, cm->intra_only);
-#if CONFIG_PALETTE || CONFIG_INTRABC
if (cm->intra_only) aom_wb_write_bit(wb, cm->allow_screen_content_tools);
-#endif // CONFIG_PALETTE || CONFIG_INTRABC
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
if (!cm->error_resilient_mode) {
if (cm->intra_only) {
aom_wb_write_bit(wb,
@@ -4304,13 +4649,12 @@ static void write_uncompressed_header(AV1_COMP *cpi,
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
}
}
-
+#endif
#if CONFIG_EXT_REFS
cpi->refresh_frame_mask = get_refresh_mask(cpi);
#endif // CONFIG_EXT_REFS
if (cm->intra_only) {
- write_sync_code(wb);
write_bitdepth_colorspace_sampling(cm, wb);
#if CONFIG_EXT_REFS
@@ -4346,12 +4690,14 @@ static void write_uncompressed_header(AV1_COMP *cpi,
assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
aom_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
REF_FRAMES_LOG2);
+#if !CONFIG_FRAME_SIGN_BIAS
aom_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]);
+#endif // !CONFIG_FRAME_SIGN_BIAS
#if CONFIG_REFERENCE_BUFFER
- if (cpi->seq_params.frame_id_numbers_present_flag) {
+ if (cm->seq_params.frame_id_numbers_present_flag) {
int i = get_ref_frame_map_idx(cpi, ref_frame);
- int frame_id_len = cpi->seq_params.frame_id_length_minus7 + 7;
- int diff_len = cpi->seq_params.delta_frame_id_length_minus2 + 2;
+ int frame_id_len = cm->seq_params.frame_id_length_minus7 + 7;
+ int diff_len = cm->seq_params.delta_frame_id_length_minus2 + 2;
int delta_frame_id_minus1 =
((cm->current_frame_id - cm->ref_frame_id[i] +
(1 << frame_id_len)) %
@@ -4362,8 +4708,24 @@ static void write_uncompressed_header(AV1_COMP *cpi,
cm->invalid_delta_frame_id_minus1 = 1;
aom_wb_write_literal(wb, delta_frame_id_minus1, diff_len);
}
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
+ }
+
+#if CONFIG_FRAME_SIGN_BIAS
+#define FRAME_SIGN_BIAS_DEBUG 0
+#if FRAME_SIGN_BIAS_DEBUG
+ {
+ printf("\n\nENCODER: Frame=%d, show_frame=%d:", cm->current_video_frame,
+ cm->show_frame);
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ printf(" sign_bias[%d]=%d", ref_frame,
+ cm->ref_frame_sign_bias[ref_frame]);
+ }
+ printf("\n");
}
+#endif // FRAME_SIGN_BIAS_DEBUG
+#undef FRAME_SIGN_BIAS_DEBUG
+#endif // CONFIG_FRAME_SIGN_BIAS
#if CONFIG_FRAME_SIZE
if (cm->error_resilient_mode == 0) {
@@ -4375,42 +4737,55 @@ static void write_uncompressed_header(AV1_COMP *cpi,
write_frame_size_with_refs(cpi, wb);
#endif
+#if CONFIG_AMVR
+ if (cm->seq_mv_precision_level == 2) {
+ aom_wb_write_bit(wb, cm->cur_frame_mv_precision_level == 0);
+ }
+#endif
aom_wb_write_bit(wb, cm->allow_high_precision_mv);
fix_interp_filter(cm, cpi->td.counts);
write_frame_interp_filter(cm->interp_filter, wb);
#if CONFIG_TEMPMV_SIGNALING
- if (!cm->error_resilient_mode) {
+ if (frame_might_use_prev_frame_mvs(cm)) {
aom_wb_write_bit(wb, cm->use_prev_frame_mvs);
}
#endif
}
}
-#if CONFIG_REFERENCE_BUFFER
- cm->refresh_mask = cm->frame_type == KEY_FRAME ? 0xFF : get_refresh_mask(cpi);
+#if CONFIG_FRAME_MARKER
+ if (cm->show_frame == 0) {
+ int arf_offset = AOMMIN(
+ (MAX_GF_INTERVAL - 1),
+ cpi->twopass.gf_group.arf_src_offset[cpi->twopass.gf_group.index]);
+#if CONFIG_EXT_REFS
+ int brf_offset =
+ cpi->twopass.gf_group.brf_src_offset[cpi->twopass.gf_group.index];
+
+ arf_offset = AOMMIN((MAX_GF_INTERVAL - 1), arf_offset + brf_offset);
+#endif
+ aom_wb_write_literal(wb, arf_offset, 4);
+ }
#endif
+#if CONFIG_REFERENCE_BUFFER
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ cm->refresh_mask =
+ cm->frame_type == KEY_FRAME ? 0xFF : get_refresh_mask(cpi);
+ }
+#endif // CONFIG_REFERENCE_BUFFER
+
if (!cm->error_resilient_mode) {
aom_wb_write_bit(
wb, cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_FORWARD);
}
-
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
aom_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
-
- assert(cm->mib_size == mi_size_wide[cm->sb_size]);
- assert(cm->mib_size == 1 << cm->mib_size_log2);
-#if CONFIG_EXT_PARTITION
- assert(cm->sb_size == BLOCK_128X128 || cm->sb_size == BLOCK_64X64);
- aom_wb_write_bit(wb, cm->sb_size == BLOCK_128X128 ? 1 : 0);
-#else
- assert(cm->sb_size == BLOCK_64X64);
-#endif // CONFIG_EXT_PARTITION
-
+#endif
encode_loopfilter(cm, wb);
encode_quantization(cm, wb);
encode_segmentation(cm, xd, wb);
-#if CONFIG_DELTA_Q
{
int i;
struct segmentation *const seg = &cm->seg;
@@ -4434,12 +4809,16 @@ static void write_uncompressed_header(AV1_COMP *cpi,
if (cm->delta_lf_present_flag) {
aom_wb_write_literal(wb, OD_ILOG_NZ(cm->delta_lf_res) - 1, 2);
xd->prev_delta_lf_from_base = 0;
+#if CONFIG_LOOPFILTER_LEVEL
+ aom_wb_write_bit(wb, cm->delta_lf_multi);
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id)
+ xd->prev_delta_lf[lf_id] = 0;
+#endif // CONFIG_LOOPFILTER_LEVEL
}
#endif // CONFIG_EXT_DELTA_Q
}
}
}
-#endif
#if CONFIG_CDEF
if (!cm->all_lossless) {
encode_cdef(cm, wb);
@@ -4461,113 +4840,372 @@ static void write_uncompressed_header(AV1_COMP *cpi,
if (!use_hybrid_pred) aom_wb_write_bit(wb, use_compound_pred);
#endif // !CONFIG_REF_ADAPT
}
-#if CONFIG_EXT_INTER
write_compound_tools(cm, wb);
-#endif // CONFIG_EXT_INTER
#if CONFIG_EXT_TX
aom_wb_write_bit(wb, cm->reduced_tx_set_used);
#endif // CONFIG_EXT_TX
- write_tile_info(cm, wb);
-}
+#if CONFIG_ADAPT_SCAN
+ aom_wb_write_bit(wb, cm->use_adapt_scan);
+#endif
#if CONFIG_GLOBAL_MOTION
-static void write_global_motion_params(WarpedMotionParams *params,
- WarpedMotionParams *ref_params,
- aom_writer *w, int allow_hp) {
- TransformationType type = params->wmtype;
- int trans_bits;
- int trans_prec_diff;
- aom_write_bit(w, type != IDENTITY);
- if (type != IDENTITY) aom_write_literal(w, type - 1, GLOBAL_TYPE_BITS);
+ if (!frame_is_intra_only(cm)) write_global_motion(cpi, wb);
+#endif // CONFIG_GLOBAL_MOTION
- switch (type) {
- case HOMOGRAPHY:
- case HORTRAPEZOID:
- case VERTRAPEZOID:
- if (type != HORTRAPEZOID)
- aom_write_signed_primitive_refsubexpfin(
- w, GM_ROW3HOMO_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[6] >> GM_ROW3HOMO_PREC_DIFF),
- (params->wmmat[6] >> GM_ROW3HOMO_PREC_DIFF));
- if (type != VERTRAPEZOID)
- aom_write_signed_primitive_refsubexpfin(
- w, GM_ROW3HOMO_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[7] >> GM_ROW3HOMO_PREC_DIFF),
- (params->wmmat[7] >> GM_ROW3HOMO_PREC_DIFF));
- // fallthrough intended
- case AFFINE:
- case ROTZOOM:
- aom_write_signed_primitive_refsubexpfin(
- w, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[2] >> GM_ALPHA_PREC_DIFF) -
- (1 << GM_ALPHA_PREC_BITS),
- (params->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
- if (type != VERTRAPEZOID)
- aom_write_signed_primitive_refsubexpfin(
- w, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[3] >> GM_ALPHA_PREC_DIFF),
- (params->wmmat[3] >> GM_ALPHA_PREC_DIFF));
- if (type >= AFFINE) {
- if (type != HORTRAPEZOID)
- aom_write_signed_primitive_refsubexpfin(
- w, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[4] >> GM_ALPHA_PREC_DIFF),
- (params->wmmat[4] >> GM_ALPHA_PREC_DIFF));
- aom_write_signed_primitive_refsubexpfin(
- w, GM_ALPHA_MAX + 1, SUBEXPFIN_K,
- (ref_params->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
- (1 << GM_ALPHA_PREC_BITS),
- (params->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
- (1 << GM_ALPHA_PREC_BITS));
- }
- // fallthrough intended
- case TRANSLATION:
- trans_bits = (type == TRANSLATION) ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
- : GM_ABS_TRANS_BITS;
- trans_prec_diff = (type == TRANSLATION)
- ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
- : GM_TRANS_PREC_DIFF;
- aom_write_signed_primitive_refsubexpfin(
- w, (1 << trans_bits) + 1, SUBEXPFIN_K,
- (ref_params->wmmat[0] >> trans_prec_diff),
- (params->wmmat[0] >> trans_prec_diff));
- aom_write_signed_primitive_refsubexpfin(
- w, (1 << trans_bits) + 1, SUBEXPFIN_K,
- (ref_params->wmmat[1] >> trans_prec_diff),
- (params->wmmat[1] >> trans_prec_diff));
- break;
- case IDENTITY: break;
- default: assert(0);
- }
+ write_tile_info(cm, wb);
}
-static void write_global_motion(AV1_COMP *cpi, aom_writer *w) {
+#else
+// New function based on HLS R18
+static void write_uncompressed_header_obu(AV1_COMP *cpi,
+ struct aom_write_bit_buffer *wb) {
AV1_COMMON *const cm = &cpi->common;
- int frame;
- YV12_BUFFER_CONFIG *ref_buf;
- for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
- ref_buf = get_ref_frame_buffer(cpi, frame);
- if (cpi->source->y_crop_width == ref_buf->y_crop_width &&
- cpi->source->y_crop_height == ref_buf->y_crop_height) {
- write_global_motion_params(&cm->global_motion[frame],
- &cm->prev_frame->global_motion[frame], w,
- cm->allow_high_precision_mv);
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+
+#if CONFIG_EXT_TILE
+ aom_wb_write_literal(wb, cm->large_scale_tile, 1);
+#endif // CONFIG_EXT_TILE
+
+#if CONFIG_EXT_REFS
+  // NOTE: By default, all coded frames are to be used as references
+ cm->is_reference_frame = 1;
+
+ if (cm->show_existing_frame) {
+ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+ const int frame_to_show = cm->ref_frame_map[cpi->existing_fb_idx_to_show];
+
+ if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) {
+ aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ "Buffer %d does not contain a reconstructed frame",
+ frame_to_show);
+ }
+ ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
+
+ aom_wb_write_bit(wb, 1); // show_existing_frame
+ aom_wb_write_literal(wb, cpi->existing_fb_idx_to_show, 3);
+
+#if CONFIG_REFERENCE_BUFFER
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ int frame_id_len = cm->seq_params.frame_id_length_minus7 + 7;
+ int display_frame_id = cm->ref_frame_id[cpi->existing_fb_idx_to_show];
+ aom_wb_write_literal(wb, display_frame_id, frame_id_len);
+ /* Add a zero byte to prevent emulation of superframe marker */
+    /* Same logic as when terminating the entropy coder */
+    /* Consider having this logic in only one place */
+ aom_wb_write_literal(wb, 0, 8);
+ }
+#endif // CONFIG_REFERENCE_BUFFER
+
+ return;
+ } else {
+#endif // CONFIG_EXT_REFS
+ aom_wb_write_bit(wb, 0); // show_existing_frame
+#if CONFIG_EXT_REFS
+ }
+#endif // CONFIG_EXT_REFS
+
+ cm->frame_type = cm->intra_only ? INTRA_ONLY_FRAME : cm->frame_type;
+ aom_wb_write_literal(wb, cm->frame_type, 2);
+
+ if (cm->intra_only) cm->frame_type = INTRA_ONLY_FRAME;
+
+ aom_wb_write_bit(wb, cm->show_frame);
+ aom_wb_write_bit(wb, cm->error_resilient_mode);
+
+#if CONFIG_REFERENCE_BUFFER
+ cm->invalid_delta_frame_id_minus1 = 0;
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ int frame_id_len = cm->seq_params.frame_id_length_minus7 + 7;
+ aom_wb_write_literal(wb, cm->current_frame_id, frame_id_len);
+ }
+#endif // CONFIG_REFERENCE_BUFFER
+ if (cm->frame_type == KEY_FRAME) {
+ write_frame_size(cm, wb);
+ write_sb_size(cm, wb);
+
+#if CONFIG_ANS && ANS_MAX_SYMBOLS
+ assert(cpi->common.ans_window_size_log2 >= 8);
+ assert(cpi->common.ans_window_size_log2 < 24);
+ aom_wb_write_literal(wb, cpi->common.ans_window_size_log2 - 8, 4);
+#endif // CONFIG_ANS && ANS_MAX_SYMBOLS
+ aom_wb_write_bit(wb, cm->allow_screen_content_tools);
+#if CONFIG_AMVR
+ if (cm->allow_screen_content_tools) {
+ if (cm->seq_mv_precision_level == 2) {
+ aom_wb_write_bit(wb, 1);
+ } else {
+ aom_wb_write_bit(wb, 0);
+ aom_wb_write_bit(wb, cm->seq_mv_precision_level == 0);
+ }
+ }
+#endif
+ } else if (cm->frame_type == INTRA_ONLY_FRAME) {
+ if (cm->intra_only) aom_wb_write_bit(wb, cm->allow_screen_content_tools);
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ if (!cm->error_resilient_mode) {
+ if (cm->intra_only) {
+ aom_wb_write_bit(wb,
+ cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
+ }
+ }
+#endif
+#if CONFIG_EXT_REFS
+ cpi->refresh_frame_mask = get_refresh_mask(cpi);
+#endif // CONFIG_EXT_REFS
+
+ if (cm->intra_only) {
+#if CONFIG_EXT_REFS
+ aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
+#else
+ aom_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+#endif // CONFIG_EXT_REFS
+ write_frame_size(cm, wb);
+
+#if CONFIG_ANS && ANS_MAX_SYMBOLS
+ assert(cpi->common.ans_window_size_log2 >= 8);
+ assert(cpi->common.ans_window_size_log2 < 24);
+ aom_wb_write_literal(wb, cpi->common.ans_window_size_log2 - 8, 4);
+#endif // CONFIG_ANS && ANS_MAX_SYMBOLS
+ }
+ } else if (cm->frame_type == INTER_FRAME) {
+ MV_REFERENCE_FRAME ref_frame;
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ if (!cm->error_resilient_mode) {
+ aom_wb_write_bit(wb, cm->reset_frame_context != RESET_FRAME_CONTEXT_NONE);
+ if (cm->reset_frame_context != RESET_FRAME_CONTEXT_NONE)
+ aom_wb_write_bit(wb,
+ cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
+ }
+#endif
+
+#if CONFIG_EXT_REFS
+ cpi->refresh_frame_mask = get_refresh_mask(cpi);
+ aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
+#else
+ aom_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+ if (!cpi->refresh_frame_mask) {
+ // NOTE: "cpi->refresh_frame_mask == 0" indicates that the coded frame
+ // will not be used as a reference
+ cm->is_reference_frame = 0;
+ }
+#endif // CONFIG_EXT_REFS
+
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
+ aom_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
+ REF_FRAMES_LOG2);
+#if !CONFIG_FRAME_SIGN_BIAS
+ aom_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]);
+#endif // !CONFIG_FRAME_SIGN_BIAS
+#if CONFIG_REFERENCE_BUFFER
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ int i = get_ref_frame_map_idx(cpi, ref_frame);
+ int frame_id_len = cm->seq_params.frame_id_length_minus7 + 7;
+ int diff_len = cm->seq_params.delta_frame_id_length_minus2 + 2;
+ int delta_frame_id_minus1 =
+ ((cm->current_frame_id - cm->ref_frame_id[i] +
+ (1 << frame_id_len)) %
+ (1 << frame_id_len)) -
+ 1;
+ if (delta_frame_id_minus1 < 0 ||
+ delta_frame_id_minus1 >= (1 << diff_len))
+ cm->invalid_delta_frame_id_minus1 = 1;
+ aom_wb_write_literal(wb, delta_frame_id_minus1, diff_len);
+ }
+#endif // CONFIG_REFERENCE_BUFFER
+ }
+
+#if CONFIG_FRAME_SIZE
+ if (cm->error_resilient_mode == 0) {
+ write_frame_size_with_refs(cpi, wb);
} else {
- assert(cm->global_motion[frame].wmtype == IDENTITY &&
- "Invalid warp type for frames of different resolutions");
+ write_frame_size(cm, wb);
}
- /*
- printf("Frame %d/%d: Enc Ref %d (used %d): %d %d %d %d\n",
- cm->current_video_frame, cm->show_frame, frame,
- cpi->global_motion_used[frame], cm->global_motion[frame].wmmat[0],
- cm->global_motion[frame].wmmat[1], cm->global_motion[frame].wmmat[2],
- cm->global_motion[frame].wmmat[3]);
- */
+#else
+ write_frame_size_with_refs(cpi, wb);
+#endif
+
+#if CONFIG_AMVR
+ if (cm->seq_mv_precision_level == 2) {
+ aom_wb_write_bit(wb, cm->cur_frame_mv_precision_level == 0);
+ }
+#endif
+ aom_wb_write_bit(wb, cm->allow_high_precision_mv);
+
+ fix_interp_filter(cm, cpi->td.counts);
+ write_frame_interp_filter(cm->interp_filter, wb);
+#if CONFIG_TEMPMV_SIGNALING
+ if (frame_might_use_prev_frame_mvs(cm)) {
+ aom_wb_write_bit(wb, cm->use_prev_frame_mvs);
+ }
+#endif
+ } else if (cm->frame_type == S_FRAME) {
+ MV_REFERENCE_FRAME ref_frame;
+
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ if (!cm->error_resilient_mode) {
+ aom_wb_write_bit(wb, cm->reset_frame_context != RESET_FRAME_CONTEXT_NONE);
+ if (cm->reset_frame_context != RESET_FRAME_CONTEXT_NONE)
+ aom_wb_write_bit(wb,
+ cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
+ }
+#endif
+
+#if CONFIG_EXT_REFS
+ if (!cpi->refresh_frame_mask) {
+ // NOTE: "cpi->refresh_frame_mask == 0" indicates that the coded frame
+ // will not be used as a reference
+ cm->is_reference_frame = 0;
+ }
+#endif // CONFIG_EXT_REFS
+
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
+ aom_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
+ REF_FRAMES_LOG2);
+ assert(cm->ref_frame_sign_bias[ref_frame] == 0);
+#if CONFIG_REFERENCE_BUFFER
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ int i = get_ref_frame_map_idx(cpi, ref_frame);
+ int frame_id_len = cm->seq_params.frame_id_length_minus7 + 7;
+ int diff_len = cm->seq_params.delta_frame_id_length_minus2 + 2;
+ int delta_frame_id_minus1 =
+ ((cm->current_frame_id - cm->ref_frame_id[i] +
+ (1 << frame_id_len)) %
+ (1 << frame_id_len)) -
+ 1;
+ if (delta_frame_id_minus1 < 0 ||
+ delta_frame_id_minus1 >= (1 << diff_len))
+ cm->invalid_delta_frame_id_minus1 = 1;
+ aom_wb_write_literal(wb, delta_frame_id_minus1, diff_len);
+ }
+#endif // CONFIG_REFERENCE_BUFFER
+ }
+
+#if CONFIG_FRAME_SIZE
+ if (cm->error_resilient_mode == 0) {
+ write_frame_size_with_refs(cpi, wb);
+ } else {
+ write_frame_size(cm, wb);
+ }
+#else
+ write_frame_size_with_refs(cpi, wb);
+#endif
+
+ aom_wb_write_bit(wb, cm->allow_high_precision_mv);
+
+ fix_interp_filter(cm, cpi->td.counts);
+ write_frame_interp_filter(cm->interp_filter, wb);
+#if CONFIG_TEMPMV_SIGNALING
+ if (frame_might_use_prev_frame_mvs(cm)) {
+ aom_wb_write_bit(wb, cm->use_prev_frame_mvs);
+ }
+#endif
+ }
+
+#if CONFIG_MFMV
+ if (cm->show_frame == 0) {
+ int arf_offset = AOMMIN(
+ (MAX_GF_INTERVAL - 1),
+ cpi->twopass.gf_group.arf_src_offset[cpi->twopass.gf_group.index]);
+#if CONFIG_EXT_REFS
+ int brf_offset =
+ cpi->twopass.gf_group.brf_src_offset[cpi->twopass.gf_group.index];
+
+ arf_offset = AOMMIN((MAX_GF_INTERVAL - 1), arf_offset + brf_offset);
+#endif
+ aom_wb_write_literal(wb, arf_offset, 4);
+ }
+#endif
+
+#if CONFIG_REFERENCE_BUFFER
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ cm->refresh_mask =
+ cm->frame_type == KEY_FRAME ? 0xFF : get_refresh_mask(cpi);
+ }
+#endif // CONFIG_REFERENCE_BUFFER
+
+ if (!cm->error_resilient_mode) {
+ aom_wb_write_bit(
+ wb, cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_FORWARD);
+ }
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ aom_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
+#endif
+ encode_loopfilter(cm, wb);
+ encode_quantization(cm, wb);
+ encode_segmentation(cm, xd, wb);
+ {
+ int i;
+ struct segmentation *const seg = &cm->seg;
+ int segment_quantizer_active = 0;
+ for (i = 0; i < MAX_SEGMENTS; i++) {
+ if (segfeature_active(seg, i, SEG_LVL_ALT_Q)) {
+ segment_quantizer_active = 1;
+ }
+ }
+
+ if (cm->delta_q_present_flag)
+ assert(segment_quantizer_active == 0 && cm->base_qindex > 0);
+ if (segment_quantizer_active == 0 && cm->base_qindex > 0) {
+ aom_wb_write_bit(wb, cm->delta_q_present_flag);
+ if (cm->delta_q_present_flag) {
+ aom_wb_write_literal(wb, OD_ILOG_NZ(cm->delta_q_res) - 1, 2);
+ xd->prev_qindex = cm->base_qindex;
+#if CONFIG_EXT_DELTA_Q
+ assert(seg->abs_delta == SEGMENT_DELTADATA);
+ aom_wb_write_bit(wb, cm->delta_lf_present_flag);
+ if (cm->delta_lf_present_flag) {
+ aom_wb_write_literal(wb, OD_ILOG_NZ(cm->delta_lf_res) - 1, 2);
+#if CONFIG_LOOPFILTER_LEVEL
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id)
+ xd->prev_delta_lf[lf_id] = 0;
+#endif // CONFIG_LOOPFILTER_LEVEL
+ xd->prev_delta_lf_from_base = 0;
+ }
+#endif // CONFIG_EXT_DELTA_Q
+ }
+ }
+ }
+#if CONFIG_CDEF
+ if (!cm->all_lossless) {
+ encode_cdef(cm, wb);
}
-}
#endif
+#if CONFIG_LOOP_RESTORATION
+ encode_restoration_mode(cm, wb);
+#endif // CONFIG_LOOP_RESTORATION
+ write_tx_mode(cm, &cm->tx_mode, wb);
+
+ if (cpi->allow_comp_inter_inter) {
+ const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
+#if !CONFIG_REF_ADAPT
+ const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
+#endif // !CONFIG_REF_ADAPT
+
+ aom_wb_write_bit(wb, use_hybrid_pred);
+#if !CONFIG_REF_ADAPT
+ if (!use_hybrid_pred) aom_wb_write_bit(wb, use_compound_pred);
+#endif // !CONFIG_REF_ADAPT
+ }
+ write_compound_tools(cm, wb);
+
+#if CONFIG_EXT_TX
+ aom_wb_write_bit(wb, cm->reduced_tx_set_used);
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_GLOBAL_MOTION
+ if (!frame_is_intra_only(cm)) write_global_motion(cpi, wb);
+#endif // CONFIG_GLOBAL_MOTION
+
+ write_tile_info(cm, wb);
+}
+#endif // CONFIG_OBU
static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
AV1_COMMON *const cm = &cpi->common;
@@ -4587,19 +5225,13 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
(void)i;
(void)fc;
-#if CONFIG_ANS
- int header_size;
- header_bc = &cpi->buf_ans;
- buf_ans_write_init(header_bc, data);
-#else
aom_writer real_header_bc;
header_bc = &real_header_bc;
- aom_start_encode(header_bc, data);
+#if CONFIG_ANS
+ header_bc->size = 1 << cpi->common.ans_window_size_log2;
#endif
+ aom_start_encode(header_bc, data);
-#if CONFIG_LOOP_RESTORATION
- encode_restoration(cm, header_bc);
-#endif // CONFIG_LOOP_RESTORATION
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if (cm->tx_mode == TX_MODE_SELECT)
av1_cond_prob_diff_update(header_bc, &cm->fc->quarter_tx_size_prob,
@@ -4610,27 +5242,18 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
#endif // CONFIG_LV_MAP
#if CONFIG_VAR_TX && !CONFIG_NEW_MULTISYMBOL
- update_txfm_partition_probs(cm, header_bc, counts, probwt);
+ if (cm->tx_mode == TX_MODE_SELECT)
+ update_txfm_partition_probs(cm, header_bc, counts, probwt);
#endif
#if !CONFIG_NEW_MULTISYMBOL
update_skip_probs(cm, header_bc, counts);
#endif
- if (frame_is_intra_only(cm)) {
- av1_copy(cm->fc->kf_y_cdf, av1_kf_y_mode_cdf);
-
-#if CONFIG_INTRABC
- if (cm->allow_screen_content_tools) {
- av1_cond_prob_diff_update(header_bc, &fc->intrabc_prob,
- cm->counts.intrabc, probwt);
- }
-#endif
- } else {
+ if (!frame_is_intra_only(cm)) {
#if !CONFIG_NEW_MULTISYMBOL
update_inter_mode_probs(cm, header_bc, counts);
#endif
-#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
if (cm->reference_mode != COMPOUND_REFERENCE &&
cm->allow_interintra_compound) {
@@ -4656,17 +5279,6 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
#endif // CONFIG_WEDGE && CONFIG_NEW_MULTISYMBOL
}
#endif // CONFIG_INTERINTRA
-#endif // CONFIG_EXT_INTER
-
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- for (i = ADAPT_OVERLAP_BLOCK_8X8; i < ADAPT_OVERLAP_BLOCKS; ++i) {
- prob_diff_update(av1_ncobmc_mode_tree, fc->ncobmc_mode_prob[i],
- counts->ncobmc_mode[i], MAX_NCOBMC_MODES, probwt,
- header_bc);
- }
-#endif
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if !CONFIG_NEW_MULTISYMBOL
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
@@ -4724,11 +5336,11 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
}
#endif // CONFIG_NEW_MULTISYMBOL
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
for (i = 0; i < COMP_INTER_MODE_CONTEXTS; i++)
av1_cond_prob_diff_update(header_bc, &fc->comp_inter_mode_prob[i],
counts->comp_inter_mode[i], probwt);
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
#if !CONFIG_NEW_MULTISYMBOL
av1_write_nmv_probs(cm, cm->allow_high_precision_mv, header_bc, counts->mv);
@@ -4736,22 +5348,13 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) {
#if CONFIG_SUPERTX
if (!xd->lossless[0]) update_supertx_probs(cm, probwt, header_bc);
#endif // CONFIG_SUPERTX
-#if CONFIG_GLOBAL_MOTION
- write_global_motion(cpi, header_bc);
-#endif // CONFIG_GLOBAL_MOTION
}
-#if CONFIG_ANS
- aom_buf_ans_flush(header_bc);
- header_size = buf_ans_write_end(header_bc);
- assert(header_size <= 0xffff);
- return header_size;
-#else
aom_stop_encode(header_bc);
assert(header_bc->pos <= 0xffff);
return header_bc->pos;
-#endif // CONFIG_ANS
}
+#if !CONFIG_OBU || CONFIG_EXT_TILE
static int choose_size_bytes(uint32_t size, int spare_msbs) {
// Choose the number of bytes required to represent size, without
// using the 'spare_msbs' number of most significant bits.
@@ -4781,6 +5384,7 @@ static void mem_put_varsize(uint8_t *const dst, const int sz, const int val) {
default: assert(0 && "Invalid size"); break;
}
}
+
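// Illustrative pairing of choose_size_bytes() and mem_put_varsize()
// (hypothetical values, not from the patch): a tile size of 70000 (0x11170)
// needs 17 bits, so with spare_msbs == 0 choose_size_bytes() picks 3 and
// mem_put_varsize() stores the little-endian bytes 0x70 0x11 0x01.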
static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst,
const uint32_t data_size, const uint32_t max_tile_size,
const uint32_t max_tile_col_size,
@@ -4889,14 +5493,334 @@ static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst,
return wpos;
}
}
+#endif
+
+#if CONFIG_OBU
+static uint32_t write_obu_header(OBU_TYPE obu_type, int obu_extension,
+ uint8_t *const dst) {
+ struct aom_write_bit_buffer wb = { dst, 0 };
+ uint32_t size = 0;
+
+ aom_wb_write_literal(&wb, (int)obu_type, 5);
+ aom_wb_write_literal(&wb, 0, 2);
+ aom_wb_write_literal(&wb, obu_extension ? 1 : 0, 1);
+ if (obu_extension) {
+ aom_wb_write_literal(&wb, obu_extension & 0xFF, 8);
+ }
+
+ size = aom_wb_bytes_written(&wb);
+ return size;
+}
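// A worked example of the header written above (the enum value is an
// assumption, not taken from the patch): assuming OBU_FRAME_HEADER == 3 and
// no extension, the single header byte is laid out MSB-first as
//   obu_type (5 bits) | reserved (2 bits) | extension_flag (1 bit)
//   = 00011 00 0 = 0x18,
// and write_obu_header() returns 1. With a nonzero obu_extension, a second
// byte carrying (obu_extension & 0xFF) follows and the function returns 2.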
+
+static uint32_t write_temporal_delimiter_obu() { return 0; }
+
+static uint32_t write_sequence_header_obu(AV1_COMP *cpi, uint8_t *const dst) {
+ AV1_COMMON *const cm = &cpi->common;
+ SequenceHeader *const seq_params = &cm->seq_params;
+ struct aom_write_bit_buffer wb = { dst, 0 };
+ uint32_t size = 0;
+
+ write_profile(cm->profile, &wb);
+
+ aom_wb_write_literal(&wb, 0, 4);
+
+ seq_params->frame_id_numbers_present_flag = FRAME_ID_NUMBERS_PRESENT_FLAG;
+ aom_wb_write_literal(&wb, seq_params->frame_id_numbers_present_flag, 1);
+ if (seq_params->frame_id_numbers_present_flag) {
+ seq_params->frame_id_length_minus7 = FRAME_ID_LENGTH_MINUS7;
+ seq_params->delta_frame_id_length_minus2 = DELTA_FRAME_ID_LENGTH_MINUS2;
+ aom_wb_write_literal(&wb, seq_params->frame_id_length_minus7, 4);
+ aom_wb_write_literal(&wb, seq_params->delta_frame_id_length_minus2, 4);
+ }
+
+ // color_config
+ write_bitdepth_colorspace_sampling(cm, &wb);
+
+ size = aom_wb_bytes_written(&wb);
+ return size;
+}
+
+static uint32_t write_frame_header_obu(AV1_COMP *cpi, uint8_t *const dst) {
+ AV1_COMMON *const cm = &cpi->common;
+ struct aom_write_bit_buffer wb = { dst, 0 };
+ uint32_t total_size = 0;
+ uint32_t compressed_hdr_size, uncompressed_hdr_size;
+
+ write_uncompressed_header_obu(cpi, &wb);
+
+ if (cm->show_existing_frame) {
+ total_size = aom_wb_bytes_written(&wb);
+ return total_size;
+ }
+
+  // write the tile length code (always 4 bytes for now)
+ aom_wb_write_literal(&wb, 3, 2);
+
+ if (!use_compressed_header(cm)) {
+ uncompressed_hdr_size = aom_wb_bytes_written(&wb);
+ compressed_hdr_size = 0;
+ } else {
+ // placeholder for the compressed header length
+ struct aom_write_bit_buffer compr_hdr_len_wb = wb;
+ aom_wb_write_literal(&wb, 0, 16);
+
+ uncompressed_hdr_size = aom_wb_bytes_written(&wb);
+ compressed_hdr_size =
+ write_compressed_header(cpi, dst + uncompressed_hdr_size);
+ aom_wb_overwrite_literal(&compr_hdr_len_wb, (int)(compressed_hdr_size), 16);
+ }
+
+ total_size = uncompressed_hdr_size + compressed_hdr_size;
+ return total_size;
+}
+
+static uint32_t write_tile_group_header(uint8_t *const dst, int startTile,
+ int endTile, int tiles_log2) {
+ struct aom_write_bit_buffer wb = { dst, 0 };
+ uint32_t size = 0;
+
+ aom_wb_write_literal(&wb, startTile, tiles_log2);
+ aom_wb_write_literal(&wb, endTile, tiles_log2);
+
+ size = aom_wb_bytes_written(&wb);
+ return size;
+}
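// Worked example (hypothetical values): for a 2x2 tile frame,
// n_log2_tiles == 2, a tile group covering tiles 0..1 writes startTile = 0
// ("00") and endTile = 1 ("01"); aom_wb_bytes_written() rounds the four
// written bits up to a whole byte, so the function returns 1.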
+
+static uint32_t write_tiles_in_tg_obus(AV1_COMP *const cpi, uint8_t *const dst,
+ unsigned int *max_tile_size,
+ unsigned int *max_tile_col_size,
+ uint8_t *const frame_header_obu_location,
+ uint32_t frame_header_obu_size,
+ int insert_frame_header_obu_flag) {
+ const AV1_COMMON *const cm = &cpi->common;
+ aom_writer mode_bc;
+ int tile_row, tile_col;
+ TOKENEXTRA *(*const tok_buffers)[MAX_TILE_COLS] = cpi->tile_tok;
+ TileBufferEnc(*const tile_buffers)[MAX_TILE_COLS] = cpi->tile_buffers;
+ uint32_t total_size = 0;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
+ unsigned int tile_size = 0;
+ const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols;
+ // Fixed size tile groups for the moment
+ const int num_tg_hdrs = cm->num_tg;
+ const int tg_size =
+#if CONFIG_EXT_TILE
+ (cm->large_scale_tile)
+ ? 1
+ :
+#endif // CONFIG_EXT_TILE
+ (tile_rows * tile_cols + num_tg_hdrs - 1) / num_tg_hdrs;
+ int tile_count = 0;
+ int curr_tg_data_size = 0;
+ uint8_t *data = dst;
+ int new_tg = 1;
+#if CONFIG_EXT_TILE
+ const int have_tiles = tile_cols * tile_rows > 1;
+#endif
+
+ *max_tile_size = 0;
+ *max_tile_col_size = 0;
+
+#if CONFIG_EXT_TILE
+ if (cm->large_scale_tile) {
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ TileInfo tile_info;
+ const int is_last_col = (tile_col == tile_cols - 1);
+ const uint32_t col_offset = total_size;
+
+ av1_tile_set_col(&tile_info, cm, tile_col);
+
+ // The last column does not have a column header
+ if (!is_last_col) total_size += 4;
+
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+ TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
+ const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
+ const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
+ const int data_offset = have_tiles ? 4 : 0;
+ const int tile_idx = tile_row * tile_cols + tile_col;
+ TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
+ av1_tile_set_row(&tile_info, cm, tile_row);
+
+ buf->data = dst + total_size;
+
+        // If CONFIG_EXT_TILE == 1, every tile in the row has a header,
+ // even for the last one, unless no tiling is used at all.
+ total_size += data_offset;
+ // Initialise tile context from the frame context
+ this_tile->tctx = *cm->fc;
+ cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
+#if CONFIG_PVQ
+ cpi->td.mb.pvq_q = &this_tile->pvq_q;
+ cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context;
+#endif // CONFIG_PVQ
+#if CONFIG_ANS
+ mode_bc.size = 1 << cpi->common.ans_window_size_log2;
+#endif
+ aom_start_encode(&mode_bc, buf->data + data_offset);
+ write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
+ assert(tok == tok_end);
+ aom_stop_encode(&mode_bc);
+ tile_size = mode_bc.pos;
+#if CONFIG_PVQ
+ cpi->td.mb.pvq_q = NULL;
+#endif
+ buf->size = tile_size;
+
+ // Record the maximum tile size we see, so we can compact headers later.
+ *max_tile_size = AOMMAX(*max_tile_size, tile_size);
+
+ if (have_tiles) {
+ // tile header: size of this tile, or copy offset
+ uint32_t tile_header = tile_size;
+ const int tile_copy_mode =
+ ((AOMMAX(cm->tile_width, cm->tile_height) << MI_SIZE_LOG2) <= 256)
+ ? 1
+ : 0;
+
+ // If tile_copy_mode = 1, check if this tile is a copy tile.
+          // Copy tiles are very unlikely on key frames, so skip the search
+          // there to avoid unnecessary work.
+ if (cm->frame_type != KEY_FRAME && tile_copy_mode) {
+            const int identical_tile_offset =
+                find_identical_tile(tile_row, tile_col, tile_buffers);
+
+            if (identical_tile_offset > 0) {
+              tile_size = 0;
+              tile_header = identical_tile_offset | 0x80;
+ tile_header <<= 24;
+ }
+ }
+
+ mem_put_le32(buf->data, tile_header);
+ }
+
+ total_size += tile_size;
+ }
+
+ if (!is_last_col) {
+ uint32_t col_size = total_size - col_offset - 4;
+ mem_put_le32(dst + col_offset, col_size);
+
+        // If this is not the final packing, record the maximum tile column
+        // size we see; otherwise, check whether the tile size is out of range.
+ *max_tile_col_size = AOMMAX(*max_tile_col_size, col_size);
+ }
+ }
+ } else {
+#endif // CONFIG_EXT_TILE
+
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+ TileInfo tile_info;
+ const int is_last_row = (tile_row == tile_rows - 1);
+ av1_tile_set_row(&tile_info, cm, tile_row);
+
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ const int tile_idx = tile_row * tile_cols + tile_col;
+ TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
+ TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
+ const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
+ const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
+ const int is_last_col = (tile_col == tile_cols - 1);
+ const int is_last_tile = is_last_col && is_last_row;
+ int is_last_tile_in_tg = 0;
+
+ if (new_tg) {
+ if (insert_frame_header_obu_flag && tile_idx) {
+ // insert a copy of frame header OBU (including 4-byte size),
+ // except before the first tile group
+ data = dst + total_size;
+ memmove(data, frame_header_obu_location, frame_header_obu_size);
+ total_size += frame_header_obu_size;
+ }
+ data = dst + total_size;
+ // A new tile group begins at this tile. Write the obu header and
+ // tile group header
+ curr_tg_data_size = write_obu_header(OBU_TILE_GROUP, 0, data + 4);
+ if (n_log2_tiles)
+ curr_tg_data_size += write_tile_group_header(
+ data + curr_tg_data_size + 4, tile_idx,
+ AOMMIN(tile_idx + tg_size - 1, tile_cols * tile_rows - 1),
+ n_log2_tiles);
+ total_size += curr_tg_data_size + 4;
+ new_tg = 0;
+ tile_count = 0;
+ }
+ tile_count++;
+ av1_tile_set_col(&tile_info, cm, tile_col);
+
+ if (tile_count == tg_size || tile_idx == (tile_cols * tile_rows - 1)) {
+ is_last_tile_in_tg = 1;
+ new_tg = 1;
+ } else {
+ is_last_tile_in_tg = 0;
+ }
+
+#if CONFIG_DEPENDENT_HORZTILES
+ av1_tile_set_tg_boundary(&tile_info, cm, tile_row, tile_col);
+#endif
+ buf->data = dst + total_size;
+
+ // The last tile of the tile group does not have a header.
+ if (!is_last_tile_in_tg) total_size += 4;
+
+ // Initialise tile context from the frame context
+ this_tile->tctx = *cm->fc;
+ cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
+#if CONFIG_PVQ
+ cpi->td.mb.pvq_q = &this_tile->pvq_q;
+ cpi->td.mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context;
+#endif // CONFIG_PVQ
+#if CONFIG_ANS
+ mode_bc.size = 1 << cpi->common.ans_window_size_log2;
+#endif // CONFIG_ANS
+ aom_start_encode(&mode_bc, dst + total_size);
+ write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
+#if !CONFIG_LV_MAP
+#if !CONFIG_PVQ
+ assert(tok == tok_end);
+#endif // !CONFIG_PVQ
+#endif // !CONFIG_LV_MAP
+ aom_stop_encode(&mode_bc);
+ tile_size = mode_bc.pos;
+#if CONFIG_PVQ
+ cpi->td.mb.pvq_q = NULL;
+#endif
+ assert(tile_size > 0);
+
+ curr_tg_data_size += (tile_size + (is_last_tile_in_tg ? 0 : 4));
+ buf->size = tile_size;
+
+ if (!is_last_tile) {
+ *max_tile_size = AOMMAX(*max_tile_size, tile_size);
+ }
+ if (!is_last_tile_in_tg) {
+ // size of this tile
+ mem_put_le32(buf->data, tile_size);
+ } else {
+ // write current tile group size
+ mem_put_le32(data, curr_tg_data_size);
+ }
+
+ total_size += tile_size;
+ }
+ }
+#if CONFIG_EXT_TILE
+ }
+#endif // CONFIG_EXT_TILE
+ return (uint32_t)total_size;
+}
+
+#endif
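// With CONFIG_OBU, av1_pack_bitstream() below emits a sequence of
// length-prefixed OBUs. A sketch of the resulting layout for a key frame
// (each [size] is the 4-byte little-endian prefix written with
// mem_put_le32):
//   [size][OBU_TD] [size][OBU_SEQUENCE_HEADER] [size][OBU_FRAME_HEADER]
//   [size][OBU_TILE_GROUP: tile group header + tiles] ...
// with one tile-group entry per group, each optionally preceded by a
// repeated copy of the frame header OBU.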
void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) {
uint8_t *data = dst;
uint32_t data_size;
#if CONFIG_EXT_TILE
AV1_COMMON *const cm = &cpi->common;
- uint32_t compressed_header_size = 0;
- uint32_t uncompressed_header_size;
+ uint32_t compressed_hdr_size = 0;
+ uint32_t uncompressed_hdr_size;
struct aom_write_bit_buffer saved_wb;
struct aom_write_bit_buffer wb = { data, 0 };
const int have_tiles = cm->tile_cols * cm->tile_rows > 1;
@@ -4905,15 +5829,59 @@ void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) {
#endif // CONFIG_EXT_TILE
unsigned int max_tile_size;
unsigned int max_tile_col_size;
+#if CONFIG_OBU
+#if !CONFIG_EXT_TILE
+ AV1_COMMON *const cm = &cpi->common;
+#endif
+ uint32_t obu_size;
+ uint8_t *frame_header_location;
+ uint32_t frame_header_size;
+#endif
#if CONFIG_BITSTREAM_DEBUG
bitstream_queue_reset_write();
#endif
+#if CONFIG_OBU
+ // write temporal delimiter obu, preceded by 4-byte size
+ obu_size = write_obu_header(OBU_TD, 0, data + 4);
+ obu_size += write_temporal_delimiter_obu(/*data + 4 + obu_size*/);
+ mem_put_le32(data, obu_size);
+ data += obu_size + 4;
+
+ // Write the sequence header OBU for KEY_FRAME, preceded by its 4-byte size.
+ if (cm->frame_type == KEY_FRAME) {
+ obu_size = write_obu_header(OBU_SEQUENCE_HEADER, 0, data + 4);
+ obu_size += write_sequence_header_obu(cpi, data + 4 + obu_size);
+ mem_put_le32(data, obu_size);
+ data += obu_size + 4;
+ }
+
+ // Write the frame header OBU, preceded by its 4-byte size.
+ frame_header_location = data + 4;
+ obu_size = write_obu_header(OBU_FRAME_HEADER, 0, frame_header_location);
+ frame_header_size = write_frame_header_obu(cpi, data + 4 + obu_size);
+ obu_size += frame_header_size;
+ mem_put_le32(data, obu_size);
+ data += obu_size + 4;
+
+ if (cm->show_existing_frame) {
+ data_size = 0;
+ } else {
+ // Each tile group OBU will be preceded by the 4-byte size of the
+ // tile group OBU.
+ data_size =
+ write_tiles_in_tg_obus(cpi, data, &max_tile_size, &max_tile_col_size,
+ frame_header_location - 4, obu_size + 4,
+ 1 /* cm->error_resilient_mode */);
+ }
+
+#endif
+
#if CONFIG_EXT_TILE
if (cm->large_scale_tile) {
// Write the uncompressed header
- write_uncompressed_header(cpi, &wb);
+ write_uncompressed_header_frame(cpi, &wb);
#if CONFIG_EXT_REFS
if (cm->show_existing_frame) {
@@ -4934,23 +5902,29 @@ void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) {
// Number of bytes in tile size - 1
aom_wb_write_literal(&wb, 0, 2);
}
- // Size of compressed header
- aom_wb_write_literal(&wb, 0, 16);
-
- uncompressed_header_size = (uint32_t)aom_wb_bytes_written(&wb);
- data += uncompressed_header_size;
- aom_clear_system_state();
-
- // Write the compressed header
- compressed_header_size = write_compressed_header(cpi, data);
- data += compressed_header_size;
+ if (!use_compressed_header(cm)) {
+ uncompressed_hdr_size = (uint32_t)aom_wb_bytes_written(&wb);
+ aom_clear_system_state();
+ compressed_hdr_size = 0;
+ } else {
+ // Size of compressed header
+ aom_wb_write_literal(&wb, 0, 16);
+ uncompressed_hdr_size = (uint32_t)aom_wb_bytes_written(&wb);
+ aom_clear_system_state();
+ // Write the compressed header
+ compressed_hdr_size =
+ write_compressed_header(cpi, data + uncompressed_hdr_size);
+ }
+ data += uncompressed_hdr_size + compressed_hdr_size;
// Write the encoded tile data
data_size = write_tiles(cpi, data, &max_tile_size, &max_tile_col_size);
} else {
#endif // CONFIG_EXT_TILE
+#if !CONFIG_OBU
data_size = write_tiles(cpi, data, &max_tile_size, &max_tile_col_size);
+#endif
#if CONFIG_EXT_TILE
}
#endif // CONFIG_EXT_TILE
@@ -4972,9 +5946,9 @@ void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) {
assert(tile_size_bytes >= 1 && tile_size_bytes <= 4);
aom_wb_write_literal(&saved_wb, tile_size_bytes - 1, 2);
}
- // TODO(jbb): Figure out what to do if compressed_header_size > 16 bits.
- assert(compressed_header_size <= 0xffff);
- aom_wb_write_literal(&saved_wb, compressed_header_size, 16);
+ // TODO(jbb): Figure out what to do if compressed_hdr_size > 16 bits.
+ assert(compressed_hdr_size <= 0xffff);
+ aom_wb_write_literal(&saved_wb, compressed_hdr_size, 16);
} else {
#endif // CONFIG_EXT_TILE
data += data_size;
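
The large-scale-tile path above relies on a reserve-then-patch idiom: a 16-bit placeholder for the compressed header size is written into the uncompressed header, saved_wb keeps a bit writer pointed at that field, and the real compressed_hdr_size is patched in once write_compressed_header has run (the field is skipped entirely when use_compressed_header(cm) is false). A rough sketch of the idiom with a simplified bit writer; the struct only loosely mirrors aom_write_bit_buffer, and unlike the real writer it clears each bit before setting it so the same routine can also patch a previously written field in place:

#include <assert.h>
#include <stdint.h>

// Loosely modeled on aom_write_bit_buffer: a byte buffer plus a bit offset.
struct bit_writer {
  uint8_t *buf;
  unsigned bit_offset;
};

// Write `bits` bits of `data`, MSB first, clearing before setting.
static void wb_write_literal(struct bit_writer *wb, int data, int bits) {
  for (int bit = bits - 1; bit >= 0; bit--) {
    const unsigned off = wb->bit_offset++;
    const unsigned byte = off / 8;
    const unsigned shift = 7u - (off % 8);
    const uint8_t mask = (uint8_t)(1u << shift);
    if ((data >> bit) & 1)
      wb->buf[byte] |= mask;
    else
      wb->buf[byte] &= (uint8_t)~mask;
  }
}

// Reserve-then-patch, as done with saved_wb above.
static void demo(uint8_t *dst) {
  struct bit_writer wb = { dst, 0 };
  wb_write_literal(&wb, 0x2, 3);           // ...leading header fields...
  struct bit_writer saved_wb = wb;         // remember the size field position
  wb_write_literal(&wb, 0, 16);            // 16-bit placeholder
  // ...write the compressed header elsewhere and measure it...
  const unsigned compressed_hdr_size = 123;
  assert(compressed_hdr_size <= 0xffff);
  wb_write_literal(&saved_wb, (int)compressed_hdr_size, 16);  // patch it in
}
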
diff --git a/third_party/aom/av1/encoder/bitstream.h b/third_party/aom/av1/encoder/bitstream.h
index 29c930356..76eb85116 100644
--- a/third_party/aom/av1/encoder/bitstream.h
+++ b/third_party/aom/av1/encoder/bitstream.h
@@ -18,12 +18,11 @@ extern "C" {
#include "av1/encoder/encoder.h"
+struct aom_write_bit_buffer;
+
#if CONFIG_REFERENCE_BUFFER
-void write_sequence_header(
-#if CONFIG_EXT_TILE
- AV1_COMMON *const cm,
-#endif // CONFIG_EXT_TILE
- SequenceHeader *seq_params);
+void write_sequence_header(AV1_COMMON *const cm,
+ struct aom_write_bit_buffer *wb);
#endif
void av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dest, size_t *size);
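
The added `struct aom_write_bit_buffer;` line is a plain forward declaration: the new write_sequence_header prototype only passes the struct by pointer, so declaring the tag is enough and the header avoids including the bit-writer definition. A generic sketch of the pattern, with illustrative names that are not from libaom:

/* emitter.h */
#ifndef EMITTER_H_
#define EMITTER_H_

struct bit_sink; /* forward declaration: only used through a pointer here */

/* Callers can pass the pointer through without the full definition; only
 * the translation unit that dereferences it must include the real header. */
void emit_header(int frame_type, struct bit_sink *out);

#endif /* EMITTER_H_ */
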
diff --git a/third_party/aom/av1/encoder/block.h b/third_party/aom/av1/encoder/block.h
index 7b6eb0b0e..8b6627825 100644
--- a/third_party/aom/av1/encoder/block.h
+++ b/third_party/aom/av1/encoder/block.h
@@ -18,6 +18,10 @@
#include "av1/encoder/encint.h"
#endif
#include "av1/common/mvref_common.h"
+#include "av1/encoder/hash.h"
+#if CONFIG_DIST_8X8
+#include "aom/aomcx.h"
+#endif
#ifdef __cplusplus
extern "C" {
@@ -60,28 +64,52 @@ typedef struct macroblock_plane {
#endif // CONFIG_NEW_QUANT
} MACROBLOCK_PLANE;
-/* The [2] dimension is for whether we skip the EOB node (i.e. if previous
- * coefficient in this block was zero) or not. */
-typedef unsigned int av1_coeff_cost[PLANE_TYPES][REF_TYPES][COEF_BANDS][2]
- [COEFF_CONTEXTS][ENTROPY_TOKENS];
+typedef int av1_coeff_cost[PLANE_TYPES][REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
+ [TAIL_TOKENS];
-typedef struct {
- int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
- int16_t mode_context[MODE_CTX_REF_FRAMES];
#if CONFIG_LV_MAP
- // TODO(angiebird): Reduce the buffer size according to sb_type
+typedef struct {
+ int txb_skip_cost[TXB_SKIP_CONTEXTS][2];
+ int nz_map_cost[SIG_COEF_CONTEXTS][2];
+ int eob_cost[EOB_COEF_CONTEXTS][2];
+ int dc_sign_cost[DC_SIGN_CONTEXTS][2];
+ int base_cost[NUM_BASE_LEVELS][COEFF_BASE_CONTEXTS][2];
+#if BR_NODE
+ int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1];
+ int br_cost[BASE_RANGE_SETS][LEVEL_CONTEXTS][2];
+#else // BR_NODE
+ int lps_cost[LEVEL_CONTEXTS][2];
+#endif // BR_NODE
+#if CONFIG_CTX1D
+ int eob_mode_cost[TX_CLASSES][2];
+ int empty_line_cost[TX_CLASSES][EMPTY_LINE_CONTEXTS][2];
+ int hv_eob_cost[TX_CLASSES][HV_EOB_CONTEXTS][2];
+#endif
+} LV_MAP_COEFF_COST;
+
+typedef struct {
tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];
uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
uint8_t txb_skip_ctx[MAX_MB_PLANE]
[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
int dc_sign_ctx[MAX_MB_PLANE]
[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+} CB_COEFF_BUFFER;
+#endif
+
+typedef struct {
+ int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
+ int16_t mode_context[MODE_CTX_REF_FRAMES];
+#if CONFIG_LV_MAP
+ // TODO(angiebird): Reduce the buffer size according to sb_type
+ tran_low_t *tcoeff[MAX_MB_PLANE];
+ uint16_t *eobs[MAX_MB_PLANE];
+ uint8_t *txb_skip_ctx[MAX_MB_PLANE];
+ int *dc_sign_ctx[MAX_MB_PLANE];
#endif
uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
-#if CONFIG_EXT_INTER
int16_t compound_mode_context[MODE_CTX_REF_FRAMES];
-#endif // CONFIG_EXT_INTER
} MB_MODE_INFO_EXT;
typedef struct {
@@ -91,17 +119,41 @@ typedef struct {
int row_max;
} MvLimits;
-#if CONFIG_PALETTE
typedef struct {
uint8_t best_palette_color_map[MAX_SB_SQUARE];
float kmeans_data_buf[2 * MAX_SB_SQUARE];
} PALETTE_BUFFER;
-#endif // CONFIG_PALETTE
+
+typedef struct {
+ TX_TYPE tx_type;
+ TX_SIZE tx_size;
+#if CONFIG_VAR_TX
+ TX_SIZE min_tx_size;
+ TX_SIZE inter_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
+ uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+#endif // CONFIG_VAR_TX
+#if CONFIG_TXK_SEL
+ TX_TYPE txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+#endif // CONFIG_TXK_SEL
+ RD_STATS rd_stats;
+ uint32_t hash_value;
+} TX_RD_INFO;
+
+#define RD_RECORD_BUFFER_LEN 8
+typedef struct {
+ TX_RD_INFO tx_rd_info[RD_RECORD_BUFFER_LEN]; // Circular buffer.
+ int index_start;
+ int num;
+ CRC_CALCULATOR crc_calculator; // Hash function.
+} TX_RD_RECORD;
typedef struct macroblock MACROBLOCK;
struct macroblock {
struct macroblock_plane plane[MAX_MB_PLANE];
+ // Save the transform RD search info.
+ TX_RD_RECORD tx_rd_record;
+
MACROBLOCKD e_mbd;
MB_MODE_INFO_EXT *mbmi_ext;
int skip_block;
@@ -150,9 +202,7 @@ struct macroblock {
uint8_t *left_pred_buf;
#endif // CONFIG_MOTION_VAR
-#if CONFIG_PALETTE
PALETTE_BUFFER *palette_buffer;
-#endif // CONFIG_PALETTE
// These define limits to motion vector components to prevent them
// from extending outside the UMV borders
@@ -169,8 +219,92 @@ struct macroblock {
int skip_chroma_rd;
#endif
- // note that token_costs is the cost when eob node is skipped
- av1_coeff_cost token_costs[TX_SIZES];
+#if CONFIG_LV_MAP
+ LV_MAP_COEFF_COST coeff_costs[TX_SIZES][PLANE_TYPES];
+ uint16_t cb_offset;
+#endif
+
+ av1_coeff_cost token_head_costs[TX_SIZES];
+ av1_coeff_cost token_tail_costs[TX_SIZES];
+
+ // mode costs
+ int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES];
+ int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2];
+ int zeromv_mode_cost[ZEROMV_MODE_CONTEXTS][2];
+ int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
+ int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
+
+ int inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
+ int compound_type_cost[BLOCK_SIZES_ALL][COMPOUND_TYPES];
+#if CONFIG_COMPOUND_SINGLEREF
+ int inter_singleref_comp_mode_cost[INTER_MODE_CONTEXTS]
+ [INTER_SINGLEREF_COMP_MODES];
+#endif // CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_INTERINTRA
+ int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
+#endif // CONFIG_INTERINTRA
+#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES];
+#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+ int motion_mode_cost1[BLOCK_SIZES_ALL][2];
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ int motion_mode_cost2[BLOCK_SIZES_ALL][OBMC_FAMILY_MODES];
+#endif
+#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
+ int ncobmc_mode_cost[ADAPT_OVERLAP_BLOCKS][MAX_NCOBMC_MODES];
+#endif // CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
+#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ int intra_uv_mode_cost[INTRA_MODES][UV_INTRA_MODES];
+ int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
+ int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
+#if CONFIG_EXT_PARTITION_TYPES
+ int partition_cost[PARTITION_CONTEXTS + CONFIG_UNPOISON_PARTITION_CTX]
+ [EXT_PARTITION_TYPES];
+#else
+ int partition_cost[PARTITION_CONTEXTS + CONFIG_UNPOISON_PARTITION_CTX]
+ [PARTITION_TYPES];
+#endif // CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_MRC_TX
+ int mrc_mask_inter_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
+ [PALETTE_COLORS];
+ int mrc_mask_intra_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
+ [PALETTE_COLORS];
+#endif // CONFIG_MRC_TX
+ int palette_y_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
+ int palette_uv_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
+ int palette_y_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
+ [PALETTE_COLORS];
+ int palette_uv_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
+ [PALETTE_COLORS];
+#if CONFIG_CFL
+ // The rate associated with each alpha codeword
+ int cfl_cost[CFL_JOINT_SIGNS][CFL_PRED_PLANES][CFL_ALPHABET_SIZE];
+#endif // CONFIG_CFL
+ int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
+#if CONFIG_EXT_TX
+#if CONFIG_LGT_FROM_PRED
+ int intra_lgt_cost[LGT_SIZES][INTRA_MODES][2];
+ int inter_lgt_cost[LGT_SIZES][2];
+#endif
+ int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+ int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
+ [TX_TYPES];
+#else
+ int intra_tx_type_costs[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
+ int inter_tx_type_costs[EXT_TX_SIZES][TX_TYPES];
+#endif // CONFIG_EXT_TX
+#if CONFIG_EXT_INTRA
+#if CONFIG_INTRA_INTERP
+ int intra_filter_cost[INTRA_FILTERS + 1][INTRA_FILTERS];
+#endif // CONFIG_INTRA_INTERP
+#endif // CONFIG_EXT_INTRA
+#if CONFIG_LOOP_RESTORATION
+ int switchable_restore_cost[RESTORE_SWITCHABLE_TYPES];
+#endif // CONFIG_LOOP_RESTORATION
+#if CONFIG_INTRABC
+ int intrabc_cost[2];
+#endif // CONFIG_INTRABC
int optimize;
@@ -206,6 +340,8 @@ struct macroblock {
int pvq_coded; // Indicates whether pvq_info needs be stored to tokenize
#endif
#if CONFIG_DIST_8X8
+ int using_dist_8x8;
+ aom_tune_metric tune_metric;
#if CONFIG_CB4X4
#if CONFIG_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, decoded_8x8[8 * 8]);
@@ -214,10 +350,6 @@ struct macroblock {
#endif
#endif // CONFIG_CB4X4
#endif // CONFIG_DIST_8X8
-#if CONFIG_CFL
- // Whether luma needs to be stored during RDO.
- int cfl_store_y;
-#endif
};
#ifdef __cplusplus
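
The new TX_RD_RECORD amounts to a small hash-keyed ring buffer: a lookup scans the at most RD_RECORD_BUFFER_LEN live entries for a matching hash_value, and an insert overwrites the oldest slot once the buffer is full, which is what index_start and num track. A sketch of how such a record would be queried and filled; the two helper functions are hypothetical, and only the field layout comes from this diff:

#include <stdint.h>
#include <stddef.h>

#define RD_RECORD_BUFFER_LEN 8

typedef struct {
  uint32_t hash_value;
  /* ...cached transform RD search results (rd_stats, tx_size, ...)... */
} tx_rd_info_t;

typedef struct {
  tx_rd_info_t tx_rd_info[RD_RECORD_BUFFER_LEN];  // circular buffer
  int index_start;  // index of the oldest live entry
  int num;          // live entry count, <= RD_RECORD_BUFFER_LEN
} tx_rd_record_t;

// Hypothetical lookup: return the cached entry for `hash`, or NULL.
static tx_rd_info_t *record_find(tx_rd_record_t *r, uint32_t hash) {
  for (int i = 0; i < r->num; ++i) {
    const int idx = (r->index_start + i) % RD_RECORD_BUFFER_LEN;
    if (r->tx_rd_info[idx].hash_value == hash) return &r->tx_rd_info[idx];
  }
  return NULL;
}

// Hypothetical insert: overwrite the oldest slot when the buffer is full.
static tx_rd_info_t *record_push(tx_rd_record_t *r, uint32_t hash) {
  int idx;
  if (r->num < RD_RECORD_BUFFER_LEN) {
    idx = (r->index_start + r->num) % RD_RECORD_BUFFER_LEN;
    r->num++;
  } else {
    idx = r->index_start;  // evict the oldest entry
    r->index_start = (r->index_start + 1) % RD_RECORD_BUFFER_LEN;
  }
  r->tx_rd_info[idx].hash_value = hash;
  return &r->tx_rd_info[idx];
}
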
diff --git a/third_party/aom/av1/encoder/context_tree.c b/third_party/aom/av1/encoder/context_tree.c
index b1c01b28e..4bbf0e5fb 100644
--- a/third_party/aom/av1/encoder/context_tree.c
+++ b/third_party/aom/av1/encoder/context_tree.c
@@ -22,19 +22,14 @@ static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 1] = {
#endif // CONFIG_EXT_PARTITION
};
-static void alloc_mode_context(AV1_COMMON *cm, int num_4x4_blk,
+static void alloc_mode_context(AV1_COMMON *cm, int num_pix,
#if CONFIG_EXT_PARTITION_TYPES
PARTITION_TYPE partition,
#endif
PICK_MODE_CONTEXT *ctx) {
- const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk);
- const int num_pix = num_blk * tx_size_2d[0];
int i;
-#if CONFIG_CB4X4 && CONFIG_VAR_TX
- ctx->num_4x4_blk = num_blk / 4;
-#else
+ const int num_blk = num_pix / 16;
ctx->num_4x4_blk = num_blk;
-#endif
#if CONFIG_EXT_PARTITION_TYPES
ctx->partition = partition;
@@ -64,13 +59,15 @@ static void alloc_mode_context(AV1_COMMON *cm, int num_4x4_blk,
#endif
}
-#if CONFIG_PALETTE
for (i = 0; i < 2; ++i) {
CHECK_MEM_ERROR(
cm, ctx->color_index_map[i],
aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
}
-#endif // CONFIG_PALETTE
+#if CONFIG_MRC_TX
+ CHECK_MEM_ERROR(cm, ctx->mrc_mask,
+ aom_memalign(32, num_pix * sizeof(*ctx->mrc_mask)));
+#endif // CONFIG_MRC_TX
}
static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
@@ -98,80 +95,63 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
#endif
}
-#if CONFIG_PALETTE
for (i = 0; i < 2; ++i) {
aom_free(ctx->color_index_map[i]);
ctx->color_index_map[i] = 0;
}
-#endif // CONFIG_PALETTE
+#if CONFIG_MRC_TX
+ aom_free(ctx->mrc_mask);
+ ctx->mrc_mask = 0;
+#endif // CONFIG_MRC_TX
}
-static void alloc_tree_contexts(AV1_COMMON *cm, PC_TREE *tree,
- int num_4x4_blk) {
+static void alloc_tree_contexts(AV1_COMMON *cm, PC_TREE *tree, int num_pix) {
#if CONFIG_EXT_PARTITION_TYPES
- alloc_mode_context(cm, num_4x4_blk, PARTITION_NONE, &tree->none);
- alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_HORZ, &tree->horizontal[0]);
- alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT, &tree->vertical[0]);
- alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT, &tree->horizontal[1]);
- alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT, &tree->vertical[1]);
+ alloc_mode_context(cm, num_pix, PARTITION_NONE, &tree->none);
+ alloc_mode_context(cm, num_pix / 2, PARTITION_HORZ, &tree->horizontal[0]);
+ alloc_mode_context(cm, num_pix / 2, PARTITION_VERT, &tree->vertical[0]);
+ alloc_mode_context(cm, num_pix / 2, PARTITION_VERT, &tree->horizontal[1]);
+ alloc_mode_context(cm, num_pix / 2, PARTITION_VERT, &tree->vertical[1]);
- alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_A,
- &tree->horizontala[0]);
- alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_A,
- &tree->horizontala[1]);
- alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_HORZ_A,
- &tree->horizontala[2]);
- alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_HORZ_B,
- &tree->horizontalb[0]);
- alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_B,
- &tree->horizontalb[1]);
- alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_B,
- &tree->horizontalb[2]);
- alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_A,
- &tree->verticala[0]);
- alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_A,
- &tree->verticala[1]);
- alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT_A,
- &tree->verticala[2]);
- alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT_B,
- &tree->verticalb[0]);
- alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_B,
- &tree->verticalb[1]);
- alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_B,
- &tree->verticalb[2]);
+ alloc_mode_context(cm, num_pix / 4, PARTITION_HORZ_A, &tree->horizontala[0]);
+ alloc_mode_context(cm, num_pix / 4, PARTITION_HORZ_A, &tree->horizontala[1]);
+ alloc_mode_context(cm, num_pix / 2, PARTITION_HORZ_A, &tree->horizontala[2]);
+ alloc_mode_context(cm, num_pix / 2, PARTITION_HORZ_B, &tree->horizontalb[0]);
+ alloc_mode_context(cm, num_pix / 4, PARTITION_HORZ_B, &tree->horizontalb[1]);
+ alloc_mode_context(cm, num_pix / 4, PARTITION_HORZ_B, &tree->horizontalb[2]);
+ alloc_mode_context(cm, num_pix / 4, PARTITION_VERT_A, &tree->verticala[0]);
+ alloc_mode_context(cm, num_pix / 4, PARTITION_VERT_A, &tree->verticala[1]);
+ alloc_mode_context(cm, num_pix / 2, PARTITION_VERT_A, &tree->verticala[2]);
+ alloc_mode_context(cm, num_pix / 2, PARTITION_VERT_B, &tree->verticalb[0]);
+ alloc_mode_context(cm, num_pix / 4, PARTITION_VERT_B, &tree->verticalb[1]);
+ alloc_mode_context(cm, num_pix / 4, PARTITION_VERT_B, &tree->verticalb[2]);
for (int i = 0; i < 4; ++i) {
- alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_4,
+ alloc_mode_context(cm, num_pix / 4, PARTITION_HORZ_4,
&tree->horizontal4[i]);
- alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_4,
- &tree->vertical4[i]);
+ alloc_mode_context(cm, num_pix / 4, PARTITION_HORZ_4, &tree->vertical4[i]);
}
#if CONFIG_SUPERTX
- alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ,
- &tree->horizontal_supertx);
- alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT, &tree->vertical_supertx);
- alloc_mode_context(cm, num_4x4_blk, PARTITION_SPLIT, &tree->split_supertx);
- alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ_A,
- &tree->horizontala_supertx);
- alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ_B,
- &tree->horizontalb_supertx);
- alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT_A,
- &tree->verticala_supertx);
- alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT_B,
- &tree->verticalb_supertx);
+ alloc_mode_context(cm, num_pix, PARTITION_HORZ, &tree->horizontal_supertx);
+ alloc_mode_context(cm, num_pix, PARTITION_VERT, &tree->vertical_supertx);
+ alloc_mode_context(cm, num_pix, PARTITION_SPLIT, &tree->split_supertx);
+ alloc_mode_context(cm, num_pix, PARTITION_HORZ_A, &tree->horizontala_supertx);
+ alloc_mode_context(cm, num_pix, PARTITION_HORZ_B, &tree->horizontalb_supertx);
+ alloc_mode_context(cm, num_pix, PARTITION_VERT_A, &tree->verticala_supertx);
+ alloc_mode_context(cm, num_pix, PARTITION_VERT_B, &tree->verticalb_supertx);
#endif // CONFIG_SUPERTX
#else
- alloc_mode_context(cm, num_4x4_blk, &tree->none);
- alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[0]);
- alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[0]);
+ alloc_mode_context(cm, num_pix, &tree->none);
+ alloc_mode_context(cm, num_pix / 2, &tree->horizontal[0]);
+ alloc_mode_context(cm, num_pix / 2, &tree->vertical[0]);
#if CONFIG_SUPERTX
- alloc_mode_context(cm, num_4x4_blk, &tree->horizontal_supertx);
- alloc_mode_context(cm, num_4x4_blk, &tree->vertical_supertx);
- alloc_mode_context(cm, num_4x4_blk, &tree->split_supertx);
+ alloc_mode_context(cm, num_pix, &tree->horizontal_supertx);
+ alloc_mode_context(cm, num_pix, &tree->vertical_supertx);
+ alloc_mode_context(cm, num_pix, &tree->split_supertx);
#endif
- if (num_4x4_blk > 4) {
- alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[1]);
- alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[1]);
+ if (num_pix > 16) {
+ alloc_mode_context(cm, num_pix / 2, &tree->horizontal[1]);
+ alloc_mode_context(cm, num_pix / 2, &tree->vertical[1]);
} else {
memset(&tree->horizontal[1], 0, sizeof(tree->horizontal[1]));
memset(&tree->vertical[1], 0, sizeof(tree->vertical[1]));
@@ -217,8 +197,6 @@ static void free_tree_contexts(PC_TREE *tree) {
// represents the state of our search.
void av1_setup_pc_tree(AV1_COMMON *cm, ThreadData *td) {
int i, j;
-// TODO(jingning): The pc_tree allocation is redundant. We can take out all
-// the leaf nodes after cb4x4 mode is enabled.
#if CONFIG_CB4X4
#if CONFIG_EXT_PARTITION
const int tree_nodes_inc = 1024;
@@ -239,20 +217,21 @@ void av1_setup_pc_tree(AV1_COMMON *cm, ThreadData *td) {
#endif // CONFIG_EXT_PARTITION
int pc_tree_index = 0;
PC_TREE *this_pc;
- PICK_MODE_CONTEXT *this_leaf;
int square_index = 1;
int nodes;
+#if !CONFIG_CB4X4
aom_free(td->leaf_tree);
CHECK_MEM_ERROR(cm, td->leaf_tree,
aom_calloc(leaf_nodes, sizeof(*td->leaf_tree)));
+ PICK_MODE_CONTEXT *this_leaf = &td->leaf_tree[0];
+#endif
aom_free(td->pc_tree);
CHECK_MEM_ERROR(cm, td->pc_tree,
aom_calloc(tree_nodes, sizeof(*td->pc_tree)));
-
this_pc = &td->pc_tree[0];
- this_leaf = &td->leaf_tree[0];
+#if !CONFIG_CB4X4
// 4x4 blocks smaller than 8x8 but in the same 8x8 block share the same
// context so we only need to allocate 1 for each 8x8 block.
for (i = 0; i < leaf_nodes; ++i) {
@@ -262,6 +241,7 @@ void av1_setup_pc_tree(AV1_COMMON *cm, ThreadData *td) {
alloc_mode_context(cm, 16, &td->leaf_tree[i]);
#endif
}
+#endif
// Sets up all the leaf nodes in the tree.
for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
@@ -272,8 +252,10 @@ void av1_setup_pc_tree(AV1_COMMON *cm, ThreadData *td) {
#else
alloc_tree_contexts(cm, tree, 4);
#endif
+#if !CONFIG_CB4X4
tree->leaf_split[0] = this_leaf++;
for (j = 1; j < 4; j++) tree->leaf_split[j] = tree->leaf_split[0];
+#endif
}
// Each node has 4 leaf nodes, fill each block_size level of the tree
@@ -311,29 +293,28 @@ void av1_free_pc_tree(ThreadData *td) {
#else
const int tree_nodes_inc = 256;
#endif // CONFIG_EXT_PARTITION
- const int leaf_factor = 4;
#else
const int tree_nodes_inc = 0;
- const int leaf_factor = 1;
#endif
#if CONFIG_EXT_PARTITION
- const int leaf_nodes = 256 * leaf_factor;
const int tree_nodes = tree_nodes_inc + 256 + 64 + 16 + 4 + 1;
#else
- const int leaf_nodes = 64 * leaf_factor;
const int tree_nodes = tree_nodes_inc + 64 + 16 + 4 + 1;
#endif // CONFIG_EXT_PARTITION
int i;
-
- // Set up all 4x4 mode contexts
- for (i = 0; i < leaf_nodes; ++i) free_mode_context(&td->leaf_tree[i]);
-
- // Sets up all the leaf nodes in the tree.
for (i = 0; i < tree_nodes; ++i) free_tree_contexts(&td->pc_tree[i]);
-
aom_free(td->pc_tree);
td->pc_tree = NULL;
+#if !CONFIG_CB4X4
+ const int leaf_factor = 1;
+#if CONFIG_EXT_PARTITION
+ const int leaf_nodes = 256 * leaf_factor;
+#else
+ const int leaf_nodes = 64 * leaf_factor;
+#endif // CONFIG_EXT_PARTITION
+ for (i = 0; i < leaf_nodes; ++i) free_mode_context(&td->leaf_tree[i]);
aom_free(td->leaf_tree);
td->leaf_tree = NULL;
+#endif
}
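
Note the interface change running through this file: alloc_mode_context and alloc_tree_contexts now take a pixel count instead of a 4x4-block count, and the block count is recovered inside alloc_mode_context as num_pix / 16, since one 4x4 block covers 16 pixels. A one-line restatement of the conversion:

/* Inside alloc_mode_context above: one 4x4 block covers 16 pixels. */
static int num_4x4_blocks_from_pixels(int num_pix) {
  return num_pix / 16; /* e.g. a 64x64 superblock: 4096 / 16 = 256 blocks */
}
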
diff --git a/third_party/aom/av1/encoder/context_tree.h b/third_party/aom/av1/encoder/context_tree.h
index bcfcc274a..38052ba27 100644
--- a/third_party/aom/av1/encoder/context_tree.h
+++ b/third_party/aom/av1/encoder/context_tree.h
@@ -27,9 +27,10 @@ struct ThreadData;
typedef struct {
MODE_INFO mic;
MB_MODE_INFO_EXT mbmi_ext;
-#if CONFIG_PALETTE
uint8_t *color_index_map[2];
-#endif // CONFIG_PALETTE
+#if CONFIG_MRC_TX
+ uint8_t *mrc_mask;
+#endif // CONFIG_MRC_TX
#if CONFIG_VAR_TX
uint8_t *blk_skip[MAX_MB_PLANE];
#endif
@@ -84,6 +85,7 @@ typedef struct PC_TREE {
PICK_MODE_CONTEXT horizontal4[4];
PICK_MODE_CONTEXT vertical4[4];
#endif
+ // TODO(jingning): remove leaf_split[] when cb4x4 experiment flag is removed.
union {
struct PC_TREE *split[4];
PICK_MODE_CONTEXT *leaf_split[4];
diff --git a/third_party/aom/av1/encoder/dct.c b/third_party/aom/av1/encoder/dct.c
index 850b84ca9..a04d46b72 100644
--- a/third_party/aom/av1/encoder/dct.c
+++ b/third_party/aom/av1/encoder/dct.c
@@ -21,7 +21,8 @@
#include "av1/common/av1_fwd_txfm1d.h"
#include "av1/common/av1_fwd_txfm1d_cfg.h"
#include "av1/common/idct.h"
-#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8
+#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
+ CONFIG_DAALA_DCT32 || CONFIG_DAALA_DCT64
#include "av1/common/daala_tx.h"
#endif
@@ -42,18 +43,6 @@ static INLINE void range_check(const tran_low_t *input, const int size,
#endif
}
-#if CONFIG_DAALA_DCT4
-static void fdct4(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[4];
- od_coeff y[4];
- for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i];
- od_bin_fdct4(y, x, 1);
- for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i];
-}
-
-#else
-
static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[4];
@@ -89,19 +78,6 @@ static void fdct4(const tran_low_t *input, tran_low_t *output) {
range_check(output, 4, 16);
}
-#endif
-
-#if CONFIG_DAALA_DCT8
-static void fdct8(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[8];
- od_coeff y[8];
- for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
- od_bin_fdct8(y, x, 1);
- for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
-}
-
-#else
static void fdct8(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
@@ -180,7 +156,6 @@ static void fdct8(const tran_low_t *input, tran_low_t *output) {
range_check(output, 8, 16);
}
-#endif
static void fdct16(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
@@ -755,7 +730,6 @@ static void fdct32(const tran_low_t *input, tran_low_t *output) {
}
#ifndef AV1_DCT_GTEST
-
static void fadst4(const tran_low_t *input, tran_low_t *output) {
tran_high_t x0, x1, x2, x3;
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
@@ -796,18 +770,6 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) {
output[3] = (tran_low_t)fdct_round_shift(s3);
}
-#if CONFIG_DAALA_DCT8
-static void fadst8(const tran_low_t *input, tran_low_t *output) {
- int i;
- od_coeff x[8];
- od_coeff y[8];
- for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i];
- od_bin_fdst8(y, x, 1);
- for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i];
-}
-
-#else
-
static void fadst8(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
@@ -878,7 +840,6 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
output[6] = (tran_low_t)x5;
output[7] = (tran_low_t)-x1;
}
-#endif
static void fadst16(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
@@ -1066,9 +1027,27 @@ static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
#if CONFIG_MRC_TX
static void get_masked_residual32(const int16_t **input, int *input_stride,
const uint8_t *pred, int pred_stride,
- int16_t *masked_input) {
- int mrc_mask[32 * 32];
- get_mrc_mask(pred, pred_stride, mrc_mask, 32, 32, 32);
+ int16_t *masked_input,
+ TxfmParam *txfm_param) {
+ int n_masked_vals = 0;
+ uint8_t *mrc_mask;
+ uint8_t mask_tmp[32 * 32];
+ if ((txfm_param->is_inter && SIGNAL_MRC_MASK_INTER) ||
+ (!txfm_param->is_inter && SIGNAL_MRC_MASK_INTRA)) {
+ mrc_mask = txfm_param->mask;
+ n_masked_vals = get_mrc_diff_mask(*input, *input_stride, mrc_mask, 32, 32,
+ 32, txfm_param->is_inter);
+ } else {
+ mrc_mask = mask_tmp;
+ n_masked_vals = get_mrc_pred_mask(pred, pred_stride, mrc_mask, 32, 32, 32,
+ txfm_param->is_inter);
+ }
+
+ // Do not use MRC_DCT if mask is invalid. DCT_DCT will be used instead.
+ if (!is_valid_mrc_mask(n_masked_vals, 32, 32)) {
+ *txfm_param->valid_mask = 0;
+ return;
+ }
int32_t sum = 0;
int16_t avg;
// Get the masked average of the prediction
@@ -1077,7 +1056,7 @@ static void get_masked_residual32(const int16_t **input, int *input_stride,
sum += mrc_mask[i * 32 + j] * (*input)[i * (*input_stride) + j];
}
}
- avg = ROUND_POWER_OF_TWO_SIGNED(sum, 10);
+ avg = sum / n_masked_vals;
// Replace all of the unmasked pixels in the prediction with the average
// of the masked pixels
for (int i = 0; i < 32; ++i) {
@@ -1087,16 +1066,24 @@ static void get_masked_residual32(const int16_t **input, int *input_stride,
}
*input = masked_input;
*input_stride = 32;
+ *txfm_param->valid_mask = 1;
}
#endif // CONFIG_MRC_TX
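
A behavioral note on the get_masked_residual32 hunk above: the old code computed the masked average as ROUND_POWER_OF_TWO_SIGNED(sum, 10), i.e. it divided by 1024 as though every position of the 32x32 block were masked, whereas the new code divides by the actual n_masked_vals (guaranteed nonzero by the is_valid_mrc_mask check). A sketch of the two, using the 32x32 constants from this file:

#include <stdint.h>

/* Old: assume all 32 * 32 = 1024 samples contribute to the sum. */
static int32_t round_pow2_signed(int32_t v, int n) {
  const int32_t m = v < 0 ? -v : v;
  const int32_t r = (m + (1 << (n - 1))) >> n;
  return v < 0 ? -r : r;
}
static int16_t masked_avg_old(int32_t sum) {
  return (int16_t)round_pow2_signed(sum, 10);
}

/* New: divide by the true masked-sample count, so a sparse mask no longer
 * biases the average toward zero. */
static int16_t masked_avg_new(int32_t sum, int n_masked_vals) {
  return (int16_t)(sum / n_masked_vals);
}
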
-#if CONFIG_LGT
+#if CONFIG_LGT || CONFIG_LGT_FROM_PRED
static void flgt4(const tran_low_t *input, tran_low_t *output,
const tran_high_t *lgtmtx) {
- if (!(input[0] | input[1] | input[2] | input[3])) {
- output[0] = output[1] = output[2] = output[3] = 0;
+ if (!lgtmtx) assert(0);
+#if CONFIG_LGT_FROM_PRED
+ // For DCT/ADST, use butterfly implementations
+ if (lgtmtx[0] == DCT4) {
+ fdct4(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST4) {
+ fadst4(input, output);
return;
}
+#endif // CONFIG_LGT_FROM_PRED
// evaluate s[j] = sum of all lgtmtx[j][i]*input[i] over i=1,...,4
tran_high_t s[4] = { 0 };
@@ -1108,6 +1095,18 @@ static void flgt4(const tran_low_t *input, tran_low_t *output,
static void flgt8(const tran_low_t *input, tran_low_t *output,
const tran_high_t *lgtmtx) {
+ if (!lgtmtx) assert(0);
+#if CONFIG_LGT_FROM_PRED
+ // For DCT/ADST, use butterfly implementations
+ if (lgtmtx[0] == DCT8) {
+ fdct8(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST8) {
+ fadst8(input, output);
+ return;
+ }
+#endif // CONFIG_LGT_FROM_PRED
+
// evaluate s[j] = sum of all lgtmtx[j][i]*input[i] over i=1,...,8
tran_high_t s[8] = { 0 };
for (int i = 0; i < 8; ++i)
@@ -1115,30 +1114,140 @@ static void flgt8(const tran_low_t *input, tran_low_t *output,
for (int i = 0; i < 8; ++i) output[i] = (tran_low_t)fdct_round_shift(s[i]);
}
+#endif // CONFIG_LGT || CONFIG_LGT_FROM_PRED
-// The get_fwd_lgt functions return 1 if LGT is chosen to apply, and 0 otherwise
-int get_fwd_lgt4(transform_1d tx_orig, TxfmParam *txfm_param,
- const tran_high_t *lgtmtx[], int ntx) {
- // inter/intra split
- if (tx_orig == &fadst4) {
- for (int i = 0; i < ntx; ++i)
- lgtmtx[i] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
- return 1;
+#if CONFIG_LGT_FROM_PRED
+static void flgt16up(const tran_low_t *input, tran_low_t *output,
+ const tran_high_t *lgtmtx) {
+ if (lgtmtx[0] == DCT16) {
+ fdct16(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST16) {
+ fadst16(input, output);
+ return;
+ } else if (lgtmtx[0] == DCT32) {
+ fdct32(input, output);
+ return;
+ } else if (lgtmtx[0] == ADST32) {
+ fhalfright32(input, output);
+ return;
+ } else {
+ assert(0);
+ }
+}
+
+typedef void (*FlgtFunc)(const tran_low_t *input, tran_low_t *output,
+ const tran_high_t *lgtmtx);
+
+static FlgtFunc flgt_func[4] = { flgt4, flgt8, flgt16up, flgt16up };
+
+typedef void (*GetLgtFunc)(const TxfmParam *txfm_param, int is_col,
+ const tran_high_t *lgtmtx[], int ntx);
+
+static GetLgtFunc get_lgt_func[4] = { get_lgt4_from_pred, get_lgt8_from_pred,
+ get_lgt16up_from_pred,
+ get_lgt16up_from_pred };
+
+// This inline function corresponds to the upscaling before the first
+// transform in the av1_fht* functions.
+static INLINE tran_low_t fwd_upscale_wrt_txsize(const tran_high_t val,
+ const TX_SIZE tx_size) {
+ switch (tx_size) {
+ case TX_4X4: return (tran_low_t)val << 4;
+ case TX_8X8:
+ case TX_4X16:
+ case TX_16X4:
+ case TX_8X32:
+ case TX_32X8: return (tran_low_t)val << 2;
+ case TX_4X8:
+ case TX_8X4:
+ case TX_8X16:
+ case TX_16X8: return (tran_low_t)fdct_round_shift(val * 4 * Sqrt2);
+ default: assert(0); break;
}
return 0;
}
-int get_fwd_lgt8(transform_1d tx_orig, TxfmParam *txfm_param,
- const tran_high_t *lgtmtx[], int ntx) {
- // inter/intra split
- if (tx_orig == &fadst8) {
- for (int i = 0; i < ntx; ++i)
- lgtmtx[i] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
- return 1;
+// This inline function corresponds to the bit shift after the second
+// transform in the av1_fht* functions.
+static INLINE tran_low_t fwd_downscale_wrt_txsize(const tran_low_t val,
+ const TX_SIZE tx_size) {
+ switch (tx_size) {
+ case TX_4X4: return (val + 1) >> 2;
+ case TX_4X8:
+ case TX_8X4:
+ case TX_8X8:
+ case TX_4X16:
+ case TX_16X4: return (val + (val < 0)) >> 1;
+ case TX_8X16:
+ case TX_16X8: return val;
+ case TX_8X32:
+ case TX_32X8: return ROUND_POWER_OF_TWO_SIGNED(val, 2);
+ default: assert(0); break;
}
return 0;
}
-#endif // CONFIG_LGT
+
+void flgt2d_from_pred_c(const int16_t *input, tran_low_t *output, int stride,
+ TxfmParam *txfm_param) {
+ const TX_SIZE tx_size = txfm_param->tx_size;
+ const int w = tx_size_wide[tx_size];
+ const int h = tx_size_high[tx_size];
+ const int wlog2 = tx_size_wide_log2[tx_size];
+ const int hlog2 = tx_size_high_log2[tx_size];
+ assert(w <= 8 || h <= 8);
+
+ int i, j;
+ tran_low_t out[256]; // max size: 8x32 and 32x8
+ tran_low_t temp_in[32], temp_out[32];
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ get_lgt_func[hlog2 - 2](txfm_param, 1, lgtmtx_col, w);
+ get_lgt_func[wlog2 - 2](txfm_param, 0, lgtmtx_row, h);
+
+ // For forward transforms, to be consistent with the av1_fht* functions,
+ // we apply the short transform first and the long transform second.
+ if (w < h) {
+ // Row transforms
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; ++j)
+ temp_in[j] = fwd_upscale_wrt_txsize(input[i * stride + j], tx_size);
+ flgt_func[wlog2 - 2](temp_in, temp_out, lgtmtx_row[0]);
+ // right shift of 2 bits here in fht8x16 and fht16x8
+ for (j = 0; j < w; ++j)
+ out[j * h + i] = (tx_size == TX_16X8 || tx_size == TX_8X16)
+ ? ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2)
+ : temp_out[j];
+ }
+ // Column transforms
+ for (i = 0; i < w; ++i) {
+ for (j = 0; j < h; ++j) temp_in[j] = out[j + i * h];
+ flgt_func[hlog2 - 2](temp_in, temp_out, lgtmtx_col[0]);
+ for (j = 0; j < h; ++j)
+ output[j * w + i] = fwd_downscale_wrt_txsize(temp_out[j], tx_size);
+ }
+ } else {
+ // Column transforms
+ for (i = 0; i < w; ++i) {
+ for (j = 0; j < h; ++j)
+ temp_in[j] = fwd_upscale_wrt_txsize(input[j * stride + i], tx_size);
+ flgt_func[hlog2 - 2](temp_in, temp_out, lgtmtx_col[0]);
+ // fht8x16 and fht16x8 have right shift of 2 bits here
+ for (j = 0; j < h; ++j)
+ out[j * w + i] = (tx_size == TX_16X8 || tx_size == TX_8X16)
+ ? ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2)
+ : temp_out[j];
+ }
+ // Row transforms
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; ++j) temp_in[j] = out[j + i * w];
+ flgt_func[wlog2 - 2](temp_in, temp_out, lgtmtx_row[0]);
+ for (j = 0; j < w; ++j)
+ output[j + i * w] = fwd_downscale_wrt_txsize(temp_out[j], tx_size);
+ }
+ }
+}
+#endif // CONFIG_LGT_FROM_PRED
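
The two scale helpers above exist so that flgt2d_from_pred_c lands on the same overall scale as the matching av1_fht* function for each transform size. Taking the TX_4X4 branches as a worked example: values are shifted up 4 bits before the first transform and rounded down 2 bits after the second, so a pass-through value comes back scaled by 4 (ignoring the transforms' own internal scaling); the branches for the other sizes balance out similarly against their av1_fht* counterparts. A compact restatement:

/* TX_4X4 bookkeeping from fwd_upscale_wrt_txsize / fwd_downscale_wrt_txsize:
 * (x << 4) on the way in, (v + 1) >> 2 on the way out, so a pass-through
 * value x comes back as roughly 4 * x. */
static int net_tx4x4_scale(int x) { return ((x << 4) + 1) >> 2; }
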
#if CONFIG_EXT_TX
// TODO(sarahparker) these functions will be removed once the highbitdepth
@@ -1148,34 +1257,29 @@ int get_fwd_lgt8(transform_1d tx_orig, TxfmParam *txfm_param,
static void fidtx4(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 4; ++i) {
-#if CONFIG_DAALA_DCT4
- output[i] = input[i];
-#else
output[i] = (tran_low_t)fdct_round_shift(input[i] * Sqrt2);
-#endif
}
}
static void fidtx8(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 8; ++i) {
-#if CONFIG_DAALA_DCT8
- output[i] = input[i];
-#else
output[i] = input[i] * 2;
-#endif
}
}
static void fidtx16(const tran_low_t *input, tran_low_t *output) {
int i;
- for (i = 0; i < 16; ++i)
+ for (i = 0; i < 16; ++i) {
output[i] = (tran_low_t)fdct_round_shift(input[i] * 2 * Sqrt2);
+ }
}
static void fidtx32(const tran_low_t *input, tran_low_t *output) {
int i;
- for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
+ for (i = 0; i < 32; ++i) {
+ output[i] = input[i] * 4;
+ }
}
static void copy_block(const int16_t *src, int src_stride, int l, int w,
@@ -1238,7 +1342,7 @@ static void copy_fliplrud(const int16_t *src, int src_stride, int l, int w,
}
static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w,
- int16_t *buff, int tx_type) {
+ int16_t *buff, TX_TYPE tx_type) {
switch (tx_type) {
#if CONFIG_MRC_TX
case MRC_DCT:
@@ -1278,7 +1382,7 @@ static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w,
void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -1293,6 +1397,26 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
#endif
{
static const transform_2d FHT[] = {
+#if CONFIG_DAALA_DCT4
+ { daala_fdct4, daala_fdct4 }, // DCT_DCT
+ { daala_fdst4, daala_fdct4 }, // ADST_DCT
+ { daala_fdct4, daala_fdst4 }, // DCT_ADST
+ { daala_fdst4, daala_fdst4 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { daala_fdst4, daala_fdct4 }, // FLIPADST_DCT
+ { daala_fdct4, daala_fdst4 }, // DCT_FLIPADST
+ { daala_fdst4, daala_fdst4 }, // FLIPADST_FLIPADST
+ { daala_fdst4, daala_fdst4 }, // ADST_FLIPADST
+ { daala_fdst4, daala_fdst4 }, // FLIPADST_ADST
+ { daala_idtx4, daala_idtx4 }, // IDTX
+ { daala_fdct4, daala_idtx4 }, // V_DCT
+ { daala_idtx4, daala_fdct4 }, // H_DCT
+ { daala_fdst4, daala_idtx4 }, // V_ADST
+ { daala_idtx4, daala_fdst4 }, // H_ADST
+ { daala_fdst4, daala_idtx4 }, // V_FLIPADST
+ { daala_idtx4, daala_fdst4 }, // H_FLIPADST
+#endif
+#else
{ fdct4, fdct4 }, // DCT_DCT
{ fadst4, fdct4 }, // ADST_DCT
{ fdct4, fadst4 }, // DCT_ADST
@@ -1311,6 +1435,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
{ fadst4, fidtx4 }, // V_FLIPADST
{ fidtx4, fadst4 }, // H_FLIPADST
#endif
+#endif
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[4 * 4];
@@ -1325,10 +1450,10 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
#if CONFIG_LGT
// Choose LGT adaptive to the prediction. We may apply different LGTs for
// different rows/columns, indicated by the pointers to 2D arrays
- const tran_high_t *lgtmtx_col[4];
- const tran_high_t *lgtmtx_row[4];
- int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 4);
- int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 4);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif
// Columns
@@ -1340,7 +1465,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
if (use_lgt_col)
- flgt4(temp_in, temp_out, lgtmtx_col[i]);
+ flgt4(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1352,7 +1477,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < 4; ++j) temp_in[j] = out[j + i * 4];
#if CONFIG_LGT
if (use_lgt_row)
- flgt4(temp_in, temp_out, lgtmtx_row[i]);
+ flgt4(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1369,7 +1494,7 @@ void av1_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1408,10 +1533,10 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[4];
- const tran_high_t *lgtmtx_row[8];
- int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 4);
- int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 8);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif
// Rows
@@ -1421,7 +1546,7 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
(tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
#if CONFIG_LGT
if (use_lgt_row)
- flgt4(temp_in, temp_out, lgtmtx_row[i]);
+ flgt4(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1433,7 +1558,7 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
#if CONFIG_LGT
if (use_lgt_col)
- flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ flgt8(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1445,7 +1570,7 @@ void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1484,10 +1609,10 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[8];
- const tran_high_t *lgtmtx_row[4];
- int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 8);
- int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 4);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// Columns
@@ -1497,7 +1622,7 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
#if CONFIG_LGT
if (use_lgt_col)
- flgt4(temp_in, temp_out, lgtmtx_col[i]);
+ flgt4(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1509,7 +1634,7 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
#if CONFIG_LGT
if (use_lgt_row)
- flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ flgt8(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1521,7 +1646,7 @@ void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1560,8 +1685,8 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[16];
- int use_lgt_row = get_fwd_lgt4(ht.rows, txfm_param, lgtmtx_row, 16);
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif
// Rows
@@ -1569,7 +1694,7 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
#if CONFIG_LGT
if (use_lgt_row)
- flgt4(temp_in, temp_out, lgtmtx_row[i]);
+ flgt4(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1588,7 +1713,7 @@ void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1627,8 +1752,8 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[16];
- int use_lgt_col = get_fwd_lgt4(ht.cols, txfm_param, lgtmtx_col, 16);
+ const tran_high_t *lgtmtx_col[1];
+ int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
#endif
// Columns
@@ -1636,7 +1761,7 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
#if CONFIG_LGT
if (use_lgt_col)
- flgt4(temp_in, temp_out, lgtmtx_col[i]);
+ flgt4(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1655,7 +1780,7 @@ void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1694,8 +1819,8 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[16];
- int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 16);
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// Rows
@@ -1705,7 +1830,7 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
(tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
#if CONFIG_LGT
if (use_lgt_row)
- flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ flgt8(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1724,7 +1849,7 @@ void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1763,8 +1888,8 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[16];
- int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 16);
+ const tran_high_t *lgtmtx_col[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
#endif
// Columns
@@ -1774,7 +1899,7 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
#if CONFIG_LGT
if (use_lgt_col)
- flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ flgt8(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1793,7 +1918,7 @@ void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1832,8 +1957,8 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_row[32];
- int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 32);
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// Rows
@@ -1841,7 +1966,7 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
#if CONFIG_LGT
if (use_lgt_row)
- flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ flgt8(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -1855,12 +1980,12 @@ void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n4; ++j)
output[i + j * n] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
}
- // Note: overall scale factor of transform is 4 times unitary
+ // Note: overall scale factor of transform is 8 times unitary
}
void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1899,8 +2024,8 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[32];
- int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 32);
+ const tran_high_t *lgtmtx_col[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
#endif
// Columns
@@ -1908,7 +2033,7 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
#if CONFIG_LGT
if (use_lgt_col)
- flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ flgt8(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -1922,12 +2047,12 @@ void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n4; ++j)
output[j + i * n4] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
}
- // Note: overall scale factor of transform is 4 times unitary
+ // Note: overall scale factor of transform is 8 times unitary
}
void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -1986,7 +2111,7 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -2043,134 +2168,9 @@ void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
// Note: overall scale factor of transform is 4 times unitary
}
-void av1_fdct8x8_quant_c(const int16_t *input, int stride,
- tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan
-#if CONFIG_AOM_QM
- ,
- const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
-#endif
- ) {
- int eob = -1;
-
- int i, j;
- tran_low_t intermediate[64];
-
- // Transform columns
- {
- tran_low_t *output = intermediate;
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
- tran_high_t t0, t1, t2, t3; // needs32
- tran_high_t x0, x1, x2, x3; // canbe16
-
- for (i = 0; i < 8; i++) {
- // stage 1
- s0 = (input[0 * stride] + input[7 * stride]) * 4;
- s1 = (input[1 * stride] + input[6 * stride]) * 4;
- s2 = (input[2 * stride] + input[5 * stride]) * 4;
- s3 = (input[3 * stride] + input[4 * stride]) * 4;
- s4 = (input[3 * stride] - input[4 * stride]) * 4;
- s5 = (input[2 * stride] - input[5 * stride]) * 4;
- s6 = (input[1 * stride] - input[6 * stride]) * 4;
- s7 = (input[0 * stride] - input[7 * stride]) * 4;
-
- // fdct4(step, step);
- x0 = s0 + s3;
- x1 = s1 + s2;
- x2 = s1 - s2;
- x3 = s0 - s3;
- t0 = (x0 + x1) * cospi_16_64;
- t1 = (x0 - x1) * cospi_16_64;
- t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
- t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
- output[0 * 8] = (tran_low_t)fdct_round_shift(t0);
- output[2 * 8] = (tran_low_t)fdct_round_shift(t2);
- output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
- output[6 * 8] = (tran_low_t)fdct_round_shift(t3);
-
- // stage 2
- t0 = (s6 - s5) * cospi_16_64;
- t1 = (s6 + s5) * cospi_16_64;
- t2 = fdct_round_shift(t0);
- t3 = fdct_round_shift(t1);
-
- // stage 3
- x0 = s4 + t2;
- x1 = s4 - t2;
- x2 = s7 - t3;
- x3 = s7 + t3;
-
- // stage 4
- t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
- t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
- t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
- t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[1 * 8] = (tran_low_t)fdct_round_shift(t0);
- output[3 * 8] = (tran_low_t)fdct_round_shift(t2);
- output[5 * 8] = (tran_low_t)fdct_round_shift(t1);
- output[7 * 8] = (tran_low_t)fdct_round_shift(t3);
- input++;
- output++;
- }
- }
-
- // Rows
- for (i = 0; i < 8; ++i) {
- fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
- for (j = 0; j < 8; ++j) coeff_ptr[j + i * 8] /= 2;
- }
-
- // TODO(jingning) Decide the need of these arguments after the
- // quantization process is completed.
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-#if CONFIG_AOM_QM
- const qm_val_t wt = qm_ptr[rc];
- const qm_val_t iwt = iqm_ptr[rc];
- const int dequant =
- (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
- AOM_QM_BITS;
-#endif
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- int tmp32;
-#if CONFIG_AOM_QM
- tmp32 = (int)((tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS));
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
-#else
- tmp32 = (int)((tmp * quant_ptr[rc != 0]) >> 16);
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
-#endif
-
- if (tmp32) eob = i;
- }
- }
- *eob_ptr = eob + 1;
-}
-
void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -2185,6 +2185,26 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
{
static const transform_2d FHT[] = {
+#if CONFIG_DAALA_DCT8
+ { daala_fdct8, daala_fdct8 }, // DCT_DCT
+ { daala_fdst8, daala_fdct8 }, // ADST_DCT
+ { daala_fdct8, daala_fdst8 }, // DCT_ADST
+ { daala_fdst8, daala_fdst8 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { daala_fdst8, daala_fdct8 }, // FLIPADST_DCT
+ { daala_fdct8, daala_fdst8 }, // DCT_FLIPADST
+ { daala_fdst8, daala_fdst8 }, // FLIPADST_FLIPADST
+ { daala_fdst8, daala_fdst8 }, // ADST_FLIPADST
+ { daala_fdst8, daala_fdst8 }, // FLIPADST_ADST
+ { daala_idtx8, daala_idtx8 }, // IDTX
+ { daala_fdct8, daala_idtx8 }, // V_DCT
+ { daala_idtx8, daala_fdct8 }, // H_DCT
+ { daala_fdst8, daala_idtx8 }, // V_ADST
+ { daala_idtx8, daala_fdst8 }, // H_ADST
+ { daala_fdst8, daala_idtx8 }, // V_FLIPADST
+ { daala_idtx8, daala_fdst8 }, // H_FLIPADST
+#endif
+#else
{ fdct8, fdct8 }, // DCT_DCT
{ fadst8, fdct8 }, // ADST_DCT
{ fdct8, fadst8 }, // DCT_ADST
@@ -2203,6 +2223,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
{ fadst8, fidtx8 }, // V_FLIPADST
{ fidtx8, fadst8 }, // H_FLIPADST
#endif
+#endif
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[64];
@@ -2215,10 +2236,10 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
- const tran_high_t *lgtmtx_col[8];
- const tran_high_t *lgtmtx_row[8];
- int use_lgt_col = get_fwd_lgt8(ht.cols, txfm_param, lgtmtx_col, 8);
- int use_lgt_row = get_fwd_lgt8(ht.rows, txfm_param, lgtmtx_row, 8);
+ const tran_high_t *lgtmtx_col[1];
+ const tran_high_t *lgtmtx_row[1];
+ int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
+ int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif
// Columns
@@ -2230,7 +2251,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
#endif
#if CONFIG_LGT
if (use_lgt_col)
- flgt8(temp_in, temp_out, lgtmtx_col[i]);
+ flgt8(temp_in, temp_out, lgtmtx_col[0]);
else
#endif
ht.cols(temp_in, temp_out);
@@ -2242,7 +2263,7 @@ void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8];
#if CONFIG_LGT
if (use_lgt_row)
- flgt8(temp_in, temp_out, lgtmtx_row[i]);
+ flgt8(temp_in, temp_out, lgtmtx_row[0]);
else
#endif
ht.rows(temp_in, temp_out);
@@ -2315,7 +2336,7 @@ void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -2323,6 +2344,26 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
+#if CONFIG_DAALA_DCT16
+ { daala_fdct16, daala_fdct16 }, // DCT_DCT
+ { daala_fdst16, daala_fdct16 }, // ADST_DCT
+ { daala_fdct16, daala_fdst16 }, // DCT_ADST
+ { daala_fdst16, daala_fdst16 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { daala_fdst16, daala_fdct16 }, // FLIPADST_DCT
+ { daala_fdct16, daala_fdst16 }, // DCT_FLIPADST
+ { daala_fdst16, daala_fdst16 }, // FLIPADST_FLIPADST
+ { daala_fdst16, daala_fdst16 }, // ADST_FLIPADST
+ { daala_fdst16, daala_fdst16 }, // FLIPADST_ADST
+ { daala_idtx16, daala_idtx16 }, // IDTX
+ { daala_fdct16, daala_idtx16 }, // V_DCT
+ { daala_idtx16, daala_fdct16 }, // H_DCT
+ { daala_fdst16, daala_idtx16 }, // V_ADST
+ { daala_idtx16, daala_fdst16 }, // H_ADST
+ { daala_fdst16, daala_idtx16 }, // V_FLIPADST
+ { daala_idtx16, daala_fdst16 }, // H_FLIPADST
+#endif
+#else
{ fdct16, fdct16 }, // DCT_DCT
{ fadst16, fdct16 }, // ADST_DCT
{ fdct16, fadst16 }, // DCT_ADST
@@ -2341,6 +2382,7 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
{ fadst16, fidtx16 }, // V_FLIPADST
{ fidtx16, fadst16 }, // H_FLIPADST
#endif
+#endif
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[256];
@@ -2354,17 +2396,34 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j) temp_in[j] = input[j * stride + i] * 4;
+ for (j = 0; j < 16; ++j) {
+#if CONFIG_DAALA_DCT16
+ temp_in[j] = input[j * stride + i] * 16;
+#else
+ temp_in[j] = input[j * stride + i] * 4;
+#endif
+ }
ht.cols(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
+ for (j = 0; j < 16; ++j) {
+#if CONFIG_DAALA_DCT16
+ out[j * 16 + i] = temp_out[j];
+#else
out[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
+#endif
+ }
}
// Rows
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) temp_in[j] = out[j + i * 16];
ht.rows(temp_in, temp_out);
- for (j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j];
+ for (j = 0; j < 16; ++j) {
+#if CONFIG_DAALA_DCT16
+ output[j + i * 16] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+#else
+ output[j + i * 16] = temp_out[j];
+#endif
+ }
}
}
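
Two bit-twiddled rounding idioms recur in these transform paths: `(x + 1 + (x < 0)) >> 2` (above) and `(x + 1 + (x > 0)) >> 2` (in the 64-point functions below). A quick standalone check, not part of the tree, confirms what they compute: both round to the nearest multiple of four, the first breaking ties toward zero and the second away from zero. Note the Daala 64-point column path pairs the same `+1` offset with `>> 3`, where the offset is no longer half the divisor, so that variant rounds slightly low.

#include <assert.h>
#include <stdlib.h>

/* Reference: round x/4 to nearest; ties toward zero (ties_away == 0) or
 * away from zero (ties_away == 1).  Like the source, this relies on
 * arithmetic right shift of negative values. */
static int ref_round4(int x, int ties_away) {
  const int r = (abs(x) + (ties_away ? 2 : 1)) >> 2;
  return x < 0 ? -r : r;
}

int main(void) {
  for (int x = -1024; x <= 1024; ++x) {
    assert(((x + 1 + (x < 0)) >> 2) == ref_round4(x, 0));
    assert(((x + 1 + (x > 0)) >> 2) == ref_round4(x, 1));
  }
  return 0;
}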
@@ -2375,12 +2434,32 @@ void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_DCT_ONLY
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
- { fdct32, fdct32 }, // DCT_DCT
+#if CONFIG_DAALA_DCT32
+ { daala_fdct32, daala_fdct32 }, // DCT_DCT
+#if CONFIG_EXT_TX
+ { daala_fdst32, daala_fdct32 }, // ADST_DCT
+ { daala_fdct32, daala_fdst32 }, // DCT_ADST
+ { daala_fdst32, daala_fdst32 }, // ADST_ADST
+ { daala_fdst32, daala_fdct32 }, // FLIPADST_DCT
+ { daala_fdct32, daala_fdst32 }, // DCT_FLIPADST
+ { daala_fdst32, daala_fdst32 }, // FLIPADST_FLIPADST
+ { daala_fdst32, daala_fdst32 }, // ADST_FLIPADST
+ { daala_fdst32, daala_fdst32 }, // FLIPADST_ADST
+ { daala_idtx32, daala_idtx32 }, // IDTX
+ { daala_fdct32, daala_idtx32 }, // V_DCT
+ { daala_idtx32, daala_fdct32 }, // H_DCT
+ { daala_fdst32, daala_idtx32 }, // V_ADST
+ { daala_idtx32, daala_fdst32 }, // H_ADST
+ { daala_fdst32, daala_idtx32 }, // V_FLIPADST
+ { daala_idtx32, daala_fdst32 }, // H_FLIPADST
+#endif
+#else
+ { fdct32, fdct32 }, // DCT_DCT
#if CONFIG_EXT_TX
{ fhalfright32, fdct32 }, // ADST_DCT
{ fdct32, fhalfright32 }, // DCT_ADST
@@ -2398,6 +2477,7 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
{ fhalfright32, fidtx32 }, // V_FLIPADST
{ fidtx32, fhalfright32 }, // H_FLIPADST
#endif
+#endif
#if CONFIG_MRC_TX
    { fdct32, fdct32 },  // MRC_DCT
#endif // CONFIG_MRC_TX
@@ -2416,27 +2496,41 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
if (tx_type == MRC_DCT) {
int16_t masked_input[32 * 32];
get_masked_residual32(&input, &stride, txfm_param->dst, txfm_param->stride,
- masked_input);
+ masked_input, txfm_param);
}
#endif // CONFIG_MRC_TX
// Columns
for (i = 0; i < 32; ++i) {
- for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4;
+ for (j = 0; j < 32; ++j) {
+#if CONFIG_DAALA_DCT32
+ temp_in[j] = input[j * stride + i] * 16;
+#else
+ temp_in[j] = input[j * stride + i] * 4;
+#endif
+ }
ht.cols(temp_in, temp_out);
- for (j = 0; j < 32; ++j)
+ for (j = 0; j < 32; ++j) {
+#if CONFIG_DAALA_DCT32
+ out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
+#else
out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
+#endif
+ }
}
// Rows
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32];
ht.rows(temp_in, temp_out);
- for (j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j];
+ for (j = 0; j < 32; ++j) {
+ output[j + i * 32] = temp_out[j];
+ }
}
}
#if CONFIG_TX64X64
+#if !CONFIG_DAALA_DCT64
#if CONFIG_EXT_TX
static void fidtx64(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -2475,10 +2569,11 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
av1_fdct64_new(in, out, fwd_cos_bit_row_dct_64, fwd_stage_range_row_dct_64);
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
+#endif
void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif // CONFIG_MRC_TX
@@ -2486,7 +2581,27 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
- { fdct64_col, fdct64_row }, // DCT_DCT
+#if CONFIG_DAALA_DCT64
+ { daala_fdct64, daala_fdct64 }, // DCT_DCT
+#if CONFIG_EXT_TX
+ { daala_fdst64, daala_fdct64 }, // ADST_DCT
+ { daala_fdct64, daala_fdst64 }, // DCT_ADST
+ { daala_fdst64, daala_fdst64 }, // ADST_ADST
+ { daala_fdst64, daala_fdct64 }, // FLIPADST_DCT
+ { daala_fdct64, daala_fdst64 }, // DCT_FLIPADST
+ { daala_fdst64, daala_fdst64 }, // FLIPADST_FLIPADST
+ { daala_fdst64, daala_fdst64 }, // ADST_FLIPADST
+ { daala_fdst64, daala_fdst64 }, // FLIPADST_ADST
+ { daala_idtx64, daala_idtx64 }, // IDTX
+ { daala_fdct64, daala_idtx64 }, // V_DCT
+ { daala_idtx64, daala_fdct64 }, // H_DCT
+ { daala_fdst64, daala_idtx64 }, // V_ADST
+ { daala_idtx64, daala_fdst64 }, // H_ADST
+ { daala_fdst64, daala_idtx64 }, // V_FLIPADST
+ { daala_idtx64, daala_fdst64 }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+#else
+ { fdct64_col, fdct64_row }, // DCT_DCT
#if CONFIG_EXT_TX
{ fhalfright64, fdct64_row }, // ADST_DCT
{ fdct64_col, fhalfright64 }, // DCT_ADST
@@ -2503,7 +2618,8 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx64, fhalfright64 }, // H_ADST
{ fhalfright64, fidtx64 }, // V_FLIPADST
{ fidtx64, fhalfright64 }, // H_FLIPADST
-#endif
+#endif // CONFIG_EXT_TX
+#endif // CONFIG_DAALA_DCT64
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[4096];
@@ -2516,10 +2632,18 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
// Columns
for (i = 0; i < 64; ++i) {
+#if CONFIG_DAALA_DCT64
+ for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i] * 16;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 64; ++j)
+ out[j * 64 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 3;
+
+#else
for (j = 0; j < 64; ++j) temp_in[j] = input[j * stride + i];
ht.cols(temp_in, temp_out);
for (j = 0; j < 64; ++j)
out[j * 64 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
+#endif
}
// Rows
@@ -2527,8 +2651,129 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < 64; ++j) temp_in[j] = out[j + i * 64];
ht.rows(temp_in, temp_out);
for (j = 0; j < 64; ++j)
+#if CONFIG_DAALA_DCT64
+ output[j + i * 64] = temp_out[j];
+#else
output[j + i * 64] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
+#endif
+ }
+}
+
+void av1_fht64x32_c(const int16_t *input, tran_low_t *output, int stride,
+ TxfmParam *txfm_param) {
+ const TX_TYPE tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
+ static const transform_2d FHT[] = {
+ { fdct32, fdct64_row }, // DCT_DCT
+#if CONFIG_EXT_TX
+ { fhalfright32, fdct64_row }, // ADST_DCT
+ { fdct32, fhalfright64 }, // DCT_ADST
+ { fhalfright32, fhalfright64 }, // ADST_ADST
+ { fhalfright32, fdct64_row }, // FLIPADST_DCT
+ { fdct32, fhalfright64 }, // DCT_FLIPADST
+ { fhalfright32, fhalfright64 }, // FLIPADST_FLIPADST
+ { fhalfright32, fhalfright64 }, // ADST_FLIPADST
+ { fhalfright32, fhalfright64 }, // FLIPADST_ADST
+ { fidtx32, fidtx64 }, // IDTX
+ { fdct32, fidtx64 }, // V_DCT
+ { fidtx32, fdct64_row }, // H_DCT
+ { fhalfright32, fidtx64 }, // V_ADST
+ { fidtx32, fhalfright64 }, // H_ADST
+ { fhalfright32, fidtx64 }, // V_FLIPADST
+ { fidtx32, fhalfright64 }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+ };
+ const transform_2d ht = FHT[tx_type];
+ tran_low_t out[2048];
+ int i, j;
+ tran_low_t temp_in[64], temp_out[64];
+ const int n = 32;
+ const int n2 = 64;
+#if CONFIG_EXT_TX
+ int16_t flipped_input[32 * 64];
+ maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
+#endif
+
+ // Columns
+ for (i = 0; i < n2; ++i) {
+ for (j = 0; j < n; ++j)
+ temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2);
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < n; ++j)
+ out[j * n2 + i] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
+ }
+
+ // Rows
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < n2; ++j)
+ output[j + i * n2] =
+ (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
+ }
+}
+
+void av1_fht32x64_c(const int16_t *input, tran_low_t *output, int stride,
+ TxfmParam *txfm_param) {
+ const TX_TYPE tx_type = txfm_param->tx_type;
+#if CONFIG_MRC_TX
+ assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
+#endif // CONFIG_MRC_TX
+#if CONFIG_DCT_ONLY
+ assert(tx_type == DCT_DCT);
+#endif
+ static const transform_2d FHT[] = {
+ { fdct64_row, fdct32 }, // DCT_DCT
+#if CONFIG_EXT_TX
+ { fhalfright64, fdct32 }, // ADST_DCT
+ { fdct64_row, fhalfright32 }, // DCT_ADST
+ { fhalfright64, fhalfright32 }, // ADST_ADST
+ { fhalfright64, fdct32 }, // FLIPADST_DCT
+ { fdct64_row, fhalfright32 }, // DCT_FLIPADST
+ { fhalfright64, fhalfright32 }, // FLIPADST_FLIPADST
+ { fhalfright64, fhalfright32 }, // ADST_FLIPADST
+ { fhalfright64, fhalfright32 }, // FLIPADST_ADST
+ { fidtx64, fidtx32 }, // IDTX
+ { fdct64_row, fidtx32 }, // V_DCT
+ { fidtx64, fdct32 }, // H_DCT
+ { fhalfright64, fidtx32 }, // V_ADST
+ { fidtx64, fhalfright32 }, // H_ADST
+ { fhalfright64, fidtx32 }, // V_FLIPADST
+ { fidtx64, fhalfright32 }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+ };
+ const transform_2d ht = FHT[tx_type];
+ tran_low_t out[32 * 64];
+ int i, j;
+ tran_low_t temp_in[64], temp_out[64];
+ const int n = 32;
+ const int n2 = 64;
+#if CONFIG_EXT_TX
+ int16_t flipped_input[32 * 64];
+ maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
+#endif
+
+ // Rows
+ for (i = 0; i < n2; ++i) {
+ for (j = 0; j < n; ++j)
+ temp_in[j] = (tran_low_t)fdct_round_shift(input[i * stride + j] * Sqrt2);
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < n; ++j)
+ out[j * n2 + i] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
+ }
+
+ // Columns
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < n2; ++j)
+ output[i + j * n] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
}
}
#endif // CONFIG_TX64X64
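
The new rectangular 64x32/32x64 paths prescale every input sample by √2 through `fdct_round_shift(x * Sqrt2)`, the usual compensation for a 2:1 block so the combined row/column gain stays near a power of two. A minimal fixed-point illustration; treating Sqrt2 as the Q12 constant 5793 is an assumption about the constant's scale, and the in-tree rounding helper may use a different shift:

#include <assert.h>
#include <stdint.h>

/* Multiply by sqrt(2) in Q12 with round-to-nearest.  5793 == round(2^12 *
 * sqrt(2)) is assumed here, not quoted from the header. */
static int32_t mul_sqrt2_q12(int32_t x) {
  return (int32_t)(((int64_t)x * 5793 + 2048) >> 12);
}

int main(void) {
  /* Applying the scale once per axis doubles the value overall. */
  assert(mul_sqrt2_q12(mul_sqrt2_q12(1000)) == 2000);
  return 0;
}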
@@ -2536,110 +2781,17 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
#if CONFIG_EXT_TX
// Forward identity transform.
void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
- int bs, int tx_type) {
+ int bsx, int bsy, TX_TYPE tx_type) {
int r, c;
- const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
+ const int pels = bsx * bsy;
+ const int shift = 3 - ((pels > 256) + (pels > 1024));
if (tx_type == IDTX) {
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] * (1 << shift);
+ for (r = 0; r < bsy; ++r) {
+ for (c = 0; c < bsx; ++c) coeff[c] = src_diff[c] * (1 << shift);
src_diff += stride;
- coeff += bs;
+ coeff += bsx;
}
}
}
#endif // CONFIG_EXT_TX
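
The rewritten identity transform derives its shift from the pixel count rather than a single dimension, which is what lets it serve the new rectangular sizes. For the square sizes the old expression handled, the two formulas agree; a small check of the equivalence:

#include <assert.h>

static int old_shift(int bs) { return bs < 32 ? 3 : (bs < 64 ? 2 : 1); }
static int new_shift(int bsx, int bsy) {
  const int pels = bsx * bsy;
  return 3 - ((pels > 256) + (pels > 1024));
}

int main(void) {
  static const int sizes[] = { 4, 8, 16, 32, 64 };
  for (int i = 0; i < 5; ++i)
    assert(new_shift(sizes[i], sizes[i]) == old_shift(sizes[i]));
  return 0;
}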
-
-#if CONFIG_DPCM_INTRA
-void av1_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[4];
- for (int i = 0; i < 4; ++i)
- temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2);
- ft(temp_in, output);
-}
-
-void av1_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[8];
- for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4;
- ft(temp_in, output);
-}
-
-void av1_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[16];
- for (int i = 0; i < 16; ++i)
- temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2);
- ft(temp_in, output);
-}
-
-void av1_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output) {
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32,
- fidtx32 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[32];
- for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride];
- ft(temp_in, output);
-}
-
-#if CONFIG_HIGHBITDEPTH
-void av1_hbd_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output, int dir) {
- (void)dir;
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[4];
- for (int i = 0; i < 4; ++i)
- temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2);
- ft(temp_in, output);
-}
-
-void av1_hbd_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output, int dir) {
- (void)dir;
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[8];
- for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4;
- ft(temp_in, output);
-}
-
-void av1_hbd_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output, int dir) {
- (void)dir;
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[16];
- for (int i = 0; i < 16; ++i)
- temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2);
- ft(temp_in, output);
-}
-
-void av1_hbd_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type,
- tran_low_t *output, int dir) {
- (void)dir;
- assert(tx_type < TX_TYPES_1D);
- static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32,
- fidtx32 };
- const transform_1d ft = FHT[tx_type];
- tran_low_t temp_in[32];
- for (int i = 0; i < 32; ++i) temp_in[i] = input[i * stride];
- ft(temp_in, output);
-}
-#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_DPCM_INTRA
#endif // !AV1_DCT_GTEST
diff --git a/third_party/aom/av1/encoder/encodeframe.c b/third_party/aom/av1/encoder/encodeframe.c
index d13eb42fb..f79a678fb 100644
--- a/third_party/aom/av1/encoder/encodeframe.c
+++ b/third_party/aom/av1/encoder/encodeframe.c
@@ -81,10 +81,8 @@ static int check_intra_sb(const AV1_COMP *cpi, const TileInfo *const tile,
int mi_row, int mi_col, BLOCK_SIZE bsize,
PC_TREE *pc_tree);
static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
-#if CONFIG_EXT_INTER
- int mi_row_ori, int mi_col_ori,
-#endif // CONFIG_EXT_INTER
- int mi_row_pred, int mi_col_pred, int plane,
+ int mi_row_ori, int mi_col_ori, int mi_row_pred,
+ int mi_col_pred, int plane,
BLOCK_SIZE bsize_pred, int b_sub8x8, int block);
static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
PC_TREE *pc_tree);
@@ -273,6 +271,7 @@ static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
const int mi_height = mi_size_high[bsize];
set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+
set_skip_context(xd, mi_row, mi_col);
#if CONFIG_VAR_TX
xd->above_txfm_context =
@@ -455,16 +454,17 @@ static void set_segment_id_supertx(const AV1_COMP *const cpi,
#if CONFIG_DUAL_FILTER
static void reset_intmv_filter_type(const AV1_COMMON *const cm, MACROBLOCKD *xd,
MB_MODE_INFO *mbmi) {
- int dir;
- for (dir = 0; dir < 2; ++dir) {
- if (!has_subpel_mv_component(xd->mi[0], xd, dir) &&
- (mbmi->ref_frame[1] == NONE_FRAME ||
- !has_subpel_mv_component(xd->mi[0], xd, dir + 2)))
- mbmi->interp_filter[dir] = (cm->interp_filter == SWITCHABLE)
- ? EIGHTTAP_REGULAR
- : cm->interp_filter;
- mbmi->interp_filter[dir + 2] = mbmi->interp_filter[dir];
+ InterpFilter filters[2];
+ InterpFilter default_filter = av1_unswitchable_filter(cm->interp_filter);
+
+ for (int dir = 0; dir < 2; ++dir) {
+ filters[dir] = ((!has_subpel_mv_component(xd->mi[0], xd, dir) &&
+ (mbmi->ref_frame[1] == NONE_FRAME ||
+ !has_subpel_mv_component(xd->mi[0], xd, dir + 2)))
+ ? default_filter
+ : av1_extract_interp_filter(mbmi->interp_filters, dir));
}
+ mbmi->interp_filters = av1_make_interp_filters(filters[0], filters[1]);
}
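
This hunk replaces the four per-direction `interp_filter[dir]` slots with a single packed `interp_filters` value built once by `av1_make_interp_filters` and read back by `av1_extract_interp_filter`. A sketch of the pack/extract pattern; the 16-bit-per-filter layout below is only an assumed illustration, not the header's actual encoding:

#include <assert.h>
#include <stdint.h>

typedef uint32_t InterpFilters;

/* Hypothetical packing: direction 0 in the low half, direction 1 high. */
static InterpFilters make_filters(int f0, int f1) {
  return ((uint32_t)(uint16_t)f1 << 16) | (uint16_t)f0;
}
static int extract_filter(InterpFilters fs, int dir) {
  return (int)((fs >> (dir ? 16 : 0)) & 0xffff);
}

int main(void) {
  const InterpFilters fs = make_filters(2, 1);
  assert(extract_filter(fs, 0) == 2);
  assert(extract_filter(fs, 1) == 1);
  return 0;
}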
static void update_filter_type_count(FRAME_COUNTS *counts,
@@ -476,7 +476,11 @@ static void update_filter_type_count(FRAME_COUNTS *counts,
(mbmi->ref_frame[1] > INTRA_FRAME &&
has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
- ++counts->switchable_interp[ctx][mbmi->interp_filter[dir]];
+ InterpFilter filter =
+ av1_extract_interp_filter(mbmi->interp_filters, dir);
+ ++counts->switchable_interp[ctx][filter];
+ update_cdf(xd->tile_ctx->switchable_interp_cdf[ctx], filter,
+ SWITCHABLE_FILTERS);
}
}
}
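
Alongside the existing integer `counts`, hunks throughout this file now also adapt per-tile CDFs directly via `update_cdf`. The adaptation idea, reduced to a toy (the in-tree `update_cdf` also derives its rate from a per-context counter, omitted here; the Q15 scale and fixed rate are simplifying assumptions):

#include <assert.h>
#include <stdint.h>

/* Move each cumulative bin a fraction of the way toward the coded symbol. */
static void toy_update_cdf(uint16_t *cdf, int val, int nsymbs, int rate) {
  for (int i = 0; i < nsymbs - 1; ++i) {
    const int target = (i >= val) ? 32768 : 0; /* Q15 probability scale */
    cdf[i] = (uint16_t)(cdf[i] + ((target - cdf[i]) >> rate));
  }
}

int main(void) {
  uint16_t cdf[1] = { 16384 };  /* binary symbol, P(0) = 0.5 */
  toy_update_cdf(cdf, 0, 2, 5); /* observe symbol 0 */
  assert(cdf[0] == 16384 + ((32768 - 16384) >> 5));
  return 0;
}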
@@ -485,11 +489,7 @@ static void update_filter_type_count(FRAME_COUNTS *counts,
static void update_global_motion_used(PREDICTION_MODE mode, BLOCK_SIZE bsize,
const MB_MODE_INFO *mbmi,
RD_COUNTS *rdc) {
- if (mode == ZEROMV
-#if CONFIG_EXT_INTER
- || mode == ZERO_ZEROMV
-#endif
- ) {
+ if (mode == ZEROMV || mode == ZERO_ZEROMV) {
const int num_4x4s =
num_4x4_blocks_wide_lookup[bsize] * num_4x4_blocks_high_lookup[bsize];
int ref;
@@ -521,7 +521,6 @@ static void set_ref_and_pred_mvs(MACROBLOCK *const x, int_mv *const mi_pred_mv,
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
CANDIDATE_MV *const curr_ref_mv_stack = mbmi_ext->ref_mv_stack[rf_type];
-#if CONFIG_EXT_INTER
if (has_second_ref(mbmi)) {
// Special case: NEAR_NEWMV and NEW_NEARMV modes use 1 + mbmi->ref_mv_idx
// (like NEARMV) instead
@@ -557,7 +556,6 @@ static void set_ref_and_pred_mvs(MACROBLOCK *const x, int_mv *const mi_pred_mv,
}
#endif // CONFIG_COMPOUND_SINGLEREF
} else {
-#endif // CONFIG_EXT_INTER
if (mbmi->mode == NEWMV) {
int i;
for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
@@ -569,9 +567,7 @@ static void set_ref_and_pred_mvs(MACROBLOCK *const x, int_mv *const mi_pred_mv,
mi_pred_mv[i] = this_mv;
}
}
-#if CONFIG_EXT_INTER
}
-#endif // CONFIG_EXT_INTER
}
static void update_state(const AV1_COMP *const cpi, ThreadData *td,
@@ -590,11 +586,6 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
const struct segmentation *const seg = &cm->seg;
const int bw = mi_size_wide[mi->mbmi.sb_type];
const int bh = mi_size_high[mi->mbmi.sb_type];
- const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
- const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row);
- MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
- int w, h;
-
const int mis = cm->mi_stride;
const int mi_width = mi_size_wide[bsize];
const int mi_height = mi_size_high[bsize];
@@ -649,9 +640,10 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
#endif // CONFIG_LV_MAP
}
-#if CONFIG_PALETTE
for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
-#endif // CONFIG_PALETTE
+#if CONFIG_MRC_TX
+ xd->mrc_mask = ctx->mrc_mask;
+#endif // CONFIG_MRC_TX
  // Restore the coding context of the MB to the one that was in place
// when the mode was picked for it
for (y = 0; y < mi_height; y++)
@@ -661,7 +653,7 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
xd->mi[x_idx + y * mis] = mi_addr;
}
-#if CONFIG_DELTA_Q && !CONFIG_EXT_DELTA_Q
+#if !CONFIG_EXT_DELTA_Q
if (cpi->oxcf.aq_mode > NO_AQ && cpi->oxcf.aq_mode < DELTA_AQ)
av1_init_plane_quantizers(cpi, x, xd->mi[0]->mbmi.segment_id);
#else
@@ -699,13 +691,11 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
THR_D153_PRED /*D153_PRED*/,
THR_D207_PRED /*D207_PRED*/,
THR_D63_PRED /*D63_PRED*/,
-#if CONFIG_ALT_INTRA
THR_SMOOTH, /*SMOOTH_PRED*/
#if CONFIG_SMOOTH_HV
THR_SMOOTH_V, /*SMOOTH_V_PRED*/
THR_SMOOTH_H, /*SMOOTH_H_PRED*/
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
THR_TM /*TM_PRED*/,
};
++mode_chosen_counts[kf_mode_index[mbmi->mode]];
@@ -747,7 +737,9 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
update_filter_type_count(td->counts, xd, mbmi);
#else
const int switchable_ctx = av1_get_pred_context_switchable_interp(xd);
- ++td->counts->switchable_interp[switchable_ctx][mbmi->interp_filter];
+ const InterpFilter filter =
+ av1_extract_interp_filter(mbmi->interp_filters, 0);
+ ++td->counts->switchable_interp[switchable_ctx][filter];
#endif
}
}
@@ -757,16 +749,9 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td,
rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
}
- for (h = 0; h < y_mis; ++h) {
- MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
- for (w = 0; w < x_mis; ++w) {
- MV_REF *const mv = frame_mv + w;
- mv->ref_frame[0] = mi->mbmi.ref_frame[0];
- mv->ref_frame[1] = mi->mbmi.ref_frame[1];
- mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
- mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
- }
- }
+ const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
+ const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row);
+ av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
}
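
The hand-written MV_REF copy loop (deleted above) now lives behind `av1_copy_frame_mvs`, shared with the identical loop removed from `update_state_supertx` below. Reconstructed purely from the deleted lines, an equivalent body would look like the sketch here; it compiles only against the aom headers, and the real helper may do more (clamping or reference filtering, for instance):

/* Sketch reconstructed from the deleted loop; not the actual library body. */
static void copy_frame_mvs_sketch(const AV1_COMMON *cm, const MODE_INFO *mi,
                                  int mi_row, int mi_col, int x_mis,
                                  int y_mis) {
  MV_REF *frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
  for (int h = 0; h < y_mis; ++h) {
    MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
    for (int w = 0; w < x_mis; ++w) {
      MV_REF *const mv = frame_mv + w;
      mv->ref_frame[0] = mi->mbmi.ref_frame[0];
      mv->ref_frame[1] = mi->mbmi.ref_frame[1];
      mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
      mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
    }
  }
}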
#if CONFIG_SUPERTX
@@ -788,12 +773,7 @@ static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td,
const int mis = cm->mi_stride;
const int mi_width = mi_size_wide[bsize];
const int mi_height = mi_size_high[bsize];
- const int x_mis = AOMMIN(mi_width, cm->mi_cols - mi_col);
- const int y_mis = AOMMIN(mi_height, cm->mi_rows - mi_row);
const int unify_bsize = CONFIG_CB4X4;
- MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
- int w, h;
-
int8_t rf_type;
*mi_addr = *mi;
@@ -915,16 +895,9 @@ static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td,
rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
}
- for (h = 0; h < y_mis; ++h) {
- MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
- for (w = 0; w < x_mis; ++w) {
- MV_REF *const mv = frame_mv + w;
- mv->ref_frame[0] = mi->mbmi.ref_frame[0];
- mv->ref_frame[1] = mi->mbmi.ref_frame[1];
- mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
- mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
- }
- }
+ const int x_mis = AOMMIN(mi_width, cm->mi_cols - mi_col);
+ const int y_mis = AOMMIN(mi_height, cm->mi_rows - mi_row);
+ av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
}
static void update_state_sb_supertx(const AV1_COMP *const cpi, ThreadData *td,
@@ -1005,6 +978,9 @@ static void update_state_sb_supertx(const AV1_COMP *const cpi, ThreadData *td,
pmc = &pc_tree->split_supertx;
break;
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+#error HORZ/VERT_A/B partitions not yet updated in supertx code
+#endif
case PARTITION_HORZ_A:
set_offsets_supertx(cpi, td, tile, mi_row, mi_col, bsize2);
update_state_supertx(cpi, td, &pc_tree->horizontala[0], mi_row, mi_col,
@@ -1138,6 +1114,9 @@ static void update_supertx_param_sb(const AV1_COMP *const cpi, ThreadData *td,
}
break;
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+#error HORZ/VERT_A/B partitions not yet updated in supertx code
+#endif
case PARTITION_HORZ_A:
for (i = 0; i < 3; i++)
update_supertx_param(td, &pc_tree->horizontala[i], best_tx,
@@ -1162,7 +1141,7 @@ static void update_supertx_param_sb(const AV1_COMP *const cpi, ThreadData *td,
}
#endif // CONFIG_SUPERTX
-#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
+#if CONFIG_MOTION_VAR && NC_MODE_INFO
static void set_mode_info_b(const AV1_COMP *const cpi,
const TileInfo *const tile, ThreadData *td,
int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -1229,6 +1208,9 @@ static void set_mode_info_sb(const AV1_COMP *const cpi, ThreadData *td,
}
break;
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+#error NC_MODE_INFO+MOTION_VAR not yet supported for new HORZ/VERT_AB partitions
+#endif
case PARTITION_HORZ_A:
set_mode_info_b(cpi, tile, td, mi_row, mi_col, bsize2,
&pc_tree->horizontala[0]);
@@ -1283,7 +1265,60 @@ static void set_mode_info_sb(const AV1_COMP *const cpi, ThreadData *td,
default: assert(0 && "Invalid partition type."); break;
}
}
-#endif
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+static void av1_get_ncobmc_mode_rd(const AV1_COMP *const cpi,
+ MACROBLOCK *const x, MACROBLOCKD *const xd,
+ int bsize, const int mi_row,
+ const int mi_col, NCOBMC_MODE *mode) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const int mi_width = mi_size_wide[bsize];
+ const int mi_height = mi_size_high[bsize];
+
+ assert(bsize >= BLOCK_8X8);
+
+ reset_xd_boundary(xd, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
+ cm->mi_cols);
+
+ // set up source buffers before calling the mode searching function
+ av1_setup_src_planes(x, cpi->source, mi_row, mi_col);
+
+ *mode = get_ncobmc_mode(cpi, x, xd, mi_row, mi_col, bsize);
+}
+static void get_ncobmc_intrpl_pred(const AV1_COMP *const cpi, ThreadData *td,
+ int mi_row, int mi_col, BLOCK_SIZE bsize) {
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int mi_width = mi_size_wide[bsize];
+ const int mi_height = mi_size_high[bsize];
+ const int hbs = AOMMAX(mi_size_wide[bsize] / 2, mi_size_high[bsize] / 2);
+ const BLOCK_SIZE sqr_blk = bsize_2_sqr_bsize[bsize];
+
+ if (mi_width > mi_height) {
+ // horizontal partition
+ av1_get_ncobmc_mode_rd(cpi, x, xd, sqr_blk, mi_row, mi_col,
+ &mbmi->ncobmc_mode[0]);
+ xd->mi += hbs;
+ av1_get_ncobmc_mode_rd(cpi, x, xd, sqr_blk, mi_row, mi_col + hbs,
+ &mbmi->ncobmc_mode[1]);
+ } else if (mi_height > mi_width) {
+ // vertical partition
+ av1_get_ncobmc_mode_rd(cpi, x, xd, sqr_blk, mi_row, mi_col,
+ &mbmi->ncobmc_mode[0]);
+ xd->mi += hbs * xd->mi_stride;
+ av1_get_ncobmc_mode_rd(cpi, x, xd, sqr_blk, mi_row + hbs, mi_col,
+ &mbmi->ncobmc_mode[1]);
+ } else {
+ av1_get_ncobmc_mode_rd(cpi, x, xd, sqr_blk, mi_row, mi_col,
+ &mbmi->ncobmc_mode[0]);
+ }
+ // restore the info
+ av1_setup_src_planes(x, cpi->source, mi_row, mi_col);
+ set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+}
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
+#endif // CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
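
`get_ncobmc_intrpl_pred` handles rectangular blocks by running the square-block mode search once or twice: `hbs` is half the long side, which for a 2:1 rectangle equals the short side, so the two square searches tile the block exactly. A one-line sanity check of that arithmetic:

#include <assert.h>

int main(void) {
  const int w = 8, h = 4;               /* a 2:1 block in mi units */
  const int hbs = (w > h ? w : h) / 2;  /* step between the two squares */
  const int side = (w < h ? w : h);     /* side of the square search size */
  assert(hbs == side && 2 * side == w); /* squares at 0 and hbs cover it */
  return 0;
}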
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
int mi_row, int mi_col) {
@@ -1384,10 +1419,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
mbmi->mi_row = mi_row;
mbmi->mi_col = mi_col;
#endif
-#if CONFIG_CFL
- // Don't store luma during RDO. Only store luma when best luma is known
- x->cfl_store_y = 0;
-#endif
#if CONFIG_SUPERTX
// We set tx_size here as skip blocks would otherwise not set it.
// tx_size needs to be set at this point as supertx_enable in
@@ -1413,9 +1444,10 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
#endif
}
-#if CONFIG_PALETTE
for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
-#endif // CONFIG_PALETTE
+#if CONFIG_MRC_TX
+ xd->mrc_mask = ctx->mrc_mask;
+#endif // CONFIG_MRC_TX
ctx->skippable = 0;
@@ -1491,6 +1523,9 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
(bsize >= BLOCK_16X16) &&
(cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
+#if CONFIG_EXT_REFS
+ cpi->refresh_alt2_ref_frame ||
+#endif // CONFIG_EXT_REFS
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
}
@@ -1542,23 +1577,19 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
int supertx_enabled
#endif
) {
-#if CONFIG_DELTA_Q
MACROBLOCK *x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
-#else
- const MACROBLOCK *x = &td->mb;
- const MACROBLOCKD *const xd = &x->e_mbd;
-#endif
const MODE_INFO *const mi = xd->mi[0];
const MB_MODE_INFO *const mbmi = &mi->mbmi;
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const BLOCK_SIZE bsize = mbmi->sb_type;
+ FRAME_CONTEXT *fc = xd->tile_ctx;
-#if CONFIG_DELTA_Q
// delta quant applies to both intra and inter
- const int super_block_upper_left = ((mi_row & 7) == 0) && ((mi_col & 7) == 0);
+ int super_block_upper_left =
+ ((mi_row & MAX_MIB_MASK) == 0) && ((mi_col & MAX_MIB_MASK) == 0);
- if (cm->delta_q_present_flag && (bsize != BLOCK_64X64 || !mbmi->skip) &&
+ if (cm->delta_q_present_flag && (bsize != cm->sb_size || !mbmi->skip) &&
super_block_upper_left) {
const int dq = (mbmi->current_q_index - xd->prev_qindex) / cm->delta_q_res;
const int absdq = abs(dq);
@@ -1569,6 +1600,35 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
xd->prev_qindex = mbmi->current_q_index;
#if CONFIG_EXT_DELTA_Q
+#if CONFIG_LOOPFILTER_LEVEL
+ if (cm->delta_lf_present_flag) {
+ if (cm->delta_lf_multi) {
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id) {
+ const int delta_lf =
+ (mbmi->curr_delta_lf[lf_id] - xd->prev_delta_lf[lf_id]) /
+ cm->delta_lf_res;
+ const int abs_delta_lf = abs(delta_lf);
+ for (i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
+ td->counts->delta_lf_multi[lf_id][i][1]++;
+ }
+ if (abs_delta_lf < DELTA_LF_SMALL)
+ td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
+ xd->prev_delta_lf[lf_id] = mbmi->curr_delta_lf[lf_id];
+ }
+ } else {
+ const int delta_lf =
+ (mbmi->current_delta_lf_from_base - xd->prev_delta_lf_from_base) /
+ cm->delta_lf_res;
+ const int abs_delta_lf = abs(delta_lf);
+ for (i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
+ td->counts->delta_lf[i][1]++;
+ }
+ if (abs_delta_lf < DELTA_LF_SMALL)
+ td->counts->delta_lf[abs_delta_lf][0]++;
+ xd->prev_delta_lf_from_base = mbmi->current_delta_lf_from_base;
+ }
+ }
+#else
if (cm->delta_lf_present_flag) {
const int dlf =
(mbmi->current_delta_lf_from_base - xd->prev_delta_lf_from_base) /
@@ -1580,12 +1640,9 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
if (absdlf < DELTA_LF_SMALL) td->counts->delta_lf[absdlf][0]++;
xd->prev_delta_lf_from_base = mbmi->current_delta_lf_from_base;
}
+#endif // CONFIG_LOOPFILTER_LEVEL
#endif
}
-#else
- (void)mi_row;
- (void)mi_col;
-#endif
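
The delta-q branch above and both new delta-lf branches count the same way: a truncated-unary magnitude, one "continue" tally per step below the cap and a final "stop" tally only when the magnitude is below the cap. The pattern in isolation, under a stand-in cap name:

#include <assert.h>
#include <stdlib.h>

#define DELTA_SMALL 3 /* stand-in for DELTA_Q_SMALL / DELTA_LF_SMALL */

static void count_delta(int counts[DELTA_SMALL][2], int delta) {
  const int absd = abs(delta);
  const int n = absd < DELTA_SMALL ? absd : DELTA_SMALL;
  for (int i = 0; i < n; ++i) counts[i][1]++; /* "keep going" bins */
  if (absd < DELTA_SMALL) counts[absd][0]++;  /* terminator */
}

int main(void) {
  int counts[DELTA_SMALL][2] = { { 0 } };
  count_delta(counts, -2);
  assert(counts[0][1] == 1 && counts[1][1] == 1 && counts[2][0] == 1);
  return 0;
}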
if (!frame_is_intra_only(cm)) {
FRAME_COUNTS *const counts = td->counts;
RD_COUNTS *rdc = &td->rd_counts;
@@ -1597,6 +1654,10 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
if (!supertx_enabled)
#endif
counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
+#if CONFIG_NEW_MULTISYMBOL
+ update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
+ inter_block, 2);
+#endif
// If the segment reference feature is enabled we have only a single
// reference frame allowed for the segment so exclude it from
// the reference frame counts used to work out probabilities.
@@ -1613,14 +1674,14 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
else
// This flag is also updated for 4x4 blocks
rdc->single_ref_used_flag = 1;
-#if !SUB8X8_COMP_REF
- if (mbmi->sb_type != BLOCK_4X4)
+ if (is_comp_ref_allowed(mbmi->sb_type)) {
counts->comp_inter[av1_get_reference_mode_context(cm, xd)]
[has_second_ref(mbmi)]++;
-#else
- counts->comp_inter[av1_get_reference_mode_context(cm, xd)]
- [has_second_ref(mbmi)]++;
-#endif
+#if CONFIG_NEW_MULTISYMBOL
+ update_cdf(av1_get_reference_mode_cdf(cm, xd), has_second_ref(mbmi),
+ 2);
+#endif // CONFIG_NEW_MULTISYMBOL
+ }
}
if (has_second_ref(mbmi)) {
@@ -1664,6 +1725,9 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(cm, xd)][0]
[ref1 == ALTREF_FRAME]++;
+ if (ref1 != ALTREF_FRAME)
+ counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(cm, xd)]
+ [1][ref1 == ALTREF2_FRAME]++;
#else // !CONFIG_EXT_REFS
counts->comp_ref[av1_get_pred_context_comp_ref_p(cm, xd)][0]
[ref0 == GOLDEN_FRAME]++;
@@ -1673,12 +1737,16 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
#endif // CONFIG_EXT_COMP_REFS
} else {
#if CONFIG_EXT_REFS
- const int bit = (ref0 == ALTREF_FRAME || ref0 == BWDREF_FRAME);
+ const int bit = (ref0 >= BWDREF_FRAME);
counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
if (bit) {
+ assert(ref0 <= ALTREF_FRAME);
counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
- [ref0 != BWDREF_FRAME]++;
+ [ref0 == ALTREF_FRAME]++;
+ if (ref0 != ALTREF_FRAME)
+ counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
+ [ref0 == ALTREF2_FRAME]++;
} else {
const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
counts
@@ -1701,7 +1769,6 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
#endif // CONFIG_EXT_REFS
}
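
With ALTREF2 added, the single-reference signalling above becomes a three-level tree on the backward side: p1 separates forward from backward references, p2 peels off ALTREF, and the new p6 context distinguishes ALTREF2 from BWDREF. Flattened, with the usual EXT_REFS ordering assumed:

#include <assert.h>

enum { LAST, LAST2, LAST3, GOLDEN, BWDREF, ALTREF2, ALTREF };

static int backward_leaf(int ref) {
  if (!(ref >= BWDREF)) return -1; /* forward side: LAST group, GOLDEN */
  if (ref == ALTREF) return 2;
  return (ref == ALTREF2) ? 1 : 0;
}

int main(void) {
  assert(backward_leaf(GOLDEN) == -1);
  assert(backward_leaf(BWDREF) == 0);
  assert(backward_leaf(ALTREF2) == 1);
  assert(backward_leaf(ALTREF) == 2);
  return 0;
}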
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi))
counts->comp_inter_mode[av1_get_inter_mode_context(xd)]
@@ -1717,31 +1784,32 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
const int bsize_group = size_group_lookup[bsize];
if (mbmi->ref_frame[1] == INTRA_FRAME) {
counts->interintra[bsize_group][1]++;
+#if CONFIG_NEW_MULTISYMBOL
+ update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
+#endif
counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
- if (is_interintra_wedge_used(bsize))
+ update_cdf(fc->interintra_mode_cdf[bsize_group],
+ mbmi->interintra_mode, INTERINTRA_MODES);
+ if (is_interintra_wedge_used(bsize)) {
counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
+#if CONFIG_NEW_MULTISYMBOL
+ update_cdf(fc->wedge_interintra_cdf[bsize],
+ mbmi->use_wedge_interintra, 2);
+#endif
+ }
} else {
counts->interintra[bsize_group][0]++;
+#if CONFIG_NEW_MULTISYMBOL
+ update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
+#endif
}
}
#endif // CONFIG_INTERINTRA
-#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_WARPED_MOTION
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
#endif
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- const MOTION_MODE motion_allowed =
- motion_mode_allowed_wrapper(0,
-#if CONFIG_GLOBAL_MOTION
- 0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_WARPED_MOTION
- xd,
-#endif
- mi);
-#else
const MOTION_MODE motion_allowed = motion_mode_allowed(
#if CONFIG_GLOBAL_MOTION
0, xd->global_motion,
@@ -1750,23 +1818,41 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
xd,
#endif
mi);
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#if CONFIG_SUPERTX
if (!supertx_enabled)
#endif // CONFIG_SUPERTX
-#if CONFIG_EXT_INTER
if (mbmi->ref_frame[1] != INTRA_FRAME)
-#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
{
- if (motion_allowed == WARPED_CAUSAL)
+ if (motion_allowed == WARPED_CAUSAL) {
counts->motion_mode[mbmi->sb_type][mbmi->motion_mode]++;
- else if (motion_allowed == OBMC_CAUSAL)
+ update_cdf(fc->motion_mode_cdf[mbmi->sb_type], mbmi->motion_mode,
+ MOTION_MODES);
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ } else if (motion_allowed == NCOBMC_ADAPT_WEIGHT) {
+ counts->ncobmc[mbmi->sb_type][mbmi->motion_mode]++;
+ update_cdf(fc->ncobmc_cdf[mbmi->sb_type], mbmi->motion_mode,
+ OBMC_FAMILY_MODES);
+ } else if (motion_allowed == OBMC_CAUSAL) {
+ counts->obmc[mbmi->sb_type][mbmi->motion_mode == OBMC_CAUSAL]++;
+ update_cdf(fc->obmc_cdf[mbmi->sb_type], mbmi->motion_mode, 2);
+ }
+#else
+ } else if (motion_allowed == OBMC_CAUSAL) {
counts->obmc[mbmi->sb_type][mbmi->motion_mode == OBMC_CAUSAL]++;
+#if CONFIG_NEW_MULTISYMBOL
+ update_cdf(fc->obmc_cdf[mbmi->sb_type],
+ mbmi->motion_mode == OBMC_CAUSAL, 2);
+#endif
+ }
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
}
#else
- if (motion_allowed > SIMPLE_TRANSLATION)
- counts->motion_mode[mbmi->sb_type][mbmi->motion_mode]++;
+ if (motion_allowed > SIMPLE_TRANSLATION) {
+ counts->motion_mode[mbmi->sb_type][mbmi->motion_mode]++;
+ update_cdf(fc->motion_mode_cdf[mbmi->sb_type], mbmi->motion_mode,
+ MOTION_MODES);
+ }
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
#if CONFIG_NCOBMC_ADAPT_WEIGHT
@@ -1774,15 +1860,18 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
ADAPT_OVERLAP_BLOCK ao_block =
adapt_overlap_block_lookup[mbmi->sb_type];
++counts->ncobmc_mode[ao_block][mbmi->ncobmc_mode[0]];
+ update_cdf(fc->ncobmc_mode_cdf[ao_block], mbmi->ncobmc_mode[0],
+ MAX_NCOBMC_MODES);
if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) {
++counts->ncobmc_mode[ao_block][mbmi->ncobmc_mode[1]];
+ update_cdf(fc->ncobmc_mode_cdf[ao_block], mbmi->ncobmc_mode[1],
+ MAX_NCOBMC_MODES);
}
}
#endif
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_EXT_INTER
if (
#if CONFIG_COMPOUND_SINGLEREF
is_inter_anyref_comp_mode(mbmi->mode)
@@ -1794,9 +1883,19 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
&& mbmi->motion_mode == SIMPLE_TRANSLATION
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
) {
- counts->compound_interinter[bsize][mbmi->interinter_compound_type]++;
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+#if CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
+ if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
+#endif
+ counts
+ ->compound_interinter[bsize][mbmi->interinter_compound_type]++;
+ update_cdf(fc->compound_type_cdf[bsize],
+ mbmi->interinter_compound_type, COMPOUND_TYPES);
+#if CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
+ }
+#endif
+#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
}
-#endif // CONFIG_EXT_INTER
}
}
@@ -1804,10 +1903,11 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
int16_t mode_ctx;
const PREDICTION_MODE mode = mbmi->mode;
-#if CONFIG_EXT_INTER
if (has_second_ref(mbmi)) {
mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
+ update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
+ INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
#if CONFIG_COMPOUND_SINGLEREF
} else if (is_inter_singleref_comp_mode(mode)) {
mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
@@ -1815,24 +1915,17 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
[INTER_SINGLEREF_COMP_OFFSET(mode)];
#endif // CONFIG_COMPOUND_SINGLEREF
} else {
-#endif // CONFIG_EXT_INTER
mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
mbmi->ref_frame, bsize, -1);
update_inter_mode_stats(counts, mode, mode_ctx);
-#if CONFIG_EXT_INTER
}
-#endif // CONFIG_EXT_INTER
-#if CONFIG_EXT_INTER
+ int mode_allowed = (mbmi->mode == NEWMV);
+ mode_allowed |= (mbmi->mode == NEW_NEWMV);
#if CONFIG_COMPOUND_SINGLEREF
- if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV ||
- mbmi->mode == SR_NEW_NEWMV) {
-#else // !CONFIG_COMPOUND_SINGLEREF
- if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
+ mode_allowed |= (mbmi->mode == SR_NEW_NEWMV);
#endif // CONFIG_COMPOUND_SINGLEREF
-#else // !CONFIG_EXT_INTER
- if (mbmi->mode == NEWMV) {
-#endif // CONFIG_EXT_INTER
+ if (mode_allowed) {
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
int idx;
@@ -1847,11 +1940,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
}
}
-#if CONFIG_EXT_INTER
if (have_nearmv_in_inter_mode(mbmi->mode)) {
-#else
- if (mbmi->mode == NEARMV) {
-#endif
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
int idx;
@@ -1868,7 +1957,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
}
#if CONFIG_INTRABC
} else {
- if (cm->allow_screen_content_tools && bsize >= BLOCK_8X8) {
+ if (av1_allow_intrabc(bsize, cm)) {
FRAME_COUNTS *const counts = td->counts;
++counts->intrabc[mbmi->use_intrabc];
} else {
@@ -1992,7 +2081,8 @@ static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile,
#endif
PICK_MODE_CONTEXT *ctx, int *rate) {
MACROBLOCK *const x = &td->mb;
-#if (CONFIG_MOTION_VAR && CONFIG_NCOBMC) | CONFIG_EXT_DELTA_Q
+#if (CONFIG_MOTION_VAR && CONFIG_NCOBMC) | CONFIG_EXT_DELTA_Q | \
+ CONFIG_NCOBMC_ADAPT_WEIGHT
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi;
#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
@@ -2005,11 +2095,14 @@ static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile,
x->e_mbd.mi[0]->mbmi.partition = partition;
#endif
update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run);
-#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
+#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
mbmi = &xd->mi[0]->mbmi;
#if CONFIG_WARPED_MOTION
set_ref_ptrs(&cpi->common, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
#endif
+#endif
+
+#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
const MOTION_MODE motion_allowed = motion_mode_allowed(
#if CONFIG_GLOBAL_MOTION
0, xd->global_motion,
@@ -2018,6 +2111,9 @@ static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile,
xd,
#endif
xd->mi[0]);
+#endif // CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT)
+
+#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
check_ncobmc = is_inter_block(mbmi) && motion_allowed >= OBMC_CAUSAL;
if (!dry_run && check_ncobmc) {
av1_check_ncobmc_rd(cpi, x, mi_row, mi_col);
@@ -2025,13 +2121,38 @@ static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile,
get_frame_new_buffer(&cpi->common), mi_row, mi_col);
}
#endif
+
+#if CONFIG_LV_MAP
+ av1_set_coeff_buffer(cpi, x, mi_row, mi_col);
+#endif
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ if (dry_run == OUTPUT_ENABLED && !frame_is_intra_only(&cpi->common)) {
+ if (motion_allowed >= NCOBMC_ADAPT_WEIGHT && is_inter_block(mbmi)) {
+ get_ncobmc_intrpl_pred(cpi, td, mi_row, mi_col, bsize);
+ av1_check_ncobmc_adapt_weight_rd(cpi, x, mi_row, mi_col);
+ }
+ av1_setup_dst_planes(x->e_mbd.plane, bsize,
+ get_frame_new_buffer(&cpi->common), mi_row, mi_col);
+ }
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
+
encode_superblock(cpi, td, tp, dry_run, mi_row, mi_col, bsize, rate);
+#if CONFIG_LV_MAP
+ if (dry_run == 0)
+ x->cb_offset += block_size_wide[bsize] * block_size_high[bsize];
+#endif
+
if (!dry_run) {
#if CONFIG_EXT_DELTA_Q
mbmi = &xd->mi[0]->mbmi;
- if (bsize == BLOCK_64X64 && mbmi->skip == 1 && is_inter_block(mbmi) &&
+ if (bsize == cpi->common.sb_size && mbmi->skip == 1 &&
cpi->common.delta_lf_present_flag) {
+#if CONFIG_LOOPFILTER_LEVEL
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id)
+ mbmi->curr_delta_lf[lf_id] = xd->prev_delta_lf[lf_id];
+#endif // CONFIG_LOOPFILTER_LEVEL
mbmi->current_delta_lf_from_base = xd->prev_delta_lf_from_base;
}
#endif
@@ -2051,6 +2172,9 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int hbs = mi_size_wide[bsize] / 2;
+#if CONFIG_EXT_PARTITION_TYPES && CONFIG_EXT_PARTITION_TYPES_AB
+ const int qbs = mi_size_wide[bsize] / 4;
+#endif
const int is_partition_root = bsize >= BLOCK_8X8;
const int ctx = is_partition_root
? partition_plane_context(xd, mi_row, mi_col,
@@ -2063,9 +2187,11 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
const PARTITION_TYPE partition = pc_tree->partitioning;
const BLOCK_SIZE subsize = get_subsize(bsize, partition);
#if CONFIG_EXT_PARTITION_TYPES
- const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
int quarter_step = mi_size_wide[bsize] / 4;
int i;
+#if !CONFIG_EXT_PARTITION_TYPES_AB
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
#endif
#if CONFIG_CB4X4
@@ -2077,11 +2203,6 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
-#if CONFIG_SPEED_REFS
- // First scanning pass of an SB is dry run only.
- if (cpi->sb_scanning_pass_idx == 0) assert(dry_run == DRY_RUN_NORMAL);
-#endif // CONFIG_SPEED_REFS
-
if (!dry_run && ctx >= 0) td->counts->partition[ctx][partition]++;
#if CONFIG_SUPERTX
@@ -2138,6 +2259,7 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
td->counts->supertx[partition_supertx_context_lookup[partition]]
[supertx_size][1]++;
td->counts->supertx_size[supertx_size]++;
+#if CONFIG_ENTROPY_STATS
#if CONFIG_EXT_TX
if (get_ext_tx_types(supertx_size, bsize, 1, cm->reduced_tx_set_used) >
1 &&
@@ -2154,6 +2276,7 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
++td->counts->inter_ext_tx[supertx_size][xd->mi[0]->mbmi.tx_type];
}
#endif // CONFIG_EXT_TX
+#endif // CONFIG_ENTROPY_STATS
}
#if CONFIG_EXT_PARTITION_TYPES
update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize,
@@ -2230,7 +2353,53 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
subsize, pc_tree->split[3], rate);
}
break;
+
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ case PARTITION_HORZ_A:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, dry_run,
+ get_subsize(bsize, PARTITION_HORZ_4), partition,
+ &pc_tree->horizontala[0], rate);
+ encode_b(cpi, tile, td, tp, mi_row + qbs, mi_col, dry_run,
+ get_subsize(bsize, PARTITION_HORZ_4), partition,
+ &pc_tree->horizontala[1], rate);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
+ partition, &pc_tree->horizontala[2], rate);
+ break;
+ case PARTITION_HORZ_B:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, dry_run, subsize, partition,
+ &pc_tree->horizontalb[0], rate);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, dry_run,
+ get_subsize(bsize, PARTITION_HORZ_4), partition,
+ &pc_tree->horizontalb[1], rate);
+ if (mi_row + 3 * qbs < cm->mi_rows)
+ encode_b(cpi, tile, td, tp, mi_row + 3 * qbs, mi_col, dry_run,
+ get_subsize(bsize, PARTITION_HORZ_4), partition,
+ &pc_tree->horizontalb[2], rate);
+ break;
+ case PARTITION_VERT_A:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, dry_run,
+ get_subsize(bsize, PARTITION_VERT_4), partition,
+ &pc_tree->verticala[0], rate);
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + qbs, dry_run,
+ get_subsize(bsize, PARTITION_VERT_4), partition,
+ &pc_tree->verticala[1], rate);
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
+ partition, &pc_tree->verticala[2], rate);
+
+ break;
+ case PARTITION_VERT_B:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, dry_run, subsize, partition,
+ &pc_tree->verticalb[0], rate);
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, dry_run,
+ get_subsize(bsize, PARTITION_VERT_4), partition,
+ &pc_tree->verticalb[1], rate);
+ if (mi_col + 3 * qbs < cm->mi_cols)
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + 3 * qbs, dry_run,
+ get_subsize(bsize, PARTITION_VERT_4), partition,
+ &pc_tree->verticalb[2], rate);
+ break;
+#else
case PARTITION_HORZ_A:
encode_b(cpi, tile, td, tp, mi_row, mi_col, dry_run, bsize2, partition,
&pc_tree->horizontala[0], rate);
@@ -2264,6 +2433,7 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col + hbs, dry_run, bsize2,
partition, &pc_tree->verticalb[2], rate);
break;
+#endif
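
Under CONFIG_EXT_PARTITION_TYPES_AB, HORZ_A becomes two quarter-height strips stacked on a half-height block (and symmetrically for the other three AB types), so the `qbs`/`hbs` offsets used above must tile the block exactly:

#include <assert.h>

int main(void) {
  const int mi_h = 16; /* block height in mi units */
  const int hbs = mi_h / 2, qbs = mi_h / 4;
  assert(qbs + qbs == hbs); /* the strips meet the half block */
  assert(hbs + hbs == mi_h); /* the half block reaches the bottom */
  return 0;
}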
case PARTITION_HORZ_4:
for (i = 0; i < 4; ++i) {
int this_mi_row = mi_row + i * quarter_step;
@@ -2468,10 +2638,10 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
bsize, ctx_none, INT64_MAX);
if (none_rdc.rate < INT_MAX) {
- none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
+ none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
#if CONFIG_SUPERTX
- none_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE];
+ none_rate_nocoef += x->partition_cost[pl][PARTITION_NONE];
#endif
}
@@ -2647,11 +2817,11 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
}
if (last_part_rdc.rate < INT_MAX) {
- last_part_rdc.rate += cpi->partition_cost[pl][partition];
+ last_part_rdc.rate += x->partition_cost[pl][partition];
last_part_rdc.rdcost =
RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
#if CONFIG_SUPERTX
- last_part_rate_nocoef += cpi->partition_cost[pl][partition];
+ last_part_rate_nocoef += x->partition_cost[pl][partition];
#endif
}
@@ -2726,16 +2896,16 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx,
OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL);
- chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
+ chosen_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
#if CONFIG_SUPERTX
- chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT];
+ chosen_rate_nocoef += x->partition_cost[pl][PARTITION_SPLIT];
#endif
}
if (chosen_rdc.rate < INT_MAX) {
- chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
+ chosen_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist);
#if CONFIG_SUPERTX
- chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE];
+ chosen_rate_nocoef += x->partition_cost[pl][PARTITION_NONE];
#endif
}
}
@@ -2803,8 +2973,11 @@ static const BLOCK_SIZE min_partition_size[BLOCK_SIZES_ALL] = {
#if CONFIG_EXT_PARTITION
BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
- BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32
- BLOCK_8X8 // 32x8
+ BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32
+ BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, // 32x8, 16x64, 64x16
+#if CONFIG_EXT_PARTITION
+ BLOCK_16X16, BLOCK_16X16 // 32x128, 128x32
+#endif // CONFIG_EXT_PARTITION
};
static const BLOCK_SIZE max_partition_size[BLOCK_SIZES_ALL] = {
@@ -2820,7 +2993,10 @@ static const BLOCK_SIZE max_partition_size[BLOCK_SIZES_ALL] = {
BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 4x16, 16x4, 8x32
- BLOCK_32X32 // 32x8
+ BLOCK_32X32, BLOCK_LARGEST, BLOCK_LARGEST, // 32x8, 16x64, 64x16
+#if CONFIG_EXT_PARTITION
+ BLOCK_LARGEST, BLOCK_LARGEST // 32x128, 128x32
+#endif // CONFIG_EXT_PARTITION
};
// Next square block size less or equal than current block size.
@@ -2837,7 +3013,10 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES_ALL] = {
BLOCK_64X64, BLOCK_64X64, BLOCK_128X128, // 64x128, 128x64, 128x128
#endif // CONFIG_EXT_PARTITION
BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32
- BLOCK_8X8 // 32x8
+ BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, // 32x8, 16x64, 64x16
+#if CONFIG_EXT_PARTITION
+ BLOCK_32X32, BLOCK_32X32 // 32x128, 128x32
+#endif // CONFIG_EXT_PARTITION
};
/* clang-format on */
@@ -2953,7 +3132,7 @@ static void set_partition_range(const AV1_COMMON *const cm,
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO **const prev_mi = &cm->prev_mi_grid_visible[idx_str];
- BLOCK_SIZE min_size = BLOCK_64X64; // default values
+ BLOCK_SIZE min_size = cm->sb_size; // default values
BLOCK_SIZE max_size = BLOCK_4X4;
if (prev_mi) {
@@ -3004,66 +3183,24 @@ static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
#if CONFIG_FP_MB_STATS
const int qindex_skip_threshold_lookup[BLOCK_SIZES] = {
- 0,
- 10,
- 10,
- 30,
- 40,
- 40,
- 60,
- 80,
- 80,
- 90,
- 100,
- 100,
- 120,
+ 0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120,
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct numbers here?
- 130,
- 130,
- 150
+ 130, 130, 150
#endif // CONFIG_EXT_PARTITION
};
const int qindex_split_threshold_lookup[BLOCK_SIZES] = {
- 0,
- 3,
- 3,
- 7,
- 15,
- 15,
- 30,
- 40,
- 40,
- 60,
- 80,
- 80,
- 120,
+ 0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120,
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct numbers here?
- 160,
- 160,
- 240
+ 160, 160, 240
#endif // CONFIG_EXT_PARTITION
};
const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = {
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 1,
- 4,
- 4,
- 6,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6,
#if CONFIG_EXT_PARTITION
// TODO(debargha): What are the correct numbers here?
- 8,
- 8,
- 10
+ 8, 8, 10
#endif // CONFIG_EXT_PARTITION
};
@@ -3101,6 +3238,78 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
#endif
#if CONFIG_EXT_PARTITION_TYPES
+// Try searching for an encoding for the given subblock. Returns zero if the
+// rdcost is already too high (to tell the caller not to bother searching for
+// encodings of further subblocks)
+static int rd_try_subblock(const AV1_COMP *const cpi, ThreadData *td,
+ TileDataEnc *tile_data, TOKENEXTRA **tp,
+ int is_first, int is_last, int mi_row, int mi_col,
+ BLOCK_SIZE subsize, RD_STATS *best_rdc,
+ RD_STATS *sum_rdc, RD_STATS *this_rdc,
+#if CONFIG_SUPERTX
+ int64_t best_rd, int *sum_rate_nocoef,
+ int *this_rate_nocoef, int *abort_flag,
+#endif
+ PARTITION_TYPE partition,
+ PICK_MODE_CONTEXT *prev_ctx,
+ PICK_MODE_CONTEXT *this_ctx) {
+#if CONFIG_SUPERTX
+#define RTS_X_RATE_NOCOEF_ARG ((is_first) ? sum_rate_nocoef : this_rate_nocoef),
+#define RTS_MAX_RDCOST INT64_MAX
+#else
+#define RTS_X_RATE_NOCOEF_ARG
+#define RTS_MAX_RDCOST best_rdc->rdcost
+#endif
+
+ MACROBLOCK *const x = &td->mb;
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, prev_ctx);
+
+ // On the first time around, write the rd stats straight to sum_rdc. Also, we
+ // should treat sum_rdc as containing zeros (even if it doesn't) to avoid
+ // having to zero it at the start.
+ if (is_first) this_rdc = sum_rdc;
+ const int64_t spent_rdcost = is_first ? 0 : sum_rdc->rdcost;
+ const int64_t rdcost_remaining = best_rdc->rdcost - spent_rdcost;
+
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc,
+ RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx,
+ rdcost_remaining);
+
+#if CONFIG_SUPERTX
+ if (is_first) *abort_flag = sum_rdc->rdcost >= best_rd;
+#endif
+
+ if (!is_first) {
+ if (this_rdc->rate == INT_MAX) {
+ sum_rdc->rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ *sum_rate_nocoef = INT_MAX;
+#endif
+ } else {
+ sum_rdc->rate += this_rdc->rate;
+ sum_rdc->dist += this_rdc->dist;
+ sum_rdc->rdcost += this_rdc->rdcost;
+#if CONFIG_SUPERTX
+ *sum_rate_nocoef += *this_rate_nocoef;
+#endif
+ }
+ }
+
+ if (sum_rdc->rdcost >= RTS_MAX_RDCOST) return 0;
+
+ if (!is_last) {
+ update_state(cpi, td, this_ctx, mi_row, mi_col, subsize, 1);
+ encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
+ NULL);
+ }
+
+ return 1;
+
+#undef RTS_X_RATE_NOCOEF_ARG
+#undef RTS_MAX_RDCOST
+}
+
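
`rd_try_subblock` threads a shrinking rd budget through the subblocks: each search receives `best_rdc->rdcost` minus what earlier subblocks already spent, and a zero return tells the caller to abandon the remaining searches. The pruning arithmetic in miniature:

#include <assert.h>
#include <stdint.h>

int main(void) {
  const int64_t best = 1000; /* incumbent best rdcost */
  const int64_t sub_cost[3] = { 400, 700, 50 };
  int64_t spent = 0;
  int searched = 0;
  for (int i = 0; i < 3; ++i) {
    const int64_t remaining = best - spent; /* budget for this subblock */
    if (remaining <= 0) break;              /* partition already hopeless */
    spent += sub_cost[i];
    ++searched;
  }
  assert(searched == 2); /* 400 + 700 >= 1000: third block never tried */
  return 0;
}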
static void rd_test_partition3(
const AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
TOKENEXTRA **tp, PC_TREE *pc_tree, RD_STATS *best_rdc,
@@ -3113,172 +3322,165 @@ static void rd_test_partition3(
BLOCK_SIZE subsize1, int mi_row2, int mi_col2, BLOCK_SIZE subsize2) {
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- RD_STATS this_rdc, sum_rdc;
-#if CONFIG_SUPERTX
+ RD_STATS sum_rdc, this_rdc;
+#if CONFIG_UNPOISON_PARTITION_CTX
const AV1_COMMON *const cm = &cpi->common;
+ const int hbs = mi_size_wide[bsize] / 2;
+ const int has_rows = mi_row + hbs < cm->mi_rows;
+ const int has_cols = mi_col + hbs < cm->mi_cols;
+#endif // CONFIG_UNPOISON_PARTITION_CTX
+#if (CONFIG_SUPERTX || CONFIG_EXT_PARTITION_TYPES_AB) && \
+    !CONFIG_UNPOISON_PARTITION_CTX
+ const AV1_COMMON *const cm = &cpi->common;
+#endif
+#if CONFIG_SUPERTX
TileInfo *const tile_info = &tile_data->tile_info;
- int this_rate_nocoef, sum_rate_nocoef;
+ int sum_rate_nocoef, this_rate_nocoef;
int abort_flag;
const int supertx_allowed = !frame_is_intra_only(cm) &&
bsize <= MAX_SUPERTX_BLOCK_SIZE &&
!xd->lossless[0];
-#endif
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
-
- rd_pick_sb_modes(cpi, tile_data, x, mi_row0, mi_col0, &sum_rdc,
-#if CONFIG_SUPERTX
- &sum_rate_nocoef,
-#endif
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
-#endif
- subsize0, &ctxs[0], best_rdc->rdcost);
-#if CONFIG_SUPERTX
- abort_flag = sum_rdc.rdcost >= best_rd;
-#endif
-#if CONFIG_SUPERTX
- if (sum_rdc.rdcost < INT64_MAX) {
+#define RTP_STX_TRY_ARGS \
+ best_rd, &sum_rate_nocoef, &this_rate_nocoef, &abort_flag,
#else
- if (sum_rdc.rdcost < best_rdc->rdcost) {
+#define RTP_STX_TRY_ARGS
#endif
- PICK_MODE_CONTEXT *ctx_0 = &ctxs[0];
- update_state(cpi, td, ctx_0, mi_row0, mi_col0, subsize0, 1);
- encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row0, mi_col0, subsize0,
- NULL);
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_0);
+ if (!rd_try_subblock(cpi, td, tile_data, tp, 1, 0, mi_row0, mi_col0, subsize0,
+ best_rdc, &sum_rdc, &this_rdc,
+ RTP_STX_TRY_ARGS partition, ctx, &ctxs[0]))
+ return;
-#if CONFIG_SUPERTX
- rd_pick_sb_modes(cpi, tile_data, x, mi_row1, mi_col1, &this_rdc,
- &this_rate_nocoef,
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
-#endif
- subsize1, &ctxs[1], INT64_MAX - sum_rdc.rdcost);
+ if (!rd_try_subblock(cpi, td, tile_data, tp, 0, 0, mi_row1, mi_col1, subsize1,
+ best_rdc, &sum_rdc, &this_rdc,
+ RTP_STX_TRY_ARGS partition, &ctxs[0], &ctxs[1]))
+ return;
+
+// With the new layout of mixed partitions for PARTITION_HORZ_B and
+// PARTITION_VERT_B, the last subblock might start past halfway through the
+// main block, so we might signal it even though the subblock lies strictly
+// outside the image. In that case, we won't spend any bits coding it and the
+// difference (obviously) doesn't contribute to the error.
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ const int try_block2 = mi_row2 < cm->mi_rows && mi_col2 < cm->mi_cols;
#else
- rd_pick_sb_modes(cpi, tile_data, x, mi_row1, mi_col1, &this_rdc,
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
+ const int try_block2 = 1;
#endif
- subsize1, &ctxs[1], best_rdc->rdcost - sum_rdc.rdcost);
-#endif // CONFIG_SUPERTX
+ if (try_block2 &&
+ !rd_try_subblock(cpi, td, tile_data, tp, 0, 1, mi_row2, mi_col2, subsize2,
+ best_rdc, &sum_rdc, &this_rdc,
+ RTP_STX_TRY_ARGS partition, &ctxs[1], &ctxs[2]))
+ return;
- if (this_rdc.rate == INT_MAX) {
- sum_rdc.rdcost = INT64_MAX;
-#if CONFIG_SUPERTX
- sum_rate_nocoef = INT_MAX;
-#endif
- } else {
- sum_rdc.rate += this_rdc.rate;
- sum_rdc.dist += this_rdc.dist;
- sum_rdc.rdcost += this_rdc.rdcost;
#if CONFIG_SUPERTX
- sum_rate_nocoef += this_rate_nocoef;
-#endif
- }
+ if (supertx_allowed && !abort_flag && sum_rdc.rdcost < INT64_MAX) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ const PARTITION_TYPE best_partition = pc_tree->partitioning;
+ pc_tree->partitioning = partition;
+ sum_rdc.rate += av1_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup[partition]]
+ [supertx_size],
+ 0);
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-#if CONFIG_SUPERTX
- if (sum_rdc.rdcost < INT64_MAX) {
-#else
- if (sum_rdc.rdcost < best_rdc->rdcost) {
-#endif
- PICK_MODE_CONTEXT *ctx_1 = &ctxs[1];
- update_state(cpi, td, ctx_1, mi_row1, mi_col1, subsize1, 1);
- encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row1, mi_col1, subsize1,
- NULL);
+ if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
+ TX_TYPE best_tx = DCT_DCT;
+ RD_STATS tmp_rdc = { sum_rate_nocoef, 0, 0 };
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_1);
+ restore_context(x, x_ctx, mi_row, mi_col, bsize);
-#if CONFIG_SUPERTX
- rd_pick_sb_modes(cpi, tile_data, x, mi_row2, mi_col2, &this_rdc,
- &this_rate_nocoef,
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
-#endif
- subsize2, &ctxs[2], INT64_MAX - sum_rdc.rdcost);
-#else
- rd_pick_sb_modes(cpi, tile_data, x, mi_row2, mi_col2, &this_rdc,
-#if CONFIG_EXT_PARTITION_TYPES
- partition,
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, &tmp_rdc.rate,
+ &tmp_rdc.dist, &best_tx, pc_tree);
+
+ tmp_rdc.rate += av1_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup[partition]]
+ [supertx_size],
+ 1);
+ tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist);
+ if (tmp_rdc.rdcost < sum_rdc.rdcost) {
+ sum_rdc = tmp_rdc;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+
+ pc_tree->partitioning = best_partition;
+ }
#endif
- subsize2, &ctxs[2], best_rdc->rdcost - sum_rdc.rdcost);
-#endif // CONFIG_SUPERTX
- if (this_rdc.rate == INT_MAX) {
- sum_rdc.rdcost = INT64_MAX;
-#if CONFIG_SUPERTX
- sum_rate_nocoef = INT_MAX;
+ if (sum_rdc.rdcost >= best_rdc->rdcost) return;
+
+ int pl = partition_plane_context(xd, mi_row, mi_col,
+#if CONFIG_UNPOISON_PARTITION_CTX
+ has_rows, has_cols,
#endif
- } else {
- sum_rdc.rate += this_rdc.rate;
- sum_rdc.dist += this_rdc.dist;
- sum_rdc.rdcost += this_rdc.rdcost;
+ bsize);
+ sum_rdc.rate += x->partition_cost[pl][partition];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
#if CONFIG_SUPERTX
- sum_rate_nocoef += this_rate_nocoef;
+ sum_rate_nocoef += x->partition_cost[pl][partition];
#endif
- }
+
+ if (sum_rdc.rdcost >= best_rdc->rdcost) return;
#if CONFIG_SUPERTX
- if (supertx_allowed && !abort_flag && sum_rdc.rdcost < INT64_MAX) {
- TX_SIZE supertx_size = max_txsize_lookup[bsize];
- const PARTITION_TYPE best_partition = pc_tree->partitioning;
- pc_tree->partitioning = partition;
- sum_rdc.rate += av1_cost_bit(
- cm->fc->supertx_prob[partition_supertx_context_lookup[partition]]
- [supertx_size],
- 0);
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
+ *best_rate_nocoef = sum_rate_nocoef;
+ assert(*best_rate_nocoef >= 0);
+#endif
+ *best_rdc = sum_rdc;
+ pc_tree->partitioning = partition;
- if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
- TX_TYPE best_tx = DCT_DCT;
- RD_STATS tmp_rdc = { sum_rate_nocoef, 0, 0 };
+#undef RTP_STX_TRY_ARGS
+}
+#endif // CONFIG_EXT_PARTITION_TYPES
- restore_context(x, x_ctx, mi_row, mi_col, bsize);
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
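+// Returns the combined luma and chroma distortion of one 8x8 luma area, in
+// the x16 distortion scale used by the RD code.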
+static int64_t dist_8x8_yuv(const AV1_COMP *const cpi, MACROBLOCK *const x,
+ uint8_t *y_src_8x8) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+  int64_t dist_8x8, dist_8x8_uv;
+ const int src_stride = x->plane[0].src.stride;
+ uint8_t *decoded_8x8;
+ int plane;
- rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
- &tmp_rdc.rate, &tmp_rdc.dist, &best_tx, pc_tree);
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
+ else
+#endif
+ decoded_8x8 = (uint8_t *)x->decoded_8x8;
- tmp_rdc.rate += av1_cost_bit(
- cm->fc->supertx_prob[partition_supertx_context_lookup[partition]]
- [supertx_size],
- 1);
- tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist);
- if (tmp_rdc.rdcost < sum_rdc.rdcost) {
- sum_rdc = tmp_rdc;
- update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
- supertx_size, pc_tree);
- }
- }
+ dist_8x8 = av1_dist_8x8(cpi, x, y_src_8x8, src_stride, decoded_8x8, 8,
+ BLOCK_8X8, 8, 8, 8, 8, x->qindex)
+ << 4;
- pc_tree->partitioning = best_partition;
- }
-#endif // CONFIG_SUPERTX
+  // Compute the chroma distortion over the area covered by the luma 8x8 block
+ dist_8x8_uv = 0;
- if (sum_rdc.rdcost < best_rdc->rdcost) {
- int pl = partition_plane_context(xd, mi_row, mi_col,
-#if CONFIG_UNPOISON_PARTITION_CTX
- has_rows, has_cols,
-#endif
- bsize);
- sum_rdc.rate += cpi->partition_cost[pl][partition];
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
-#if CONFIG_SUPERTX
- sum_rate_nocoef += cpi->partition_cost[pl][partition];
-#endif
- if (sum_rdc.rdcost < best_rdc->rdcost) {
-#if CONFIG_SUPERTX
- *best_rate_nocoef = sum_rate_nocoef;
- assert(*best_rate_nocoef >= 0);
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+ const int src_stride_uv = x->plane[plane].src.stride;
+ const int dst_stride_uv = xd->plane[plane].dst.stride;
+    // The uv buffer pointers are set only once, at the first sub8x8 block
+    // of the 8x8 area, so at this point (the last sub8x8 block) they still
+    // point at the first sub8x8 block and cover the whole 8x8 area.
+ uint8_t *src_uv = x->plane[plane].src.buf;
+ uint8_t *dst_uv = xd->plane[plane].dst.buf;
+ unsigned sse;
+#if CONFIG_CHROMA_SUB8X8
+ const BLOCK_SIZE plane_bsize =
+ AOMMAX(BLOCK_4X4, get_plane_block_size(BLOCK_8X8, &xd->plane[plane]));
+#else
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(BLOCK_8X8, &xd->plane[plane]);
#endif
- *best_rdc = sum_rdc;
- pc_tree->partitioning = partition;
- }
- }
- }
+ cpi->fn_ptr[plane_bsize].vf(src_uv, src_stride_uv, dst_uv, dst_stride_uv,
+ &sse);
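+    // Keep the chroma SSE in the same x16 scale as the luma distortion.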
+ dist_8x8_uv += (int64_t)sse << 4;
}
+
+  return dist_8x8 + dist_8x8_uv;
}
-#endif // CONFIG_EXT_PARTITION_TYPES
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
@@ -3327,7 +3529,8 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif
bsize);
#endif // CONFIG_CB4X4
- const int *partition_cost = cpi->partition_cost[pl];
+ const int *partition_cost =
+ pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
#if CONFIG_SUPERTX
int this_rate_nocoef, sum_rate_nocoef = 0, best_rate_nocoef = INT_MAX;
int abort_flag;
@@ -3337,7 +3540,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif // CONFIG_SUPERTX
int do_rectangular_split = 1;
-#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EXT_PARTITION_TYPES_AB
BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
#endif
@@ -3458,9 +3661,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
mi_col, bsize);
}
-#endif
-#if CONFIG_FP_MB_STATS
// Decide whether we shall split directly and skip searching NONE by using
// the first pass block statistics
if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_square_split &&
@@ -3511,17 +3712,6 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
#endif
-#if CONFIG_SPEED_REFS
- if (cpi->sb_scanning_pass_idx == 0) {
- // NOTE: For the 1st pass of scanning, check all the subblocks of equal size
- // only.
- partition_none_allowed = (bsize == MIN_SPEED_REFS_BLKSIZE);
- partition_horz_allowed = 0;
- partition_vert_allowed = 0;
- do_square_split = (bsize > MIN_SPEED_REFS_BLKSIZE);
- }
-#endif // CONFIG_SPEED_REFS
-
// PARTITION_NONE
if (partition_none_allowed) {
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
@@ -3534,10 +3724,13 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
bsize, ctx_none, best_rdc.rdcost);
if (this_rdc.rate != INT_MAX) {
if (bsize_at_least_8x8) {
- this_rdc.rate += partition_cost[PARTITION_NONE];
+ const int pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
+ ? partition_cost[PARTITION_NONE]
+ : 0;
+ this_rdc.rate += pt_cost;
this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
#if CONFIG_SUPERTX
- this_rate_nocoef += partition_cost[PARTITION_NONE];
+ this_rate_nocoef += pt_cost;
#endif
}
@@ -3622,11 +3815,22 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#else
restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
#endif
+#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
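+    // Debug-only: clear the stored CfL sub-8x8 values so later consistency
+    // checks do not see stale data from this search.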
+ if (!x->skip_chroma_rd) {
+ cfl_clear_sub8x8_val(xd->cfl);
+ }
+#endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
}
// store estimated motion vector
if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
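+
+  // Under SUPERTX the split search below must not be pruned against the
+  // current best rdcost, since supertx may still lower the final cost.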
+#if CONFIG_SUPERTX
+ int64_t temp_best_rdcost = INT64_MAX;
+#else
+ int64_t temp_best_rdcost = best_rdc.rdcost;
+#endif
+
// PARTITION_SPLIT
// TODO(jingning): use the motion vectors given by the above search as
// the starting point of motion search in the following partition type check.
@@ -3634,29 +3838,18 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
int reached_last_index = 0;
subsize = get_subsize(bsize, PARTITION_SPLIT);
if (bsize == BLOCK_8X8 && !unify_bsize) {
-#if CONFIG_DUAL_FILTER
if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
pc_tree->leaf_split[0]->pred_interp_filter =
- ctx_none->mic.mbmi.interp_filter[0];
-#else
- if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
- pc_tree->leaf_split[0]->pred_interp_filter =
- ctx_none->mic.mbmi.interp_filter;
-#endif
-#if CONFIG_SUPERTX
+ av1_extract_interp_filter(ctx_none->mic.mbmi.interp_filters, 0);
+
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+#if CONFIG_SUPERTX
&sum_rate_nocoef,
-#if CONFIG_EXT_PARTITION_TYPES
- PARTITION_SPLIT,
#endif
- subsize, pc_tree->leaf_split[0], INT64_MAX);
-#else
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
#if CONFIG_EXT_PARTITION_TYPES
PARTITION_SPLIT,
#endif
- subsize, pc_tree->leaf_split[0], best_rdc.rdcost);
-#endif // CONFIG_SUPERTX
+ subsize, pc_tree->leaf_split[0], temp_best_rdcost);
if (sum_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
#if CONFIG_SUPERTX
@@ -3705,11 +3898,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
reached_last_index = 1;
} else {
int idx;
-#if CONFIG_SUPERTX
- for (idx = 0; idx < 4 && sum_rdc.rdcost < INT64_MAX; ++idx) {
-#else
- for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
-#endif // CONFIG_SUPERTX
+ for (idx = 0; idx < 4 && sum_rdc.rdcost < temp_best_rdcost; ++idx) {
const int x_idx = (idx & 1) * mi_step;
const int y_idx = (idx >> 1) * mi_step;
@@ -3719,21 +3908,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
pc_tree->split[idx]->index = idx;
-#if CONFIG_SUPERTX
rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
- mi_col + x_idx, subsize, &this_rdc, &this_rate_nocoef,
- INT64_MAX - sum_rdc.rdcost, pc_tree->split[idx]);
-#else
- rd_pick_partition(
- cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
- &this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[idx]);
-#endif // CONFIG_SUPERTX
-
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize == BLOCK_8X8 && this_rdc.rate != INT_MAX) {
- assert(this_rdc.dist_y < INT64_MAX);
- }
+ mi_col + x_idx, subsize, &this_rdc,
+#if CONFIG_SUPERTX
+ &this_rate_nocoef,
#endif
+ temp_best_rdcost - sum_rdc.rdcost,
+ pc_tree->split[idx]);
+
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
#if CONFIG_SUPERTX
@@ -3747,37 +3929,18 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize == BLOCK_8X8) {
- assert(this_rdc.dist_y < INT64_MAX);
- sum_rdc.dist_y += this_rdc.dist_y;
- }
-#endif
}
}
reached_last_index = (idx == 4);
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (reached_last_index && sum_rdc.rdcost != INT64_MAX &&
- bsize == BLOCK_8X8) {
- int64_t dist_8x8;
+ if (x->using_dist_8x8 && reached_last_index &&
+ sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
const int src_stride = x->plane[0].src.stride;
- uint8_t *decoded_8x8;
-
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
- else
-#endif
- decoded_8x8 = (uint8_t *)x->decoded_8x8;
-
+ int64_t dist_8x8;
dist_8x8 =
- av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride - 4,
- src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8,
- x->qindex)
- << 4;
- assert(sum_rdc.dist_y < INT64_MAX);
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
+ dist_8x8_yuv(cpi, x, x->plane[0].src.buf - 4 * src_stride - 4);
+ sum_rdc.dist = dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
@@ -3823,6 +3986,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif // CONFIG_SUPERTX
}
+#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+ if (!reached_last_index && sum_rdc.rdcost >= best_rdc.rdcost)
+ cfl_clear_sub8x8_val(xd->cfl);
+#endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+
if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
@@ -3835,6 +4003,8 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
best_rate_nocoef = sum_rate_nocoef;
assert(best_rate_nocoef >= 0);
+#else
+ temp_best_rdcost = best_rdc.rdcost;
#endif // CONFIG_SUPERTX
pc_tree->partitioning = PARTITION_SPLIT;
}
@@ -3855,17 +4025,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
(do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) {
subsize = get_subsize(bsize, PARTITION_HORZ);
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-#if CONFIG_DUAL_FILTER
- if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed)
- pc_tree->horizontal[0].pred_interp_filter =
- ctx_none->mic.mbmi.interp_filter[0];
-#else
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->horizontal[0].pred_interp_filter =
- ctx_none->mic.mbmi.interp_filter;
-#endif
+ av1_extract_interp_filter(ctx_none->mic.mbmi.interp_filters, 0);
+
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
#if CONFIG_SUPERTX
&sum_rate_nocoef,
@@ -3879,11 +4043,9 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
abort_flag =
(sum_rdc.rdcost >= best_rd && (bsize > BLOCK_8X8 || unify_bsize)) ||
(sum_rdc.rate == INT_MAX && bsize == BLOCK_8X8);
- if (sum_rdc.rdcost < INT64_MAX &&
-#else
- if (sum_rdc.rdcost < best_rdc.rdcost &&
-#endif // CONFIG_SUPERTX
- !force_horz_split && (bsize > BLOCK_8X8 || unify_bsize)) {
+#endif
+ if (sum_rdc.rdcost < temp_best_rdcost && !force_horz_split &&
+ (bsize > BLOCK_8X8 || unify_bsize)) {
PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0];
update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
@@ -3891,17 +4053,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
-#if CONFIG_DUAL_FILTER
- if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed)
- pc_tree->horizontal[1].pred_interp_filter =
- ctx_h->mic.mbmi.interp_filter[0];
-#else
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->horizontal[1].pred_interp_filter =
- ctx_none->mic.mbmi.interp_filter;
-#endif
+ av1_extract_interp_filter(ctx_h->mic.mbmi.interp_filters, 0);
+
#if CONFIG_SUPERTX
rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
&this_rate_nocoef,
@@ -3919,7 +4075,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
+ if (x->using_dist_8x8 && this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->horizontal[1], mi_row + mi_step, mi_col,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row + mi_step, mi_col,
@@ -3939,28 +4095,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- sum_rdc.dist_y += this_rdc.dist_y;
-#endif
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- int64_t dist_8x8;
+ if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
+ bsize == BLOCK_8X8) {
const int src_stride = x->plane[0].src.stride;
- uint8_t *decoded_8x8;
-
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
- else
-#endif
- decoded_8x8 = (uint8_t *)x->decoded_8x8;
-
- dist_8x8 = av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride,
- src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8,
- 8, x->qindex)
- << 4;
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
+ int64_t dist_8x8;
+ dist_8x8 = dist_8x8_yuv(cpi, x, x->plane[0].src.buf - 4 * src_stride);
+ sum_rdc.dist = dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
@@ -4007,6 +4149,9 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
#endif // CONFIG_SUPERTX
+#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+ cfl_clear_sub8x8_val(xd->cfl);
+#endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
if (sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_HORZ];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
@@ -4036,17 +4181,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-#if CONFIG_DUAL_FILTER
- if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed)
- pc_tree->vertical[0].pred_interp_filter =
- ctx_none->mic.mbmi.interp_filter[0];
-#else
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->vertical[0].pred_interp_filter =
- ctx_none->mic.mbmi.interp_filter;
-#endif
+ av1_extract_interp_filter(ctx_none->mic.mbmi.interp_filters, 0);
+
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
#if CONFIG_SUPERTX
&sum_rate_nocoef,
@@ -4059,28 +4198,23 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
abort_flag =
(sum_rdc.rdcost >= best_rd && (bsize > BLOCK_8X8 || unify_bsize)) ||
(sum_rdc.rate == INT_MAX && bsize == BLOCK_8X8);
- if (sum_rdc.rdcost < INT64_MAX &&
+ const int64_t vert_max_rdcost = INT64_MAX;
#else
- if (sum_rdc.rdcost < best_rdc.rdcost &&
+ const int64_t vert_max_rdcost = best_rdc.rdcost;
#endif // CONFIG_SUPERTX
- !force_vert_split && (bsize > BLOCK_8X8 || unify_bsize)) {
+ if (sum_rdc.rdcost < vert_max_rdcost && !force_vert_split &&
+ (bsize > BLOCK_8X8 || unify_bsize)) {
update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 1);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
NULL);
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
-#if CONFIG_DUAL_FILTER
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->vertical[1].pred_interp_filter =
- ctx_none->mic.mbmi.interp_filter[0];
-#else
- if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed)
- pc_tree->vertical[1].pred_interp_filter =
- ctx_none->mic.mbmi.interp_filter;
-#endif
+ av1_extract_interp_filter(ctx_none->mic.mbmi.interp_filters, 0);
+
#if CONFIG_SUPERTX
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
&this_rate_nocoef,
@@ -4099,7 +4233,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
+ if (x->using_dist_8x8 && this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->vertical[1], mi_row, mi_col + mi_step,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col + mi_step,
@@ -4119,28 +4253,13 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- sum_rdc.dist_y += this_rdc.dist_y;
-#endif
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
+ if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
+ bsize == BLOCK_8X8) {
int64_t dist_8x8;
- const int src_stride = x->plane[0].src.stride;
- uint8_t *decoded_8x8;
-
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
- else
-#endif
- decoded_8x8 = (uint8_t *)x->decoded_8x8;
-
- dist_8x8 =
- av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4, src_stride,
- decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, x->qindex)
- << 4;
- sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
+ dist_8x8 = dist_8x8_yuv(cpi, x, x->plane[0].src.buf - 4);
+ sum_rdc.dist = dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
@@ -4186,6 +4305,10 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
#endif // CONFIG_SUPERTX
+#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+ cfl_clear_sub8x8_val(xd->cfl);
+#endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+
if (sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_VERT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
@@ -4209,9 +4332,31 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
#if CONFIG_EXT_PARTITION_TYPES
+ const int ext_partition_allowed =
+ do_rectangular_split && bsize > BLOCK_8X8 && partition_none_allowed;
+
+#if CONFIG_EXT_PARTITION && CONFIG_EXT_PARTITION_TYPES_AB
+ // Don't allow A/B partitions on 128x128 blocks for now (support for
+ // 128x32 and 32x128 blocks doesn't yet exist).
+ const int ab_partition_allowed =
+ ext_partition_allowed && bsize < BLOCK_128X128;
+#else
+ const int ab_partition_allowed = ext_partition_allowed;
+#endif
+
// PARTITION_HORZ_A
- if (partition_horz_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
- partition_none_allowed) {
+ if (partition_horz_allowed && ab_partition_allowed) {
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ rd_test_partition3(
+ cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->horizontala,
+ ctx_none, mi_row, mi_col, bsize, PARTITION_HORZ_A,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, get_subsize(bsize, PARTITION_HORZ_4),
+ mi_row + mi_step / 2, mi_col, get_subsize(bsize, PARTITION_HORZ_4),
+ mi_row + mi_step, mi_col, get_subsize(bsize, PARTITION_HORZ));
+#else
subsize = get_subsize(bsize, PARTITION_HORZ_A);
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
@@ -4221,11 +4366,26 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif
mi_row, mi_col, bsize2, mi_row, mi_col + mi_step, bsize2,
mi_row + mi_step, mi_col, subsize);
+#endif
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif // !CONFIG_PVQ
}
// PARTITION_HORZ_B
- if (partition_horz_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
- partition_none_allowed) {
+ if (partition_horz_allowed && ab_partition_allowed) {
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ rd_test_partition3(
+ cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->horizontalb,
+ ctx_none, mi_row, mi_col, bsize, PARTITION_HORZ_B,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, get_subsize(bsize, PARTITION_HORZ), mi_row + mi_step,
+ mi_col, get_subsize(bsize, PARTITION_HORZ_4), mi_row + 3 * mi_step / 2,
+ mi_col, get_subsize(bsize, PARTITION_HORZ_4));
+#else
subsize = get_subsize(bsize, PARTITION_HORZ_B);
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
@@ -4235,11 +4395,26 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif
mi_row, mi_col, subsize, mi_row + mi_step, mi_col,
bsize2, mi_row + mi_step, mi_col + mi_step, bsize2);
+#endif
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif // !CONFIG_PVQ
}
// PARTITION_VERT_A
- if (partition_vert_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
- partition_none_allowed) {
+ if (partition_vert_allowed && ab_partition_allowed) {
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ rd_test_partition3(
+ cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->verticala,
+ ctx_none, mi_row, mi_col, bsize, PARTITION_VERT_A,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, get_subsize(bsize, PARTITION_VERT_4), mi_row,
+ mi_col + mi_step / 2, get_subsize(bsize, PARTITION_VERT_4), mi_row,
+ mi_col + mi_step, get_subsize(bsize, PARTITION_VERT));
+#else
subsize = get_subsize(bsize, PARTITION_VERT_A);
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
@@ -4249,11 +4424,26 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif
mi_row, mi_col, bsize2, mi_row + mi_step, mi_col, bsize2,
mi_row, mi_col + mi_step, subsize);
+#endif
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif // !CONFIG_PVQ
}
// PARTITION_VERT_B
- if (partition_vert_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
- partition_none_allowed) {
+ if (partition_vert_allowed && ab_partition_allowed) {
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ rd_test_partition3(
+ cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->verticalb,
+ ctx_none, mi_row, mi_col, bsize, PARTITION_VERT_B,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, get_subsize(bsize, PARTITION_VERT), mi_row,
+ mi_col + mi_step, get_subsize(bsize, PARTITION_VERT_4), mi_row,
+ mi_col + 3 * mi_step / 2, get_subsize(bsize, PARTITION_VERT_4));
+#else
subsize = get_subsize(bsize, PARTITION_VERT_B);
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
@@ -4263,52 +4453,47 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif
mi_row, mi_col, subsize, mi_row, mi_col + mi_step,
bsize2, mi_row + mi_step, mi_col + mi_step, bsize2);
+#endif
+#if !CONFIG_PVQ
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+#else
+ restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize);
+#endif // !CONFIG_PVQ
}
+#if CONFIG_EXT_PARTITION
+ const int can_partition_4 = (bsize == BLOCK_128X128 || bsize == BLOCK_64X64 ||
+ bsize == BLOCK_32X32 || bsize == BLOCK_16X16);
+#else
+ const int can_partition_4 =
+ (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16);
+#endif // CONFIG_EXT_PARTITION
+
// PARTITION_HORZ_4
// TODO(david.barker): For this and PARTITION_VERT_4,
// * Add support for BLOCK_16X16 once we support 2x8 and 8x2 blocks for the
// chroma plane
// * Add support for supertx
- if (bsize == BLOCK_32X32 && partition_horz_allowed && !force_horz_split &&
+ if (can_partition_4 && partition_horz_allowed && !force_horz_split &&
(do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) {
- int i;
const int quarter_step = mi_size_high[bsize] / 4;
PICK_MODE_CONTEXT *ctx_prev = ctx_none;
subsize = get_subsize(bsize, PARTITION_HORZ_4);
- av1_zero(sum_rdc);
- for (i = 0; i < 4; ++i) {
+ for (int i = 0; i < 4; ++i) {
int this_mi_row = mi_row + i * quarter_step;
if (i > 0 && this_mi_row >= cm->mi_rows) break;
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_prev);
-
- ctx_prev = &pc_tree->horizontal4[i];
+ PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i];
- rd_pick_sb_modes(cpi, tile_data, x, this_mi_row, mi_col, &this_rdc,
- PARTITION_HORZ_4, subsize, ctx_prev,
- best_rdc.rdcost - sum_rdc.rdcost);
-
- if (this_rdc.rate == INT_MAX) {
- sum_rdc.rdcost = INT64_MAX;
+ if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 0), (i == 3),
+ this_mi_row, mi_col, subsize, &best_rdc, &sum_rdc,
+ &this_rdc, PARTITION_HORZ_4, ctx_prev, ctx_this))
break;
- } else {
- sum_rdc.rate += this_rdc.rate;
- sum_rdc.dist += this_rdc.dist;
- sum_rdc.rdcost += this_rdc.rdcost;
- }
- if (sum_rdc.rdcost >= best_rdc.rdcost) break;
-
- if (i < 3) {
- update_state(cpi, td, ctx_prev, this_mi_row, mi_col, subsize, 1);
- encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, this_mi_row, mi_col,
- subsize, NULL);
- }
+ ctx_prev = ctx_this;
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
@@ -4326,43 +4511,26 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif
}
// PARTITION_VERT_4
- if (bsize == BLOCK_32X32 && partition_vert_allowed && !force_vert_split &&
+ if (can_partition_4 && partition_vert_allowed && !force_vert_split &&
(do_rectangular_split || av1_active_v_edge(cpi, mi_row, mi_step))) {
- int i;
const int quarter_step = mi_size_wide[bsize] / 4;
PICK_MODE_CONTEXT *ctx_prev = ctx_none;
subsize = get_subsize(bsize, PARTITION_VERT_4);
- av1_zero(sum_rdc);
- for (i = 0; i < 4; ++i) {
+ for (int i = 0; i < 4; ++i) {
int this_mi_col = mi_col + i * quarter_step;
if (i > 0 && this_mi_col >= cm->mi_cols) break;
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_prev);
+ PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i];
- ctx_prev = &pc_tree->vertical4[i];
-
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, this_mi_col, &this_rdc,
- PARTITION_VERT_4, subsize, ctx_prev,
- best_rdc.rdcost - sum_rdc.rdcost);
-
- if (this_rdc.rate == INT_MAX) {
- sum_rdc.rdcost = INT64_MAX;
- } else {
- sum_rdc.rate += this_rdc.rate;
- sum_rdc.dist += this_rdc.dist;
- sum_rdc.rdcost += this_rdc.rdcost;
- }
-
- if (sum_rdc.rdcost >= best_rdc.rdcost) break;
+ if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 0), (i == 3), mi_row,
+ this_mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
+ PARTITION_VERT_4, ctx_prev, ctx_this))
+ break;
- if (i < 3) {
- update_state(cpi, td, ctx_prev, mi_row, this_mi_col, subsize, 1);
- encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, this_mi_col,
- subsize, NULL);
- }
+ ctx_prev = ctx_this;
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
@@ -4381,11 +4549,6 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
#endif // CONFIG_EXT_PARTITION_TYPES
-#if CONFIG_SPEED_REFS
- // First scanning is done.
- if (cpi->sb_scanning_pass_idx == 0 && bsize == cm->sb_size) return;
-#endif // CONFIG_SPEED_REFS
-
// TODO(jbb): This code added so that we avoid static analysis
// warning related to the fact that best_rd isn't used after this
// point. This code should be refactored so that the duplicate
@@ -4393,25 +4556,24 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
(void)best_rd;
*rd_cost = best_rdc;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) {
- assert(rd_cost->dist_y < INT64_MAX);
- }
-#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
#if CONFIG_SUPERTX
*rate_nocoef = best_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_CFL
- // Store the luma for the best mode
- x->cfl_store_y = 1;
-#endif
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
pc_tree->index != 3) {
if (bsize == cm->sb_size) {
-#if CONFIG_MOTION_VAR && CONFIG_NCOBMC
+#if CONFIG_MOTION_VAR && NC_MODE_INFO
set_mode_info_sb(cpi, td, tile_info, tp, mi_row, mi_col, bsize, pc_tree);
#endif
+
+#if CONFIG_LV_MAP
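+      // Reset the coefficient buffer offset at the start of the superblock.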
+ x->cb_offset = 0;
+#endif
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ set_sb_mi_boundaries(cm, xd, mi_row, mi_col);
+#endif
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
pc_tree, NULL);
} else {
@@ -4419,13 +4581,10 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
pc_tree, NULL);
}
}
-#if CONFIG_CFL
- x->cfl_store_y = 0;
-#endif
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
- bsize == BLOCK_4X4 && pc_tree->index == 3) {
+ if (x->using_dist_8x8 && best_rdc.rate < INT_MAX &&
+ best_rdc.dist < INT64_MAX && bsize == BLOCK_4X4 && pc_tree->index == 3) {
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
pc_tree, NULL);
}
@@ -4442,22 +4601,6 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
}
-#if CONFIG_SPEED_REFS
-static void restore_mi(const AV1_COMP *const cpi, MACROBLOCK *const x,
- int mi_row, int mi_col) {
- const AV1_COMMON *cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
- int x_idx, y;
- for (y = 0; y < mi_size_high[cm->sb_size]; y++)
- for (x_idx = 0; x_idx < mi_size_wide[cm->sb_size]; x_idx++)
- if (mi_col + x_idx < cm->mi_cols && mi_row + y < cm->mi_rows) {
- memset(xd->mi + y * cm->mi_stride + x_idx, 0, sizeof(*xd->mi));
- memset(x->mbmi_ext + y * cm->mi_cols + x_idx, 0, sizeof(*x->mbmi_ext));
- }
-}
-#endif // CONFIG_SPEED_REFS
-
static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, int mi_row,
TOKENEXTRA **tp) {
@@ -4476,14 +4619,18 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
// Initialize the left context for the new SB row
av1_zero_left_context(xd);
-#if CONFIG_DELTA_Q
// Reset delta for every tile
if (cm->delta_q_present_flag)
if (mi_row == tile_info->mi_row_start) xd->prev_qindex = cm->base_qindex;
#if CONFIG_EXT_DELTA_Q
- if (cm->delta_lf_present_flag)
+ if (cm->delta_lf_present_flag) {
+#if CONFIG_LOOPFILTER_LEVEL
+ if (mi_row == tile_info->mi_row_start)
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id)
+ xd->prev_delta_lf[lf_id] = 0;
+#endif // CONFIG_LOOPFILTER_LEVEL
if (mi_row == tile_info->mi_row_start) xd->prev_delta_lf_from_base = 0;
-#endif
+ }
#endif
// Code each SB in the row
@@ -4503,9 +4650,21 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
MODE_INFO **mi = cm->mi_grid_visible + idx_str;
PC_TREE *const pc_root = td->pc_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2];
+#if CONFIG_LV_MAP && LV_MAP_PROB
+ av1_fill_coeff_costs(&td->mb, xd->tile_ctx);
+#else
+ av1_fill_token_costs_from_cdf(x->token_head_costs,
+ x->e_mbd.tile_ctx->coef_head_cdfs);
+ av1_fill_token_costs_from_cdf(x->token_tail_costs,
+ x->e_mbd.tile_ctx->coef_tail_cdfs);
+#endif
+ av1_fill_mode_rates(cm, x, xd->tile_ctx);
+
if (sf->adaptive_pred_interp_filter) {
+#if !CONFIG_CB4X4
for (i = 0; i < leaf_nodes; ++i)
td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
+#endif
for (i = 0; i < leaf_nodes; ++i) {
td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
@@ -4515,6 +4674,7 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
}
}
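+    // Start each superblock with an empty transform RD record cache.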
+ x->tx_rd_record.num = x->tx_rd_record.index_start = 0;
av1_zero(x->pred_mv);
pc_root->index = 0;
@@ -4524,8 +4684,10 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
int segment_id = get_segment_id(cm, map, cm->sb_size, mi_row, mi_col);
seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
}
+#if CONFIG_AMVR
+ xd->cur_frame_mv_precision_level = cm->cur_frame_mv_precision_level;
+#endif
-#if CONFIG_DELTA_Q
if (cm->delta_q_present_flag) {
// Test mode for delta quantization
int sb_row = mi_row >> 3;
@@ -4545,7 +4707,7 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
assert(current_qindex > 0);
xd->delta_qindex = current_qindex - cm->base_qindex;
- set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size);
xd->mi[0]->mbmi.current_q_index = current_qindex;
#if !CONFIG_EXT_DELTA_Q
xd->mi[0]->mbmi.segment_id = 0;
@@ -4564,13 +4726,19 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
for (j = 0; j < AOMMIN(cm->mib_size, cm->mi_rows - mi_row); j++) {
for (k = 0; k < AOMMIN(cm->mib_size, cm->mi_cols - mi_col); k++) {
cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)]
- .mbmi.current_delta_lf_from_base = current_delta_lf_from_base;
+ .mbmi.current_delta_lf_from_base =
+ clamp(current_delta_lf_from_base, 0, MAX_LOOP_FILTER);
+#if CONFIG_LOOPFILTER_LEVEL
+ for (int lf_id = 0; lf_id < FRAME_LF_COUNT; ++lf_id) {
+ cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)]
+ .mbmi.curr_delta_lf[lf_id] = current_delta_lf_from_base;
+ }
+#endif // CONFIG_LOOPFILTER_LEVEL
}
}
}
#endif // CONFIG_EXT_DELTA_Q
}
-#endif // CONFIG_DELTA_Q
x->source_variance = UINT_MAX;
if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
@@ -4602,35 +4770,12 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
&x->min_partition_size, &x->max_partition_size);
}
-#if CONFIG_SPEED_REFS
- // NOTE: Two scanning passes for the current superblock - the first pass
- // is only targeted to collect stats.
- int m_search_count_backup = *(x->m_search_count_ptr);
- for (int sb_pass_idx = 0; sb_pass_idx < 2; ++sb_pass_idx) {
- cpi->sb_scanning_pass_idx = sb_pass_idx;
- if (frame_is_intra_only(cm) && sb_pass_idx == 0) continue;
-
- rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size,
- &dummy_rdc,
-#if CONFIG_SUPERTX
- &dummy_rate_nocoef,
-#endif // CONFIG_SUPERTX
- INT64_MAX, pc_root);
- if (sb_pass_idx == 0) {
- av1_zero(x->pred_mv);
- pc_root->index = 0;
- restore_mi(cpi, x, mi_row, mi_col);
- *(x->m_search_count_ptr) = m_search_count_backup;
- }
- }
-#else // !CONFIG_SPEED_REFS
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size,
&dummy_rdc,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
INT64_MAX, pc_root);
-#endif // CONFIG_SPEED_REFS
}
}
}
@@ -4656,7 +4801,7 @@ static int check_dual_ref_flags(AV1_COMP *cpi) {
return (!!(ref_flags & AOM_GOLD_FLAG) + !!(ref_flags & AOM_LAST_FLAG) +
#if CONFIG_EXT_REFS
!!(ref_flags & AOM_LAST2_FLAG) + !!(ref_flags & AOM_LAST3_FLAG) +
- !!(ref_flags & AOM_BWD_FLAG) +
+ !!(ref_flags & AOM_BWD_FLAG) + !!(ref_flags & AOM_ALT2_FLAG) +
#endif // CONFIG_EXT_REFS
!!(ref_flags & AOM_ALT_FLAG)) >= 2;
}
@@ -4686,9 +4831,13 @@ static MV_REFERENCE_FRAME get_frame_type(const AV1_COMP *cpi) {
cpi->rc.is_src_frame_ext_arf)
#else
else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
-#endif
+#endif // CONFIG_EXT_REFS
return ALTREF_FRAME;
- else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
+ else if (cpi->refresh_golden_frame ||
+#if CONFIG_EXT_REFS
+ cpi->refresh_alt2_ref_frame ||
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_alt_ref_frame)
return GOLDEN_FRAME;
else
// TODO(zoeliu): To investigate whether a frame_type other than
@@ -4872,7 +5021,12 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
td->mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context;
#endif // CONFIG_PVQ
- av1_setup_across_tile_boundary_info(cm, tile_info);
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+ if (!cm->loop_filter_across_tiles_enabled)
+ av1_setup_across_tile_boundary_info(cm, tile_info);
+#endif
+
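+  // Set up the 24-bit CRC used to hash blocks for the tx RD record cache.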
+ av1_crc_calculator_init(&td->mb.tx_rd_record.crc_calculator, 24, 0x5D6DCB);
for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
mi_row += cm->mib_size) {
@@ -4925,8 +5079,8 @@ static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
#if CONFIG_GLOBAL_MOTION
#define GLOBAL_TRANS_TYPES_ENC 3 // highest motion model to search
-static int gm_get_params_cost(WarpedMotionParams *gm,
- WarpedMotionParams *ref_gm, int allow_hp) {
+static int gm_get_params_cost(const WarpedMotionParams *gm,
+ const WarpedMotionParams *ref_gm, int allow_hp) {
assert(gm->wmtype < GLOBAL_TRANS_TYPES);
int params_cost = 0;
int trans_bits, trans_prec_diff;
@@ -5010,7 +5164,8 @@ static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm,
}
#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_PALETTE
+// TODO(anybody): remove this flag when PVQ supports the palette coding tool
+#if !CONFIG_PVQ
// Estimate if the source frame is screen content, based on the portion of
// blocks that have no more than 4 (experimentally selected) luma colors.
static int is_screen_content(const uint8_t *src,
@@ -5038,7 +5193,7 @@ static int is_screen_content(const uint8_t *src,
// The threshold is 10%.
return counts * blk_h * blk_w * 10 > width * height;
}
-#endif // CONFIG_PALETTE
+#endif // !CONFIG_PVQ
static void encode_frame_internal(AV1_COMP *cpi) {
ThreadData *const td = &cpi->td;
@@ -5057,18 +5212,21 @@ static void encode_frame_internal(AV1_COMP *cpi) {
x->min_partition_size = AOMMIN(x->min_partition_size, cm->sb_size);
x->max_partition_size = AOMMIN(x->max_partition_size, cm->sb_size);
+#if CONFIG_DIST_8X8
+ x->using_dist_8x8 = cpi->oxcf.using_dist_8x8;
+ x->tune_metric = cpi->oxcf.tuning;
+#endif
cm->setup_mi(cm);
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
av1_zero(*td->counts);
- av1_zero(rdc->coef_counts);
av1_zero(rdc->comp_pred_diff);
-#if CONFIG_PALETTE || CONFIG_INTRABC
if (frame_is_intra_only(cm)) {
-#if CONFIG_PALETTE
+// TODO(anybody): remove this flag when PVQ supports the palette coding tool
+#if !CONFIG_PVQ
cm->allow_screen_content_tools =
cpi->oxcf.content == AOM_CONTENT_SCREEN ||
is_screen_content(cpi->source->y_buffer,
@@ -5078,10 +5236,80 @@ static void encode_frame_internal(AV1_COMP *cpi) {
cpi->source->y_stride, cpi->source->y_width,
cpi->source->y_height);
#else
- cm->allow_screen_content_tools = cpi->oxcf.content == AOM_CONTENT_SCREEN;
-#endif // CONFIG_PALETTE
+ cm->allow_screen_content_tools = 0;
+#endif // !CONFIG_PVQ
+ }
+
+#if CONFIG_HASH_ME
+ if (cpi->oxcf.pass != 1 && cpi->common.allow_screen_content_tools) {
+    // Add the source frame to the hash table for hash-based motion search:
+    // build 2x2 block hashes first, then combine them into 4x4 through
+    // 64x64 hashes, inserting sizes 4 through 64 into the table.
+ const int pic_width = cpi->source->y_crop_width;
+ const int pic_height = cpi->source->y_crop_height;
+ uint32_t *block_hash_values[2][2];
+ int8_t *is_block_same[2][3];
+ int k, j;
+
+ for (k = 0; k < 2; k++) {
+ for (j = 0; j < 2; j++) {
+ CHECK_MEM_ERROR(cm, block_hash_values[k][j],
+ aom_malloc(sizeof(uint32_t) * pic_width * pic_height));
+ }
+
+ for (j = 0; j < 3; j++) {
+ CHECK_MEM_ERROR(cm, is_block_same[k][j],
+ aom_malloc(sizeof(int8_t) * pic_width * pic_height));
+ }
+ }
+
+ av1_hash_table_create(&cm->cur_frame->hash_table);
+ av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0],
+ is_block_same[0]);
+ av1_generate_block_hash_value(cpi->source, 4, block_hash_values[0],
+ block_hash_values[1], is_block_same[0],
+ is_block_same[1]);
+ av1_add_to_hash_map_by_row_with_precal_data(
+ &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
+ pic_width, pic_height, 4);
+ av1_generate_block_hash_value(cpi->source, 8, block_hash_values[1],
+ block_hash_values[0], is_block_same[1],
+ is_block_same[0]);
+ av1_add_to_hash_map_by_row_with_precal_data(
+ &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
+ pic_width, pic_height, 8);
+ av1_generate_block_hash_value(cpi->source, 16, block_hash_values[0],
+ block_hash_values[1], is_block_same[0],
+ is_block_same[1]);
+ av1_add_to_hash_map_by_row_with_precal_data(
+ &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
+ pic_width, pic_height, 16);
+ av1_generate_block_hash_value(cpi->source, 32, block_hash_values[1],
+ block_hash_values[0], is_block_same[1],
+ is_block_same[0]);
+ av1_add_to_hash_map_by_row_with_precal_data(
+ &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
+ pic_width, pic_height, 32);
+ av1_generate_block_hash_value(cpi->source, 64, block_hash_values[0],
+ block_hash_values[1], is_block_same[0],
+ is_block_same[1]);
+ av1_add_to_hash_map_by_row_with_precal_data(
+ &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
+ pic_width, pic_height, 64);
+
+ for (k = 0; k < 2; k++) {
+ for (j = 0; j < 2; j++) {
+ aom_free(block_hash_values[k][j]);
+ }
+
+ for (j = 0; j < 3; j++) {
+ aom_free(is_block_same[k][j]);
+ }
+ }
}
-#endif // CONFIG_PALETTE || CONFIG_INTRABC
+#endif
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ alloc_ncobmc_pred_buffer(xd);
+#endif
#if CONFIG_GLOBAL_MOTION
av1_zero(rdc->global_motion_used);
@@ -5102,6 +5330,10 @@ static void encode_frame_internal(AV1_COMP *cpi) {
for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
ref_buf[frame] = get_ref_frame_buffer(cpi, frame);
int pframe;
+ cm->global_motion[frame] = default_warp_params;
+ const WarpedMotionParams *ref_params =
+ cm->error_resilient_mode ? &default_warp_params
+ : &cm->prev_frame->global_motion[frame];
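+    // In error-resilient mode the previous frame's global motion parameters
+    // cannot be relied on, so cost the new parameters against the defaults.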
// check for duplicate buffer
for (pframe = LAST_FRAME; pframe < frame; ++pframe) {
if (ref_buf[frame] == ref_buf[pframe]) break;
@@ -5168,7 +5400,7 @@ static void encode_frame_internal(AV1_COMP *cpi) {
}
if (cm->global_motion[frame].wmtype <= AFFINE)
if (!get_shear_params(&cm->global_motion[frame]))
- set_default_warp_params(&cm->global_motion[frame]);
+ cm->global_motion[frame] = default_warp_params;
if (cm->global_motion[frame].wmtype == TRANSLATION) {
cm->global_motion[frame].wmmat[0] =
@@ -5185,10 +5417,9 @@ static void encode_frame_internal(AV1_COMP *cpi) {
// this motion type, revert to IDENTITY.
if (!is_enough_erroradvantage(
(double)best_warp_error / ref_frame_error,
- gm_get_params_cost(&cm->global_motion[frame],
- &cm->prev_frame->global_motion[frame],
+ gm_get_params_cost(&cm->global_motion[frame], ref_params,
cm->allow_high_precision_mv))) {
- set_default_warp_params(&cm->global_motion[frame]);
+ cm->global_motion[frame] = default_warp_params;
}
if (cm->global_motion[frame].wmtype != IDENTITY) break;
}
@@ -5196,8 +5427,7 @@ static void encode_frame_internal(AV1_COMP *cpi) {
}
if (cm->global_motion[frame].wmtype != IDENTITY) num_refs_using_gm++;
cpi->gmparams_cost[frame] =
- gm_get_params_cost(&cm->global_motion[frame],
- &cm->prev_frame->global_motion[frame],
+ gm_get_params_cost(&cm->global_motion[frame], ref_params,
cm->allow_high_precision_mv) +
cpi->gmtype_cost[cm->global_motion[frame].wmtype] -
cpi->gmtype_cost[IDENTITY];
@@ -5221,7 +5451,6 @@ static void encode_frame_internal(AV1_COMP *cpi) {
cm->tx_mode = select_tx_mode(cpi);
-#if CONFIG_DELTA_Q
// Fix delta q resolution for the moment
cm->delta_q_res = DEFAULT_DELTA_Q_RES;
// Set delta_q_present_flag before it is used for the first time
@@ -5234,7 +5463,6 @@ static void encode_frame_internal(AV1_COMP *cpi) {
cm->delta_q_present_flag =
cpi->oxcf.aq_mode == DELTA_AQ && cm->base_qindex > 0;
#endif // CONFIG_EXT_DELTA_Q
-#endif
av1_frame_init_quantizer(cpi);
@@ -5262,19 +5490,7 @@ static void encode_frame_internal(AV1_COMP *cpi) {
#endif // CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING
#if CONFIG_TEMPMV_SIGNALING
- if (cm->prev_frame) {
- cm->use_prev_frame_mvs &=
- !cm->error_resilient_mode &&
-#if CONFIG_FRAME_SUPERRES
- cm->width == cm->last_width && cm->height == cm->last_height &&
-#else
- cm->width == cm->prev_frame->buf.y_crop_width &&
- cm->height == cm->prev_frame->buf.y_crop_height &&
-#endif // CONFIG_FRAME_SUPERRES
- !cm->intra_only && !cm->prev_frame->intra_only && cm->last_show_frame;
- } else {
- cm->use_prev_frame_mvs = 0;
- }
+ cm->use_prev_frame_mvs &= frame_can_use_prev_frame_mvs(cm);
#else
if (cm->prev_frame) {
cm->use_prev_frame_mvs = !cm->error_resilient_mode &&
@@ -5301,6 +5517,10 @@ static void encode_frame_internal(AV1_COMP *cpi) {
av1_zero(x->blk_skip_drl);
#endif
+#if CONFIG_MFMV
+ av1_setup_motion_field(cm);
+#endif // CONFIG_MFMV
+
{
struct aom_usec_timer emr_timer;
aom_usec_timer_start(&emr_timer);
@@ -5326,6 +5546,9 @@ static void encode_frame_internal(AV1_COMP *cpi) {
aom_usec_timer_mark(&emr_timer);
cpi->time_encode_sb_row += aom_usec_timer_elapsed(&emr_timer);
}
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ free_ncobmc_pred_buffer(xd);
+#endif
#if 0
// Keep record of the total distortion this time around for future use
@@ -5333,7 +5556,6 @@ static void encode_frame_internal(AV1_COMP *cpi) {
#endif
}
-#if CONFIG_EXT_INTER
static void make_consistent_compound_tools(AV1_COMMON *cm) {
(void)cm;
#if CONFIG_INTERINTRA
@@ -5349,7 +5571,6 @@ static void make_consistent_compound_tools(AV1_COMMON *cm) {
cm->allow_masked_compound = 0;
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
}
-#endif // CONFIG_EXT_INTER
void av1_encode_frame(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
@@ -5358,6 +5579,32 @@ void av1_encode_frame(AV1_COMP *cpi) {
// rather than the potential full set of 16 transforms
cm->reduced_tx_set_used = 0;
#endif // CONFIG_EXT_TX
+#if CONFIG_ADAPT_SCAN
+ cm->use_adapt_scan = 1;
+ // TODO(angiebird): call av1_init_scan_order only when use_adapt_scan
+ // switches from 1 to 0
+ if (cm->use_adapt_scan == 0) av1_init_scan_order(cm);
+#endif
+
+#if CONFIG_FRAME_MARKER
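+  // For frames that are not shown immediately (e.g. ARFs), advance
+  // frame_offset by the source offset so it reflects display order.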
+ if (cm->show_frame == 0) {
+ int arf_offset = AOMMIN(
+ (MAX_GF_INTERVAL - 1),
+ cpi->twopass.gf_group.arf_src_offset[cpi->twopass.gf_group.index]);
+#if CONFIG_EXT_REFS
+ int brf_offset =
+ cpi->twopass.gf_group.brf_src_offset[cpi->twopass.gf_group.index];
+ arf_offset = AOMMIN((MAX_GF_INTERVAL - 1), arf_offset + brf_offset);
+#endif // CONFIG_EXT_REFS
+ cm->frame_offset = cm->current_video_frame + arf_offset;
+ } else {
+ cm->frame_offset = cm->current_video_frame;
+ }
+ av1_setup_frame_buf_refs(cm);
+#if CONFIG_FRAME_SIGN_BIAS
+ av1_setup_frame_sign_bias(cm);
+#endif // CONFIG_FRAME_SIGN_BIAS
+#endif // CONFIG_FRAME_MARKER
// In the longer term the encoder should be generalized to match the
// decoder such that we allow compound where one of the 3 buffers has a
@@ -5366,14 +5613,14 @@ void av1_encode_frame(AV1_COMP *cpi) {
// side behavior is where the ALT ref buffer has opposite sign bias to
// the other two.
if (!frame_is_intra_only(cm)) {
-#if !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS)
+#if !CONFIG_ONE_SIDED_COMPOUND
if ((cm->ref_frame_sign_bias[ALTREF_FRAME] ==
cm->ref_frame_sign_bias[GOLDEN_FRAME]) ||
(cm->ref_frame_sign_bias[ALTREF_FRAME] ==
cm->ref_frame_sign_bias[LAST_FRAME])) {
cpi->allow_comp_inter_inter = 0;
} else {
-#endif // !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS)
+#endif // !CONFIG_ONE_SIDED_COMPOUND
cpi->allow_comp_inter_inter = 1;
#if CONFIG_EXT_REFS
cm->comp_fwd_ref[0] = LAST_FRAME;
@@ -5381,16 +5628,16 @@ void av1_encode_frame(AV1_COMP *cpi) {
cm->comp_fwd_ref[2] = LAST3_FRAME;
cm->comp_fwd_ref[3] = GOLDEN_FRAME;
cm->comp_bwd_ref[0] = BWDREF_FRAME;
- cm->comp_bwd_ref[1] = ALTREF_FRAME;
-#else
+ cm->comp_bwd_ref[1] = ALTREF2_FRAME;
+ cm->comp_bwd_ref[2] = ALTREF_FRAME;
+#else // !CONFIG_EXT_REFS
cm->comp_fixed_ref = ALTREF_FRAME;
cm->comp_var_ref[0] = LAST_FRAME;
cm->comp_var_ref[1] = GOLDEN_FRAME;
-#endif // CONFIG_EXT_REFS
-#if !(CONFIG_ONE_SIDED_COMPOUND || \
- CONFIG_EXT_COMP_REFS) // Normative in encoder
+#endif // CONFIG_EXT_REFS
+#if !CONFIG_ONE_SIDED_COMPOUND // Normative in encoder
}
-#endif // !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS)
+#endif // !CONFIG_ONE_SIDED_COMPOUND
} else {
cpi->allow_comp_inter_inter = 0;
}
@@ -5444,9 +5691,7 @@ void av1_encode_frame(AV1_COMP *cpi) {
cm->interp_filter = SWITCHABLE;
#endif
-#if CONFIG_EXT_INTER
make_consistent_compound_tools(cm);
-#endif // CONFIG_EXT_INTER
rdc->single_ref_used_flag = 0;
rdc->compound_ref_used_flag = 0;
@@ -5469,9 +5714,7 @@ void av1_encode_frame(AV1_COMP *cpi) {
#endif // !CONFIG_REF_ADAPT
}
}
-#if CONFIG_EXT_INTER
make_consistent_compound_tools(cm);
-#endif // CONFIG_EXT_INTER
#if CONFIG_VAR_TX
#if CONFIG_RECT_TX_EXT
@@ -5483,10 +5726,11 @@ void av1_encode_frame(AV1_COMP *cpi) {
cm->tx_mode = ALLOW_32X32 + CONFIG_TX64X64;
#else
#if CONFIG_RECT_TX_EXT && CONFIG_EXT_TX
- if (cm->tx_mode == TX_MODE_SELECT && counts->quarter_tx_size[1] == 0) {
+ if (cm->tx_mode == TX_MODE_SELECT && counts->quarter_tx_size[1] == 0)
#else
- if (cm->tx_mode == TX_MODE_SELECT) {
+ if (cm->tx_mode == TX_MODE_SELECT)
#endif
+ {
#if CONFIG_TX64X64
int count4x4 = 0;
int count8x8_8x8p = 0, count8x8_lp = 0;
@@ -5653,9 +5897,7 @@ void av1_encode_frame(AV1_COMP *cpi) {
}
#endif
} else {
-#if CONFIG_EXT_INTER
make_consistent_compound_tools(cm);
-#endif // CONFIG_EXT_INTER
encode_frame_internal(cpi);
}
}
@@ -5664,21 +5906,15 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd,
const MODE_INFO *mi, const MODE_INFO *above_mi,
const MODE_INFO *left_mi, const int intraonly,
const int mi_row, const int mi_col) {
+ FRAME_CONTEXT *fc = xd->tile_ctx;
const MB_MODE_INFO *const mbmi = &mi->mbmi;
-#if CONFIG_ENTROPY_STATS
const PREDICTION_MODE y_mode = mbmi->mode;
const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
-#else // CONFIG_ENTROPY_STATS
(void)counts;
- (void)above_mi;
- (void)left_mi;
- (void)intraonly;
-#endif // CONFIG_ENTROPY_STATS
const BLOCK_SIZE bsize = mbmi->sb_type;
const int unify_bsize = CONFIG_CB4X4;
if (bsize < BLOCK_8X8 && !unify_bsize) {
-#if CONFIG_ENTROPY_STATS
int idx, idy;
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -5687,30 +5923,38 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd,
const int bidx = idy * 2 + idx;
const PREDICTION_MODE bmode = mi->bmi[bidx].as_mode;
if (intraonly) {
+#if CONFIG_ENTROPY_STATS
const PREDICTION_MODE a = av1_above_block_mode(mi, above_mi, bidx);
const PREDICTION_MODE l = av1_left_block_mode(mi, left_mi, bidx);
++counts->kf_y_mode[a][l][bmode];
+#endif // CONFIG_ENTROPY_STATS
+ update_cdf(get_y_mode_cdf(fc, mi, above_mi, left_mi, bidx), bmode,
+ INTRA_MODES);
} else {
+#if CONFIG_ENTROPY_STATS
++counts->y_mode[0][bmode];
+#endif // CONFIG_ENTROPY_STATS
+ update_cdf(fc->y_mode_cdf[0], bmode, INTRA_MODES);
}
}
-#endif // CONFIG_ENTROPY_STATS
} else {
-#if CONFIG_ENTROPY_STATS
if (intraonly) {
+#if CONFIG_ENTROPY_STATS
const PREDICTION_MODE above = av1_above_block_mode(mi, above_mi, 0);
const PREDICTION_MODE left = av1_left_block_mode(mi, left_mi, 0);
++counts->kf_y_mode[above][left][y_mode];
+#endif // CONFIG_ENTROPY_STATS
+ update_cdf(get_y_mode_cdf(fc, mi, above_mi, left_mi, 0), y_mode,
+ INTRA_MODES);
} else {
+#if CONFIG_ENTROPY_STATS
++counts->y_mode[size_group_lookup[bsize]][y_mode];
- }
#endif // CONFIG_ENTROPY_STATS
+ update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]], y_mode, INTRA_MODES);
+ }
+
#if CONFIG_FILTER_INTRA
- if (mbmi->mode == DC_PRED
-#if CONFIG_PALETTE
- && mbmi->palette_mode_info.palette_size[0] == 0
-#endif // CONFIG_PALETTE
- ) {
+ if (mbmi->mode == DC_PRED && mbmi->palette_mode_info.palette_size[0] == 0) {
const int use_filter_intra_mode =
mbmi->filter_intra_mode_info.use_filter_intra_mode[0];
++counts->filter_intra[0][use_filter_intra_mode];
@@ -5721,10 +5965,7 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd,
is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
xd->plane[1].subsampling_y)
#endif
-#if CONFIG_PALETTE
- && mbmi->palette_mode_info.palette_size[1] == 0
-#endif // CONFIG_PALETTE
- ) {
+ && mbmi->palette_mode_info.palette_size[1] == 0) {
const int use_filter_intra_mode =
mbmi->filter_intra_mode_info.use_filter_intra_mode[1];
++counts->filter_intra[1][use_filter_intra_mode];
@@ -5753,6 +5994,7 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd,
#if CONFIG_ENTROPY_STATS
++counts->uv_mode[y_mode][uv_mode];
#endif // CONFIG_ENTROPY_STATS
+ update_cdf(fc->uv_mode_cdf[y_mode], uv_mode, UV_INTRA_MODES);
}
#if CONFIG_VAR_TX
@@ -5770,13 +6012,26 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+ assert(tx_size > TX_4X4);
+
+ if (depth == MAX_VARTX_DEPTH) {
+    // Don't add to counts in this case.
+#if CONFIG_RECT_TX_EXT
+ if (tx_size == plane_tx_size)
+#endif
+ mbmi->tx_size = tx_size;
+ txfm_partition_update(xd->above_txfm_context + blk_col,
+ xd->left_txfm_context + blk_row, tx_size, tx_size);
+ return;
+ }
#if CONFIG_RECT_TX_EXT
if (tx_size == plane_tx_size ||
- mbmi->tx_size == quarter_txsize_lookup[mbmi->sb_type]) {
+ mbmi->tx_size == quarter_txsize_lookup[mbmi->sb_type])
#else
- if (tx_size == plane_tx_size) {
+ if (tx_size == plane_tx_size)
#endif
+ {
++counts->txfm_partition[ctx][0];
#if CONFIG_RECT_TX_EXT
if (tx_size == plane_tx_size)
@@ -5792,7 +6047,7 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
++counts->txfm_partition[ctx][1];
++x->txb_split_count;
- if (tx_size == TX_8X8) {
+ if (sub_txs == TX_4X4) {
mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
mbmi->tx_size = TX_4X4;
txfm_partition_update(xd->above_txfm_context + blk_col,
@@ -5815,10 +6070,22 @@ static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
MACROBLOCKD *xd = &x->e_mbd;
const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
const int mi_height = block_size_high[plane_bsize] >> tx_size_wide_log2[0];
- TX_SIZE max_tx_size = get_vartx_max_txsize(&xd->mi[0]->mbmi, plane_bsize);
+ TX_SIZE max_tx_size = get_vartx_max_txsize(&xd->mi[0]->mbmi, plane_bsize, 0);
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
int idx, idy;
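+  // Rectangular blocks enter the var-tx recursion at a different initial
+  // depth than square blocks.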
+ int init_depth =
+ (mi_height != mi_width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
+
+#if CONFIG_INTRABC
+  // Intrabc doesn't support var-tx yet, so there is no need to update the tx
+  // partition info except for the split count (otherwise common->tx_mode may
+  // be modified, causing a mismatch).
+ if (is_intrabc_block(&x->e_mbd.mi[0]->mbmi)) {
+ if (x->e_mbd.mi[0]->mbmi.tx_size != max_tx_size) ++x->txb_split_count;
+ return;
+ }
+#endif // CONFIG_INTRABC
xd->above_txfm_context =
cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2);
@@ -5827,8 +6094,7 @@ static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
for (idy = 0; idy < mi_height; idy += bh)
for (idx = 0; idx < mi_width; idx += bw)
- update_txfm_count(x, xd, td_counts, max_tx_size, mi_width != mi_height,
- idy, idx);
+ update_txfm_count(x, xd, td_counts, max_tx_size, init_depth, idy, idx);
}
static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
@@ -5874,7 +6140,7 @@ static void tx_partition_set_contexts(const AV1_COMMON *const cm,
int mi_row, int mi_col) {
const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
- TX_SIZE max_tx_size = get_vartx_max_txsize(&xd->mi[0]->mbmi, plane_bsize);
+ TX_SIZE max_tx_size = get_vartx_max_txsize(&xd->mi[0]->mbmi, plane_bsize, 0);
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
int idx, idy;
@@ -5898,6 +6164,10 @@ void av1_update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd,
FRAME_COUNTS *counts) {
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
int is_inter = is_inter_block(mbmi);
+ FRAME_CONTEXT *fc = xd->tile_ctx;
+#if !CONFIG_ENTROPY_STATS
+ (void)counts;
+#endif // !CONFIG_ENTROPY_STATS
#if !CONFIG_TXK_SEL
TX_TYPE tx_type = mbmi->tx_type;
@@ -5916,12 +6186,64 @@ void av1_update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd,
const int eset =
get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
if (eset > 0) {
+#if !CONFIG_LGT_FROM_PRED
+ const TxSetType tx_set_type = get_ext_tx_set_type(
+ tx_size, bsize, is_inter, cm->reduced_tx_set_used);
if (is_inter) {
+ update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]],
+ av1_ext_tx_ind[tx_set_type][tx_type],
+ av1_num_ext_tx_set[tx_set_type]);
+#if CONFIG_ENTROPY_STATS
++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]][tx_type];
+#endif // CONFIG_ENTROPY_STATS
} else {
+#if CONFIG_ENTROPY_STATS
++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][mbmi->mode]
[tx_type];
+#endif // CONFIG_ENTROPY_STATS
+ update_cdf(
+ fc->intra_ext_tx_cdf[eset][txsize_sqr_map[tx_size]][mbmi->mode],
+ av1_ext_tx_ind[tx_set_type][tx_type],
+ av1_num_ext_tx_set[tx_set_type]);
+ }
+#else
+ (void)tx_type;
+ (void)fc;
+ if (is_inter) {
+ if (LGT_FROM_PRED_INTER) {
+ if (is_lgt_allowed(mbmi->mode, tx_size) && !cm->reduced_tx_set_used)
+ ++counts->inter_lgt[txsize_sqr_map[tx_size]][mbmi->use_lgt];
+#if CONFIG_ENTROPY_STATS
+ if (!mbmi->use_lgt)
+ ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]][tx_type];
+ else
+#endif // CONFIG_ENTROPY_STATS
+ mbmi->tx_type = DCT_DCT;
+ } else {
+#if CONFIG_ENTROPY_STATS
+ ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]][tx_type];
+#endif // CONFIG_ENTROPY_STATS
+ }
+ } else {
+ if (LGT_FROM_PRED_INTRA) {
+ if (is_lgt_allowed(mbmi->mode, tx_size) && !cm->reduced_tx_set_used)
+ ++counts->intra_lgt[txsize_sqr_map[tx_size]][mbmi->mode]
+ [mbmi->use_lgt];
+#if CONFIG_ENTROPY_STATS
+ if (!mbmi->use_lgt)
+ ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][mbmi->mode]
+ [tx_type];
+ else
+#endif // CONFIG_ENTROPY_STATS
+ mbmi->tx_type = DCT_DCT;
+ } else {
+#if CONFIG_ENTROPY_STATS
+ ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][mbmi->mode]
+ [tx_type];
+#endif // CONFIG_ENTROPY_STATS
+ }
}
+#endif // CONFIG_LGT_FROM_PRED
}
}
#else
@@ -5932,10 +6254,20 @@ void av1_update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd,
!mbmi->skip &&
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
if (is_inter) {
+#if CONFIG_ENTROPY_STATS
++counts->inter_ext_tx[tx_size][tx_type];
+#endif // CONFIG_ENTROPY_STATS
+ update_cdf(fc->inter_ext_tx_cdf[tx_size], av1_ext_tx_ind[tx_type],
+ TX_TYPES);
} else {
+#if CONFIG_ENTROPY_STATS
++counts->intra_ext_tx[tx_size][intra_mode_to_tx_type_context[mbmi->mode]]
[tx_type];
+#endif // CONFIG_ENTROPY_STATS
+ update_cdf(
+ fc->intra_ext_tx_cdf[tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]],
+ av1_ext_tx_ind[tx_type], TX_TYPES);
}
}
#endif // CONFIG_EXT_TX
@@ -5966,29 +6298,48 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
x->pvq_speed = 0;
x->pvq_coded = (dry_run == OUTPUT_ENABLED) ? 1 : 0;
#endif
-#if CONFIG_CFL
- x->cfl_store_y = 1;
-#endif
if (!is_inter) {
+#if CONFIG_CFL
+ xd->cfl->store_y = 1;
+#endif // CONFIG_CFL
int plane;
mbmi->skip = 1;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
av1_encode_intra_block_plane((AV1_COMMON *)cm, x, block_size, plane, 1,
mi_row, mi_col);
}
+#if CONFIG_CFL
+ xd->cfl->store_y = 0;
+#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+ if (is_chroma_reference(mi_row, mi_col, bsize, xd->cfl->subsampling_x,
+ xd->cfl->subsampling_y) &&
+ !xd->cfl->are_parameters_computed) {
+ cfl_clear_sub8x8_val(xd->cfl);
+ }
+#endif // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
+#endif // CONFIG_CFL
if (!dry_run) {
sum_intra_stats(td->counts, xd, mi, xd->above_mi, xd->left_mi,
frame_is_intra_only(cm), mi_row, mi_col);
}
-#if CONFIG_PALETTE
- if (bsize >= BLOCK_8X8 && !dry_run) {
+
+// TODO(anybody): remove this flag when PVQ supports the palette coding tool.
+#if !CONFIG_PVQ
+ if (bsize >= BLOCK_8X8) {
for (plane = 0; plane <= 1; ++plane) {
- if (mbmi->palette_mode_info.palette_size[plane] > 0)
- av1_tokenize_palette_sb(cpi, td, plane, t, dry_run, bsize, rate);
+ if (mbmi->palette_mode_info.palette_size[plane] > 0) {
+ if (!dry_run)
+ av1_tokenize_color_map(x, plane, 0, t, bsize, mbmi->tx_size,
+ PALETTE_MAP);
+ else if (dry_run == DRY_RUN_COSTCOEFFS)
+ rate += av1_cost_color_map(x, plane, 0, bsize, mbmi->tx_size,
+ PALETTE_MAP);
+ }
}
}
-#endif // CONFIG_PALETTE
+#endif // !CONFIG_PVQ
+
#if CONFIG_VAR_TX
mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
#endif
@@ -6012,7 +6363,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
&xd->block_refs[ref]->sf);
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// Single ref compound mode
if (!is_compound && is_inter_singleref_comp_mode(mbmi->mode)) {
xd->block_refs[1] = xd->block_refs[0];
@@ -6024,9 +6375,11 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
#endif // !CONFIG_INTRABC
av1_setup_pre_planes(xd, 1, cfg, mi_row, mi_col, &xd->block_refs[1]->sf);
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, block_size);
+
+#if !CONFIG_NCOBMC_ADAPT_WEIGHT
#if CONFIG_MOTION_VAR
if (mbmi->motion_mode == OBMC_CAUSAL) {
#if CONFIG_NCOBMC
@@ -6037,6 +6390,17 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
}
#endif // CONFIG_MOTION_VAR
+#else
+ if (mbmi->motion_mode == OBMC_CAUSAL) {
+ av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
+ } else if (mbmi->motion_mode == NCOBMC_ADAPT_WEIGHT &&
+ dry_run == OUTPUT_ENABLED) {
+ int p;
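+    // Copy the NCOBMC adaptive-weight prediction out of the interpolation
+    // buffer for each plane.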
+ for (p = 0; p < MAX_MB_PLANE; ++p) {
+ get_pred_from_intrpl_buf(xd, mi_row, mi_col, block_size, p);
+ }
+ }
+#endif
av1_encode_sb((AV1_COMMON *)cm, x, block_size, mi_row, mi_col);
#if CONFIG_VAR_TX
@@ -6053,7 +6417,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
+ if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
dist_8x8_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize,
block_size_wide[bsize], block_size_high[bsize],
mi_row, mi_col);
@@ -6079,8 +6443,8 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
tx_partition_count_update(cm, x, bsize, mi_row, mi_col, td->counts);
} else {
const int tx_size_ctx = get_tx_size_context(xd);
- const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
- : intra_tx_size_cat_lookup[bsize];
+ const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
+ : intra_tx_size_cat_lookup[bsize];
const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
const int depth = tx_size_to_depth(coded_tx_size);
++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth];
@@ -6088,8 +6452,8 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
}
#else
const int tx_size_ctx = get_tx_size_context(xd);
- const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
- : intra_tx_size_cat_lookup[bsize];
+ const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
+ : intra_tx_size_cat_lookup[bsize];
const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
const int depth = tx_size_to_depth(coded_tx_size);
@@ -6141,9 +6505,6 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
#endif
}
- ++td->counts->tx_size_totals[txsize_sqr_map[tx_size]];
- ++td->counts->tx_size_totals[txsize_sqr_map[av1_get_uv_tx_size(
- mbmi, &xd->plane[1])]];
#if !CONFIG_TXK_SEL
av1_update_tx_type_count(cm, xd, bsize, tx_size, td->counts);
#endif
@@ -6156,27 +6517,46 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
#else
mbmi->sb_type >= BLOCK_8X8 &&
#endif
- is_inter && !(mbmi->skip || seg_skip)) {
+ is_inter && !(mbmi->skip || seg_skip) &&
+ !xd->lossless[mbmi->segment_id]) {
if (dry_run) tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
} else {
TX_SIZE tx_size = mbmi->tx_size;
// The new intra coding scheme requires no change of transform size
- if (is_inter)
- tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, is_inter);
- else
+ if (is_inter) {
+ if (xd->lossless[mbmi->segment_id]) {
+ tx_size = TX_4X4;
+ } else {
+ tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, is_inter);
+ }
+ } else {
tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
+ }
mbmi->tx_size = tx_size;
set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, (mbmi->skip || seg_skip), xd);
}
#endif // CONFIG_VAR_TX
+#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8
+ CFL_CTX *const cfl = xd->cfl;
+#if CONFIG_DEBUG
+ if (is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
+ cfl->subsampling_y) &&
+ !cfl->are_parameters_computed) {
+ cfl_clear_sub8x8_val(cfl);
+ }
+#endif // CONFIG_DEBUG
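+  // Luma of an inter block that is not a chroma reference is still stored:
+  // a later sub-8x8 chroma block covering this position may use it for CfL.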
+ if (is_inter_block(mbmi) &&
+ !is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
+ cfl->subsampling_y)) {
+ cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
+ }
+#endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8
}
#if CONFIG_SUPERTX
static int check_intra_b(PICK_MODE_CONTEXT *ctx) {
if (!is_inter_mode((&ctx->mic)->mbmi.mode)) return 1;
-#if CONFIG_EXT_INTER
if (ctx->mic.mbmi.ref_frame[1] == INTRA_FRAME) return 1;
-#endif // CONFIG_EXT_INTER
return 0;
}
@@ -6235,6 +6615,9 @@ static int check_intra_sb(const AV1_COMP *const cpi, const TileInfo *const tile,
}
break;
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+#error HORZ/VERT_A/B partitions not yet updated in superres code
+#endif
case PARTITION_HORZ_A:
for (i = 0; i < 3; i++) {
if (check_intra_b(&pc_tree->horizontala[i])) return 1;
@@ -6289,6 +6672,9 @@ static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
else
return check_supertx_sb(subsize, supertx_size, pc_tree->split[0]);
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+#error HORZ/VERT_A/B partitions not yet updated in superres code
+#endif
case PARTITION_HORZ_A:
return check_supertx_b(supertx_size, &pc_tree->horizontala[0]);
case PARTITION_HORZ_B:
@@ -6303,10 +6689,8 @@ static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
}
static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
-#if CONFIG_EXT_INTER
- int mi_row_ori, int mi_col_ori,
-#endif // CONFIG_EXT_INTER
- int mi_row_pred, int mi_col_pred, int plane,
+ int mi_row_ori, int mi_col_ori, int mi_row_pred,
+ int mi_col_pred, int plane,
BLOCK_SIZE bsize_pred, int b_sub8x8, int block) {
// Used in supertx
// (mi_row_ori, mi_col_ori): location for mv
@@ -6328,7 +6712,7 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
&xd->block_refs[ref]->sf);
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// Single ref compound mode
if (!is_compound && is_inter_singleref_comp_mode(mbmi->mode)) {
xd->block_refs[1] = xd->block_refs[0];
@@ -6336,20 +6720,14 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
av1_setup_pre_planes(xd, 1, cfg, mi_row_pred, mi_col_pred,
&xd->block_refs[1]->sf);
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
if (!b_sub8x8)
- av1_build_inter_predictor_sb_extend(cm, xd,
-#if CONFIG_EXT_INTER
- mi_row_ori, mi_col_ori,
-#endif // CONFIG_EXT_INTER
+ av1_build_inter_predictor_sb_extend(cm, xd, mi_row_ori, mi_col_ori,
mi_row_pred, mi_col_pred, plane,
bsize_pred);
else
- av1_build_inter_predictor_sb_sub8x8_extend(cm, xd,
-#if CONFIG_EXT_INTER
- mi_row_ori, mi_col_ori,
-#endif // CONFIG_EXT_INTER
+ av1_build_inter_predictor_sb_sub8x8_extend(cm, xd, mi_row_ori, mi_col_ori,
mi_row_pred, mi_col_pred, plane,
bsize_pred, block);
}
@@ -6390,12 +6768,8 @@ static void predict_b_extend(const AV1_COMP *const cpi, ThreadData *td,
dst_buf + (r >> xd->plane[plane].subsampling_y) * dst_stride +
(c >> xd->plane[plane].subsampling_x);
- predict_superblock(cpi, td,
-#if CONFIG_EXT_INTER
- mi_row_ori, mi_col_ori,
-#endif // CONFIG_EXT_INTER
- mi_row_pred, mi_col_pred, plane, bsize_pred, b_sub8x8,
- block);
+ predict_superblock(cpi, td, mi_row_ori, mi_col_ori, mi_row_pred, mi_col_pred,
+ plane, bsize_pred, b_sub8x8, block);
if (!dry_run && (plane == 0) && (block == 0 || !b_sub8x8))
update_stats(&cpi->common, td, mi_row_pred, mi_col_pred, 1);
@@ -6940,6 +7314,9 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td,
}
break;
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION_TYPES_AB
+#error HORZ/VERT_A/B partitions not yet updated in superres code
+#endif
case PARTITION_HORZ_A:
predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
@@ -7130,9 +7507,6 @@ static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td,
TX_SIZE tx_size;
MB_MODE_INFO *mbmi;
TX_TYPE tx_type, best_tx_nostx;
-#if CONFIG_EXT_TX
- int ext_tx_set;
-#endif // CONFIG_EXT_TX
int tmp_rate_tx = 0, skip_tx = 0;
int64_t tmp_dist_tx = 0, rd_tx, bestrd_tx = INT64_MAX;
@@ -7202,7 +7576,9 @@ static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td,
tx_size = max_txsize_lookup[bsize];
av1_subtract_plane(x, bsize, 0);
#if CONFIG_EXT_TX
- ext_tx_set = get_ext_tx_set(tx_size, bsize, 1, cm->reduced_tx_set_used);
+ int ext_tx_set = get_ext_tx_set(tx_size, bsize, 1, cm->reduced_tx_set_used);
+ const TxSetType tx_set_type =
+ get_ext_tx_set_type(tx_size, bsize, 1, cm->reduced_tx_set_used);
#endif // CONFIG_EXT_TX
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
#if CONFIG_VAR_TX
@@ -7213,7 +7589,7 @@ static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td,
#endif // CONFIG_VAR_TX
#if CONFIG_EXT_TX
- if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
+ if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
#else
if (tx_size >= TX_32X32 && tx_type != DCT_DCT) continue;
#endif // CONFIG_EXT_TX
@@ -7239,12 +7615,12 @@ static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td,
!xd->lossless[xd->mi[0]->mbmi.segment_id] && this_rate != INT_MAX) {
if (ext_tx_set > 0)
this_rate +=
- cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size][mbmi->tx_type];
+ x->inter_tx_type_costs[ext_tx_set][mbmi->tx_size][mbmi->tx_type];
}
#else
if (tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
this_rate != INT_MAX) {
- this_rate += cpi->inter_tx_type_costs[tx_size][mbmi->tx_type];
+ this_rate += x->inter_tx_type_costs[tx_size][mbmi->tx_type];
}
#endif // CONFIG_EXT_TX
*tmp_rate = rate_uv + this_rate;
diff --git a/third_party/aom/av1/encoder/encodeframe.h b/third_party/aom/av1/encoder/encodeframe.h
index 569ec9f72..b54e54d25 100644
--- a/third_party/aom/av1/encoder/encodeframe.h
+++ b/third_party/aom/av1/encoder/encodeframe.h
@@ -41,7 +41,6 @@ void av1_update_tx_type_count(const struct AV1Common *cm, MACROBLOCKD *xd,
#endif
BLOCK_SIZE bsize, TX_SIZE tx_size,
FRAME_COUNTS *counts);
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/encoder/encodemb.c b/third_party/aom/av1/encoder/encodemb.c
index e7f4d313d..f35ce8a4f 100644
--- a/third_party/aom/av1/encoder/encodemb.c
+++ b/third_party/aom/av1/encoder/encodemb.c
@@ -110,42 +110,46 @@ void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
pd->dst.buf, pd->dst.stride);
}
+// Right-shifting a negative value is implementation-defined in C99 and could
+// mislead the optimizer, which might assume the shifted value is positive.
+// This helper also avoids ubsan warnings.
+// In practice, the optimizer inlines it to a single instruction.
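+// For example, assuming two's complement arithmetic shifts, -5 >> 1 yields
+// -3 (rounding toward negative infinity), while signed_shift_right(-5, 1)
+// yields -2 (rounding toward zero), matching the abs-then-negate code paths
+// it replaces.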
+static INLINE int signed_shift_right(int x, int shift) {
+ if (x >= 0)
+ return x >> shift;
+ else
+ return -((-x) >> shift);
+}
+
+#if !CONFIG_LV_MAP
// These numbers are empirically obtained.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
{ 10, 7 }, { 8, 5 },
};
-static INLINE unsigned int get_token_bit_costs(
- unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS], int skip_eob,
- int ctx, int token) {
- (void)skip_eob;
- return token_costs[token == ZERO_TOKEN || token == EOB_TOKEN][ctx][token];
-}
-
-#if !CONFIG_LV_MAP
-
static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
int blk_row, int blk_col, int block,
TX_SIZE tx_size, int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
+ const PLANE_TYPE plane_type = pd->plane_type;
+ const int eob = p->eobs[block];
+ assert(mb->qindex > 0);
+ assert((!plane_type && !plane) || (plane_type && plane));
+ assert(eob <= tx_size_2d[tx_size]);
const int ref = is_inter_block(&xd->mi[0]->mbmi);
- uint8_t token_cache[MAX_TX_SQUARE];
const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- const int eob = p->eobs[block];
- const PLANE_TYPE plane_type = pd->plane_type;
const int16_t *const dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
- TX_TYPE tx_type =
+ const TX_TYPE tx_type =
av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
const SCAN_ORDER *const scan_order =
get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int16_t *const scan = scan_order->scan;
const int16_t *const nb = scan_order->neighbors;
- int dqv;
const int shift = av1_get_tx_scale(tx_size);
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
@@ -159,66 +163,52 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
#endif // CONFIG_NEW_QUANT
- int sz = 0;
int64_t rd_cost0, rd_cost1;
int16_t t0, t1;
- int i, final_eob;
+ int i, final_eob = 0;
const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
- unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
- const int default_eob = tx_size_2d[tx_size];
-
- assert(mb->qindex > 0);
-
- assert((!plane_type && !plane) || (plane_type && plane));
- assert(eob <= default_eob);
-
- int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
-
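+  // Coefficient token costs are split into separate head and tail tables;
+  // av1_get_coeff_token_cost() combines them for a given token and EOB state.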
+ int(*head_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] =
+ mb->token_head_costs[txsize_sqr_map[tx_size]][plane_type][ref];
+ int(*tail_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] =
+ mb->token_tail_costs[txsize_sqr_map[tx_size]][plane_type][ref];
+ const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
int64_t rate0, rate1;
+ int64_t eob_cost0, eob_cost1;
+ tran_low_t before_best_eob_qc = 0;
+ tran_low_t before_best_eob_dqc = 0;
+
+ uint8_t token_cache[MAX_TX_SQUARE];
for (i = 0; i < eob; i++) {
const int rc = scan[i];
token_cache[rc] = av1_pt_energy_class[av1_get_token(qcoeff[rc])];
}
- unsigned int(*token_costs_ptr)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- token_costs;
-
- final_eob = 0;
-
- int64_t eob_cost0, eob_cost1;
- tran_low_t before_best_eob_qc = 0;
- tran_low_t before_best_eob_dqc = 0;
-
- const int ctx0 = ctx;
/* Record the r-d cost */
int64_t accu_rate = 0;
// Initialized to the worst possible error for the largest transform size.
// This ensures that it never goes negative.
int64_t accu_error = ((int64_t)1) << 50;
-
- rate0 = get_token_bit_costs(*(token_costs_ptr + band_translate[0]), 0, ctx0,
- EOB_TOKEN);
+ rate0 = head_token_costs[0][ctx][0];
int64_t best_block_rd_cost = RDCOST(rdmult, rate0, accu_error);
// int64_t best_block_rd_cost_all0 = best_block_rd_cost;
- int x_prev = 1;
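+  // The last scan position at which an EOB can occur for this segment and
+  // transform size.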
+ const int seg_eob =
+ av1_get_tx_eob(&cm->seg, xd->mi[0]->mbmi.segment_id, tx_size);
for (i = 0; i < eob; i++) {
const int rc = scan[i];
- int x = qcoeff[rc];
- sz = -(x < 0);
-
- int band_cur = band_translate[i];
- int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
- int token_tree_sel_cur = (x_prev == 0);
+ const int x = qcoeff[rc];
+ const int sz = -(x < 0);
+ const int band_cur = band_translate[i];
+ const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
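+    // eob_val classifies position i: NO_EOB if more coefficients follow,
+    // EARLY_EOB if the block ends here before seg_eob, and LAST_EOB if it
+    // ends exactly at the segment limit.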
+ const int eob_val =
+ (i + 1 == eob) ? (i + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
+ const int is_first = (i == 0);
if (x == 0) {
// no need to search when x == 0
- int token = av1_get_token(x);
- rate0 = get_token_bit_costs(*(token_costs_ptr + band_cur),
- token_tree_sel_cur, ctx_cur, token);
- accu_rate += rate0;
- x_prev = 0;
+ accu_rate += av1_get_coeff_token_cost(
+ ZERO_TOKEN, eob_val, is_first, head_token_costs[band_cur][ctx_cur],
+ tail_token_costs[band_cur][ctx_cur]);
// accu_error does not change when x==0
} else {
/* Computing distortion
@@ -226,136 +216,109 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
// compute the distortion for the first candidate
// and the distortion for quantizing to 0.
int dx0 = abs(coeff[rc]) * (1 << shift);
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- dx0 >>= xd->bd - 8;
- }
-#endif
- int64_t d0 = (int64_t)dx0 * dx0;
-
- int x_a = x - 2 * sz - 1;
- int64_t d2, d2_a;
-
- int dx;
+ dx0 >>= xd->bd - 8;
+ const int64_t d0 = (int64_t)dx0 * dx0;
+ const int x_a = x - 2 * sz - 1;
+ int dqv;
#if CONFIG_AOM_QM
- int iwt = iqmatrix[rc];
+ int iwt;
dqv = dequant_ptr[rc != 0];
- dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+ if (iqmatrix != NULL) {
+ iwt = iqmatrix[rc];
+ dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+ }
#else
dqv = dequant_ptr[rc != 0];
#endif
- dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- int dx_sign = dx < 0 ? 1 : 0;
- dx = abs(dx) >> (xd->bd - 8);
- if (dx_sign) dx = -dx;
- }
-#endif // CONFIG_HIGHBITDEPTH
- d2 = (int64_t)dx * dx;
+ int dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
+ dx = signed_shift_right(dx, xd->bd - 8);
+ const int64_t d2 = (int64_t)dx * dx;
/* compute the distortion for the second candidate
       * x_a = x - 2 * sz - 1;
*/
+ int64_t d2_a;
if (x_a != 0) {
#if CONFIG_NEW_QUANT
dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
- (coeff[rc] << shift);
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- dx >>= xd->bd - 8;
- }
-#endif // CONFIG_HIGHBITDEPTH
+ (coeff[rc] * (1 << shift));
+ dx >>= xd->bd - 8;
#else // CONFIG_NEW_QUANT
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
- } else {
- dx -= (dqv + sz) ^ sz;
- }
-#else
- dx -= (dqv + sz) ^ sz;
-#endif // CONFIG_HIGHBITDEPTH
+ dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
#endif // CONFIG_NEW_QUANT
d2_a = (int64_t)dx * dx;
} else {
d2_a = d0;
}
- /* Computing rates and r-d cost
- */
-
- int best_x, best_eob_x;
- int64_t base_bits, next_bits0, next_bits1;
- int64_t next_eob_bits0, next_eob_bits1;
-
+ // Computing RD cost
+ int64_t base_bits;
// rate cost of x
base_bits = av1_get_token_cost(x, &t0, cat6_bits);
- rate0 = base_bits + get_token_bit_costs(*(token_costs_ptr + band_cur),
- token_tree_sel_cur, ctx_cur, t0);
-
+ rate0 = base_bits +
+ av1_get_coeff_token_cost(t0, eob_val, is_first,
+ head_token_costs[band_cur][ctx_cur],
+ tail_token_costs[band_cur][ctx_cur]);
+ // rate cost of x_a
base_bits = av1_get_token_cost(x_a, &t1, cat6_bits);
- rate1 = base_bits + get_token_bit_costs(*(token_costs_ptr + band_cur),
- token_tree_sel_cur, ctx_cur, t1);
-
- next_bits0 = 0;
- next_bits1 = 0;
- next_eob_bits0 = 0;
- next_eob_bits1 = 0;
+ if (t1 == ZERO_TOKEN && eob_val) {
+ rate1 = base_bits;
+ } else {
+ rate1 = base_bits +
+ av1_get_coeff_token_cost(t1, eob_val, is_first,
+ head_token_costs[band_cur][ctx_cur],
+ tail_token_costs[band_cur][ctx_cur]);
+ }
- if (i < default_eob - 1) {
- int ctx_next, token_tree_sel_next;
- int band_next = band_translate[i + 1];
- int token_next =
- i + 1 != eob ? av1_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
+ int64_t next_bits0 = 0, next_bits1 = 0;
+ if (i < eob - 1) {
+ int ctx_next;
+ const int band_next = band_translate[i + 1];
+ const int token_next = av1_get_token(qcoeff[scan[i + 1]]);
+ const int eob_val_next =
+ (i + 2 == eob) ? (i + 2 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
token_cache[rc] = av1_pt_energy_class[t0];
ctx_next = get_coef_context(nb, token_cache, i + 1);
- token_tree_sel_next = (x == 0);
-
- next_bits0 =
- get_token_bit_costs(*(token_costs_ptr + band_next),
- token_tree_sel_next, ctx_next, token_next);
- next_eob_bits0 =
- get_token_bit_costs(*(token_costs_ptr + band_next),
- token_tree_sel_next, ctx_next, EOB_TOKEN);
+ next_bits0 = av1_get_coeff_token_cost(
+ token_next, eob_val_next, 0, head_token_costs[band_next][ctx_next],
+ tail_token_costs[band_next][ctx_next]);
token_cache[rc] = av1_pt_energy_class[t1];
ctx_next = get_coef_context(nb, token_cache, i + 1);
- token_tree_sel_next = (x_a == 0);
-
- next_bits1 =
- get_token_bit_costs(*(token_costs_ptr + band_next),
- token_tree_sel_next, ctx_next, token_next);
-
- if (x_a != 0) {
- next_eob_bits1 =
- get_token_bit_costs(*(token_costs_ptr + band_next),
- token_tree_sel_next, ctx_next, EOB_TOKEN);
- }
+ next_bits1 = av1_get_coeff_token_cost(
+ token_next, eob_val_next, 0, head_token_costs[band_next][ctx_next],
+ tail_token_costs[band_next][ctx_next]);
}
rd_cost0 = RDCOST(rdmult, (rate0 + next_bits0), d2);
rd_cost1 = RDCOST(rdmult, (rate1 + next_bits1), d2_a);
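+      // best_x == 1 selects the candidate x_a (one step closer to zero);
+      // best_x == 0 keeps the original quantized value x.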
+ const int best_x = (rd_cost1 < rd_cost0);
- best_x = (rd_cost1 < rd_cost0);
-
- eob_cost0 = RDCOST(rdmult, (accu_rate + rate0 + next_eob_bits0),
- (accu_error + d2 - d0));
+ const int eob_v = (i + 1 == seg_eob) ? LAST_EOB : EARLY_EOB;
+ int64_t next_eob_bits0, next_eob_bits1;
+ int best_eob_x;
+ next_eob_bits0 = av1_get_coeff_token_cost(
+ t0, eob_v, is_first, head_token_costs[band_cur][ctx_cur],
+ tail_token_costs[band_cur][ctx_cur]);
+ eob_cost0 =
+ RDCOST(rdmult, (accu_rate + next_eob_bits0), (accu_error + d2 - d0));
eob_cost1 = eob_cost0;
if (x_a != 0) {
- eob_cost1 = RDCOST(rdmult, (accu_rate + rate1 + next_eob_bits1),
+ next_eob_bits1 = av1_get_coeff_token_cost(
+ t1, eob_v, is_first, head_token_costs[band_cur][ctx_cur],
+ tail_token_costs[band_cur][ctx_cur]);
+ eob_cost1 = RDCOST(rdmult, (accu_rate + next_eob_bits1),
(accu_error + d2_a - d0));
best_eob_x = (eob_cost1 < eob_cost0);
} else {
best_eob_x = 0;
}
- int dqc, dqc_a = 0;
-
- dqc = dqcoeff[rc];
- if (best_x + best_eob_x) {
+ const int dqc = dqcoeff[rc];
+ int dqc_a = 0;
+ if (best_x || best_eob_x) {
if (x_a != 0) {
#if CONFIG_NEW_QUANT
dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
@@ -375,29 +338,23 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
// record the better quantized value
if (best_x) {
+ assert(d2_a <= d0);
qcoeff[rc] = x_a;
dqcoeff[rc] = dqc_a;
-
accu_rate += rate1;
accu_error += d2_a - d0;
- assert(d2_a <= d0);
-
token_cache[rc] = av1_pt_energy_class[t1];
} else {
+ assert(d2 <= d0);
accu_rate += rate0;
accu_error += d2 - d0;
- assert(d2 <= d0);
-
token_cache[rc] = av1_pt_energy_class[t0];
}
assert(accu_error >= 0);
- x_prev = qcoeff[rc];
-
// determine whether to move the eob position to i+1
- int use_a = (x_a != 0) && (best_eob_x);
- int64_t best_eob_cost_i = use_a ? eob_cost1 : eob_cost0;
-
+ const int use_a = (x_a != 0) && (best_eob_x);
+ const int64_t best_eob_cost_i = use_a ? eob_cost1 : eob_cost0;
if (best_eob_cost_i < best_block_rd_cost) {
best_block_rd_cost = best_eob_cost_i;
final_eob = i + 1;
@@ -427,7 +384,7 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
dqcoeff[rc] = 0;
}
- mb->plane[plane].eobs[block] = final_eob;
+ p->eobs[block] = final_eob;
return final_eob;
}
#endif // !CONFIG_LV_MAP
@@ -435,7 +392,7 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
int blk_col, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
- const ENTROPY_CONTEXT *l) {
+ const ENTROPY_CONTEXT *l, int fast_mode) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
const int eob = p->eobs[block];
@@ -455,6 +412,7 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
(void)plane_bsize;
(void)blk_row;
(void)blk_col;
+ (void)fast_mode;
#if CONFIG_VAR_TX
int ctx = get_entropy_context(tx_size, a, l);
#else
@@ -466,7 +424,7 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
return av1_optimize_txb(cm, mb, plane, blk_row, blk_col, block, tx_size,
- &txb_ctx);
+ &txb_ctx, fast_mode);
#endif // !CONFIG_LV_MAP
}
@@ -492,10 +450,12 @@ static AV1_QUANT_FACADE
};
#endif // !CONFIG_PVQ
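+// Under CONFIG_TXMG the high-bitdepth forward transform is used for both bit
+// depths (see av1_xform_quant), so this dispatch table is only needed
+// otherwise.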
+#if !CONFIG_TXMG && !CONFIG_PVQ
typedef void (*fwdTxfmFunc)(const int16_t *diff, tran_low_t *coeff, int stride,
TxfmParam *txfm_param);
static const fwdTxfmFunc fwd_txfm_func[2] = { av1_fwd_txfm,
av1_highbd_fwd_txfm };
+#endif
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
@@ -514,7 +474,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
TX_TYPE tx_type =
av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
-#if CONFIG_AOM_QM || CONFIG_NEW_QUANT
+#if (CONFIG_AOM_QM || CONFIG_NEW_QUANT) && !CONFIG_PVQ
const int is_inter = is_inter_block(mbmi);
#endif
@@ -524,7 +484,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = block_size_wide[plane_bsize];
-#if CONFIG_AOM_QM
+#if CONFIG_AOM_QM && !CONFIG_PVQ
int seg_id = mbmi->segment_id;
// Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
const qm_val_t *qmatrix =
@@ -538,7 +498,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
TxfmParam txfm_param;
-#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX
+#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT_FROM_PRED || CONFIG_MRC_TX
uint8_t *dst;
const int dst_stride = pd->dst.stride;
#if CONFIG_PVQ || CONFIG_DIST_8X8
@@ -601,29 +561,37 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
#endif // CONFIG_HIGHBITDEPTH
#endif
-#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX
+#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT_FROM_PRED || CONFIG_MRC_TX
dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
+#endif // CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT_FROM_PRED ||
+ // CONFIG_MRC_TX
+
#if CONFIG_PVQ || CONFIG_DIST_8X8
- pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
+ if (CONFIG_PVQ
+#if CONFIG_DIST_8X8
+ || x->using_dist_8x8
+#endif // CONFIG_DIST_8X8
+ ) {
+ pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
// copy uint8 orig and predicted block to int16 buffer
// in order to use existing VP10 transform functions
#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < txh; j++)
- for (i = 0; i < txw; i++)
- pred[diff_stride * j + i] =
- CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i];
- } else {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < txh; j++)
+ for (i = 0; i < txw; i++)
+ pred[diff_stride * j + i] =
+ CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i];
+ } else {
#endif // CONFIG_HIGHBITDEPTH
- for (j = 0; j < txh; j++)
- for (i = 0; i < txw; i++)
- pred[diff_stride * j + i] = dst[dst_stride * j + i];
+ for (j = 0; j < txh; j++)
+ for (i = 0; i < txw; i++)
+ pred[diff_stride * j + i] = dst[dst_stride * j + i];
#if CONFIG_HIGHBITDEPTH
- }
+ }
#endif // CONFIG_HIGHBITDEPTH
+ }
#endif // CONFIG_PVQ || CONFIG_DIST_8X8
-#endif // CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT || CONFIG_MRC_TX
(void)ctx;
@@ -631,18 +599,32 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
txfm_param.tx_size = tx_size;
txfm_param.lossless = xd->lossless[mbmi->segment_id];
#if CONFIG_MRC_TX || CONFIG_LGT
- txfm_param.dst = dst;
- txfm_param.stride = dst_stride;
-#endif // CONFIG_MRC_TX || CONFIG_LGT
-#if CONFIG_LGT
txfm_param.is_inter = is_inter_block(mbmi);
- txfm_param.mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
#endif
+#if CONFIG_MRC_TX || CONFIG_LGT_FROM_PRED
+ txfm_param.dst = dst;
+ txfm_param.stride = dst_stride;
+#if CONFIG_MRC_TX
+ txfm_param.valid_mask = &mbmi->valid_mrc_mask;
+#if SIGNAL_ANY_MRC_MASK
+ txfm_param.mask = BLOCK_OFFSET(xd->mrc_mask, block);
+#endif // SIGNAL_ANY_MRC_MASK
+#endif // CONFIG_MRC_TX
+#if CONFIG_LGT_FROM_PRED
+ txfm_param.mode = mbmi->mode;
+ txfm_param.use_lgt = mbmi->use_lgt;
+#endif // CONFIG_LGT_FROM_PRED
+#endif // CONFIG_MRC_TX || CONFIG_LGT_FROM_PRED
#if !CONFIG_PVQ
txfm_param.bd = xd->bd;
const int is_hbd = get_bitdepth_data_path_index(xd);
+
+#if CONFIG_TXMG
+ av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, &txfm_param);
+#else // CONFIG_TXMG
fwd_txfm_func[is_hbd](src_diff, coeff, diff_stride, &txfm_param);
+#endif // CONFIG_TXMG
if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) {
@@ -705,6 +687,9 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
uint8_t *dst;
#if !CONFIG_PVQ
ENTROPY_CONTEXT *a, *l;
@@ -731,10 +716,9 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
// Assert not magic number (uninitialized).
assert(x->blk_skip[plane][blk_row * bw + blk_col] != 234);
- if (x->blk_skip[plane][blk_row * bw + blk_col] == 0) {
-#else
- {
+ if (x->blk_skip[plane][blk_row * bw + blk_col] == 0)
#endif
+ {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
ctx, AV1_XFORM_QUANT_FP);
}
@@ -746,29 +730,35 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
#if !CONFIG_PVQ
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, a,
- l);
+ l, 0);
av1_set_txb_context(x, plane, block, tx_size, a, l);
if (p->eobs[block]) *(args->skip) = 0;
- if (p->eobs[block] == 0) return;
+ if (p->eobs[block] != 0)
#else
(void)ctx;
if (!x->pvq_skip[plane]) *(args->skip) = 0;
- if (x->pvq_skip[plane]) return;
+ if (!x->pvq_skip[plane])
#endif
- TX_TYPE tx_type =
- av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col, block, tx_size);
-#if CONFIG_LGT
- PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
- av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, dst,
- pd->dst.stride, p->eobs[block]);
-#else
- av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst,
- pd->dst.stride, p->eobs[block]);
+ {
+#if CONFIG_LGT_FROM_PRED
+ PREDICTION_MODE mode = xd->mi[0]->mbmi.mode;
+#endif // CONFIG_LGT_FROM_PRED
+ TX_TYPE tx_type =
+ av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col, block, tx_size);
+ av1_inverse_transform_block(xd, dqcoeff,
+#if CONFIG_LGT_FROM_PRED
+ mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ mrc_mask,
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ tx_type, tx_size, dst, pd->dst.stride,
+ p->eobs[block]);
+ }
}
#if CONFIG_VAR_TX
@@ -803,7 +793,8 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
if (is_qttx) assert(blk_row == 0 && blk_col == 0 && block == 0);
#else
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
- assert(sub_txs < tx_size);
+ assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
+ assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
#endif
// This is the square transform block partition entry point.
int bsl = tx_size_wide_unit[sub_txs];
@@ -858,34 +849,36 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
ctx, AV1_XFORM_QUANT_B);
-#if !CONFIG_PVQ
- if (p->eobs[block] > 0) {
-#else
+#if CONFIG_PVQ
if (!x->pvq_skip[plane]) {
- {
- int tx_blk_size;
- int i, j;
- // transform block size in pixels
- tx_blk_size = tx_size_wide[tx_size];
+ int tx_blk_size;
+ int i, j;
+ // transform block size in pixels
+ tx_blk_size = tx_size_wide[tx_size];
    // av1 does not have a separate inverse transform function:
    // av1_inv_txfm_add_*x*() also adds the predicted image to the inverse
    // transformed image. So pass a blank dummy image to
    // av1_inv_txfm_add_*x*(), i.e. set dst to zeros.
#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++)
- CONVERT_TO_SHORTPTR(dst)[j * pd->dst.stride + i] = 0;
- } else {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++)
+ CONVERT_TO_SHORTPTR(dst)[j * pd->dst.stride + i] = 0;
+ } else {
#endif // CONFIG_HIGHBITDEPTH
- for (j = 0; j < tx_blk_size; j++)
- for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
+ for (j = 0; j < tx_blk_size; j++)
+ for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
#if CONFIG_HIGHBITDEPTH
- }
-#endif // CONFIG_HIGHBITDEPTH
}
-#endif // !CONFIG_PVQ
+#endif // CONFIG_HIGHBITDEPTH
+ }
+#endif // CONFIG_PVQ
+
+#if !CONFIG_PVQ
+ if (p->eobs[block] > 0)
+#endif
+ {
txfm_param.bd = xd->bd;
txfm_param.tx_type = DCT_DCT;
txfm_param.eob = p->eobs[block];
@@ -944,7 +937,8 @@ void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
const int mi_height = block_size_high[plane_bsize] >> tx_size_wide_log2[0];
- const TX_SIZE max_tx_size = get_vartx_max_txsize(mbmi, plane_bsize);
+ const TX_SIZE max_tx_size = get_vartx_max_txsize(
+ mbmi, plane_bsize, pd->subsampling_x || pd->subsampling_y);
const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
const int bh = block_size_high[txb_size] >> tx_size_wide_log2[0];
@@ -1059,320 +1053,6 @@ static void encode_block_intra_and_set_context(int plane, int block,
#endif
}
-#if CONFIG_DPCM_INTRA
-static int get_eob(const tran_low_t *qcoeff, intptr_t n_coeffs,
- const int16_t *scan) {
- int eob = -1;
- for (int i = (int)n_coeffs - 1; i >= 0; i--) {
- const int rc = scan[i];
- if (qcoeff[rc]) {
- eob = i;
- break;
- }
- }
- return eob + 1;
-}
-
-static void quantize_scaler(int coeff, int16_t zbin, int16_t round_value,
- int16_t quant, int16_t quant_shift, int16_t dequant,
- int log_scale, tran_low_t *const qcoeff,
- tran_low_t *const dqcoeff) {
- zbin = ROUND_POWER_OF_TWO(zbin, log_scale);
- round_value = ROUND_POWER_OF_TWO(round_value, log_scale);
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- if (abs_coeff >= zbin) {
- int tmp = clamp(abs_coeff + round_value, INT16_MIN, INT16_MAX);
- tmp = ((((tmp * quant) >> 16) + tmp) * quant_shift) >> (16 - log_scale);
- *qcoeff = (tmp ^ coeff_sign) - coeff_sign;
- *dqcoeff = (*qcoeff * dequant) / (1 << log_scale);
- }
-}
-
-#if CONFIG_HIGHBITDEPTH
-typedef void (*hbd_dpcm_fwd_tx_func)(const int16_t *input, int stride,
- TX_TYPE_1D tx_type, tran_low_t *output,
- int dir);
-
-static hbd_dpcm_fwd_tx_func get_hbd_dpcm_fwd_tx_func(int tx_length) {
- switch (tx_length) {
- case 4: return av1_hbd_dpcm_ft4_c;
- case 8: return av1_hbd_dpcm_ft8_c;
- case 16: return av1_hbd_dpcm_ft16_c;
- case 32:
- return av1_hbd_dpcm_ft32_c;
- // TODO(huisu): add support for TX_64X64.
- default: assert(0); return NULL;
- }
-}
-#endif // CONFIG_HIGHBITDEPTH
-
-typedef void (*dpcm_fwd_tx_func)(const int16_t *input, int stride,
- TX_TYPE_1D tx_type, tran_low_t *output);
-
-static dpcm_fwd_tx_func get_dpcm_fwd_tx_func(int tx_length) {
- switch (tx_length) {
- case 4: return av1_dpcm_ft4_c;
- case 8: return av1_dpcm_ft8_c;
- case 16: return av1_dpcm_ft16_c;
- case 32:
- return av1_dpcm_ft32_c;
- // TODO(huisu): add support for TX_64X64.
- default: assert(0); return NULL;
- }
-}
-
-static void process_block_dpcm_vert(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
- struct macroblockd_plane *const pd,
- struct macroblock_plane *const p,
- uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int16_t *src_diff,
- int diff_stride, tran_low_t *coeff,
- tran_low_t *qcoeff, tran_low_t *dqcoeff) {
- const int tx1d_width = tx_size_wide[tx_size];
- dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_width);
- dpcm_inv_txfm_add_func inverse_tx =
- av1_get_dpcm_inv_txfm_add_func(tx1d_width);
- const int tx1d_height = tx_size_high[tx_size];
- const int log_scale = av1_get_tx_scale(tx_size);
- int q_idx = 0;
- for (int r = 0; r < tx1d_height; ++r) {
- // Update prediction.
- if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
- // Subtraction.
- for (int c = 0; c < tx1d_width; ++c) src_diff[c] = src[c] - dst[c];
- // Forward transform.
- forward_tx(src_diff, 1, tx_type_1d, coeff);
- // Quantization.
- for (int c = 0; c < tx1d_width; ++c) {
- quantize_scaler(coeff[c], p->zbin[q_idx], p->round[q_idx],
- p->quant[q_idx], p->quant_shift[q_idx],
- pd->dequant[q_idx], log_scale, &qcoeff[c], &dqcoeff[c]);
- q_idx = 1;
- }
- // Inverse transform.
- inverse_tx(dqcoeff, 1, tx_type_1d, dst);
- // Move to the next row.
- coeff += tx1d_width;
- qcoeff += tx1d_width;
- dqcoeff += tx1d_width;
- src_diff += diff_stride;
- dst += dst_stride;
- src += src_stride;
- }
-}
-
-static void process_block_dpcm_horz(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d,
- struct macroblockd_plane *const pd,
- struct macroblock_plane *const p,
- uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int16_t *src_diff,
- int diff_stride, tran_low_t *coeff,
- tran_low_t *qcoeff, tran_low_t *dqcoeff) {
- const int tx1d_height = tx_size_high[tx_size];
- dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_height);
- dpcm_inv_txfm_add_func inverse_tx =
- av1_get_dpcm_inv_txfm_add_func(tx1d_height);
- const int tx1d_width = tx_size_wide[tx_size];
- const int log_scale = av1_get_tx_scale(tx_size);
- int q_idx = 0;
- for (int c = 0; c < tx1d_width; ++c) {
- for (int r = 0; r < tx1d_height; ++r) {
- // Update prediction.
- if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1];
- // Subtraction.
- src_diff[r * diff_stride] = src[r * src_stride] - dst[r * dst_stride];
- }
- // Forward transform.
- tran_low_t tx_buff[64];
- forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff);
- for (int r = 0; r < tx1d_height; ++r) coeff[r * tx1d_width] = tx_buff[r];
- // Quantization.
- for (int r = 0; r < tx1d_height; ++r) {
- quantize_scaler(coeff[r * tx1d_width], p->zbin[q_idx], p->round[q_idx],
- p->quant[q_idx], p->quant_shift[q_idx],
- pd->dequant[q_idx], log_scale, &qcoeff[r * tx1d_width],
- &dqcoeff[r * tx1d_width]);
- q_idx = 1;
- }
- // Inverse transform.
- for (int r = 0; r < tx1d_height; ++r) tx_buff[r] = dqcoeff[r * tx1d_width];
- inverse_tx(tx_buff, dst_stride, tx_type_1d, dst);
- // Move to the next column.
- ++coeff, ++qcoeff, ++dqcoeff, ++src_diff, ++dst, ++src;
- }
-}
-
-#if CONFIG_HIGHBITDEPTH
-static void hbd_process_block_dpcm_vert(
- TX_SIZE tx_size, TX_TYPE_1D tx_type_1d, int bd,
- struct macroblockd_plane *const pd, struct macroblock_plane *const p,
- uint8_t *src8, int src_stride, uint8_t *dst8, int dst_stride,
- int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff,
- tran_low_t *dqcoeff) {
- const int tx1d_width = tx_size_wide[tx_size];
- hbd_dpcm_fwd_tx_func forward_tx = get_hbd_dpcm_fwd_tx_func(tx1d_width);
- hbd_dpcm_inv_txfm_add_func inverse_tx =
- av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_width);
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- const int tx1d_height = tx_size_high[tx_size];
- const int log_scale = av1_get_tx_scale(tx_size);
- int q_idx = 0;
- for (int r = 0; r < tx1d_height; ++r) {
- // Update prediction.
- if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0]));
- // Subtraction.
- for (int c = 0; c < tx1d_width; ++c) src_diff[c] = src[c] - dst[c];
- // Forward transform.
- forward_tx(src_diff, 1, tx_type_1d, coeff, 1);
- // Quantization.
- for (int c = 0; c < tx1d_width; ++c) {
- quantize_scaler(coeff[c], p->zbin[q_idx], p->round[q_idx],
- p->quant[q_idx], p->quant_shift[q_idx],
- pd->dequant[q_idx], log_scale, &qcoeff[c], &dqcoeff[c]);
- q_idx = 1;
- }
- // Inverse transform.
- inverse_tx(dqcoeff, 1, tx_type_1d, bd, dst, 1);
- // Move to the next row.
- coeff += tx1d_width;
- qcoeff += tx1d_width;
- dqcoeff += tx1d_width;
- src_diff += diff_stride;
- dst += dst_stride;
- src += src_stride;
- }
-}
-
-static void hbd_process_block_dpcm_horz(
- TX_SIZE tx_size, TX_TYPE_1D tx_type_1d, int bd,
- struct macroblockd_plane *const pd, struct macroblock_plane *const p,
- uint8_t *src8, int src_stride, uint8_t *dst8, int dst_stride,
- int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff,
- tran_low_t *dqcoeff) {
- const int tx1d_height = tx_size_high[tx_size];
- hbd_dpcm_fwd_tx_func forward_tx = get_hbd_dpcm_fwd_tx_func(tx1d_height);
- hbd_dpcm_inv_txfm_add_func inverse_tx =
- av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_height);
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
- const int tx1d_width = tx_size_wide[tx_size];
- const int log_scale = av1_get_tx_scale(tx_size);
- int q_idx = 0;
- for (int c = 0; c < tx1d_width; ++c) {
- for (int r = 0; r < tx1d_height; ++r) {
- // Update prediction.
- if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1];
- // Subtraction.
- src_diff[r * diff_stride] = src[r * src_stride] - dst[r * dst_stride];
- }
- // Forward transform.
- tran_low_t tx_buff[64];
- forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff, 0);
- for (int r = 0; r < tx1d_height; ++r) coeff[r * tx1d_width] = tx_buff[r];
- // Quantization.
- for (int r = 0; r < tx1d_height; ++r) {
- quantize_scaler(coeff[r * tx1d_width], p->zbin[q_idx], p->round[q_idx],
- p->quant[q_idx], p->quant_shift[q_idx],
- pd->dequant[q_idx], log_scale, &qcoeff[r * tx1d_width],
- &dqcoeff[r * tx1d_width]);
- q_idx = 1;
- }
- // Inverse transform.
- for (int r = 0; r < tx1d_height; ++r) tx_buff[r] = dqcoeff[r * tx1d_width];
- inverse_tx(tx_buff, dst_stride, tx_type_1d, bd, dst, 0);
- // Move to the next column.
- ++coeff, ++qcoeff, ++dqcoeff, ++src_diff, ++dst, ++src;
- }
-}
-#endif // CONFIG_HIGHBITDEPTH
-
-void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x,
- PREDICTION_MODE mode, int plane, int block,
- int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- TX_TYPE tx_type, ENTROPY_CONTEXT *ta,
- ENTROPY_CONTEXT *tl, int8_t *skip) {
- MACROBLOCKD *const xd = &x->e_mbd;
- struct macroblock_plane *const p = &x->plane[plane];
- struct macroblockd_plane *const pd = &xd->plane[plane];
- tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- const int diff_stride = block_size_wide[plane_bsize];
- const int src_stride = p->src.stride;
- const int dst_stride = pd->dst.stride;
- const int tx1d_width = tx_size_wide[tx_size];
- const int tx1d_height = tx_size_high[tx_size];
- const SCAN_ORDER *const scan_order =
- get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
- tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
- tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- uint8_t *dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- uint8_t *src =
- &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
- int16_t *src_diff =
- &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
- uint16_t *eob = &p->eobs[block];
- *eob = 0;
- memset(qcoeff, 0, tx1d_height * tx1d_width * sizeof(*qcoeff));
- memset(dqcoeff, 0, tx1d_height * tx1d_width * sizeof(*dqcoeff));
-
- if (LIKELY(!x->skip_block)) {
- TX_TYPE_1D tx_type_1d = DCT_1D;
- switch (tx_type) {
- case IDTX: tx_type_1d = IDTX_1D; break;
- case V_DCT:
- assert(mode == H_PRED);
- tx_type_1d = DCT_1D;
- break;
- case H_DCT:
- assert(mode == V_PRED);
- tx_type_1d = DCT_1D;
- break;
- default: assert(0);
- }
- switch (mode) {
- case V_PRED:
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- hbd_process_block_dpcm_vert(tx_size, tx_type_1d, xd->bd, pd, p, src,
- src_stride, dst, dst_stride, src_diff,
- diff_stride, coeff, qcoeff, dqcoeff);
- } else {
-#endif // CONFIG_HIGHBITDEPTH
- process_block_dpcm_vert(tx_size, tx_type_1d, pd, p, src, src_stride,
- dst, dst_stride, src_diff, diff_stride, coeff,
- qcoeff, dqcoeff);
-#if CONFIG_HIGHBITDEPTH
- }
-#endif // CONFIG_HIGHBITDEPTH
- break;
- case H_PRED:
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- hbd_process_block_dpcm_horz(tx_size, tx_type_1d, xd->bd, pd, p, src,
- src_stride, dst, dst_stride, src_diff,
- diff_stride, coeff, qcoeff, dqcoeff);
- } else {
-#endif // CONFIG_HIGHBITDEPTH
- process_block_dpcm_horz(tx_size, tx_type_1d, pd, p, src, src_stride,
- dst, dst_stride, src_diff, diff_stride, coeff,
- qcoeff, dqcoeff);
-#if CONFIG_HIGHBITDEPTH
- }
-#endif // CONFIG_HIGHBITDEPTH
- break;
- default: assert(0);
- }
- *eob = get_eob(qcoeff, tx1d_height * tx1d_width, scan_order->scan);
- }
-
- ta[blk_col] = tl[blk_row] = *eob > 0;
- if (*eob) *skip = 0;
-}
-#endif // CONFIG_DPCM_INTRA
-
void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
@@ -1383,6 +1063,9 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
PLANE_TYPE plane_type = get_plane_type(plane);
const TX_TYPE tx_type =
av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
@@ -1391,21 +1074,8 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
-
-#if CONFIG_DPCM_INTRA || CONFIG_LGT
- const PREDICTION_MODE mode =
- get_prediction_mode(xd->mi[0], plane, tx_size, block);
-#if CONFIG_DPCM_INTRA
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
- av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
- plane_bsize, tx_size, tx_type, args->ta,
- args->tl, args->skip);
- return;
- }
-#endif // CONFIG_DPCM_INTRA
-#endif // CONFIG_DPCM_INTRA || CONFIG_LGT
+ av1_predict_intra_block_facade(cm, xd, plane, block, blk_col, blk_row,
+ tx_size);
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
@@ -1416,7 +1086,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
ctx, AV1_XFORM_QUANT_FP);
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
- a, l);
+ a, l, 0);
} else {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
ctx, AV1_XFORM_QUANT_B);
@@ -1429,9 +1099,12 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
if (x->pvq_skip[plane]) return;
#endif // CONFIG_PVQ
av1_inverse_transform_block(xd, dqcoeff,
-#if CONFIG_LGT
- mode,
+#if CONFIG_LGT_FROM_PRED
+ xd->mi[0]->mbmi.mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ mrc_mask,
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
tx_type, tx_size, dst, dst_stride, *eob);
#if !CONFIG_PVQ
if (*eob) *(args->skip) = 0;
@@ -1439,12 +1112,10 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
  // Note: *(args->skip) == mbmi->skip
#endif
#if CONFIG_CFL
- if (plane == AOM_PLANE_Y && x->cfl_store_y) {
- // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
- // intra predicted.
- cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize);
+ if (plane == AOM_PLANE_Y && xd->cfl->store_y) {
+ cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
}
-#endif
+#endif // CONFIG_CFL
}
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
@@ -1483,7 +1154,7 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
tran_low_t *ref_coeff,
tran_low_t *const dqcoeff, uint16_t *eob,
const int16_t *quant, int plane,
- int tx_size, TX_TYPE tx_type, int *rate,
+ TX_SIZE tx_size, TX_TYPE tx_type, int *rate,
int speed, PVQ_INFO *pvq_info) {
const int tx_blk_size = tx_size_wide[tx_size];
daala_enc_ctx *daala_enc = &x->daala_enc;
@@ -1512,10 +1183,11 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
// DC quantizer for PVQ
if (use_activity_masking)
pvq_dc_quant =
- OD_MAXI(1, (quant[0] << (OD_COEFF_SHIFT - 3) >> hbd_downshift) *
- daala_enc->state
- .pvq_qm_q4[plane][od_qm_get_index(tx_size, 0)] >>
- 4);
+ OD_MAXI(1,
+ (quant[0] << (OD_COEFF_SHIFT - 3) >> hbd_downshift) *
+ daala_enc->state
+ .pvq_qm_q4[plane][od_qm_get_index(tx_size, 0)] >>
+ 4);
else
pvq_dc_quant =
OD_MAXI(1, quant[0] << (OD_COEFF_SHIFT - 3) >> hbd_downshift);
@@ -1549,18 +1221,19 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
out_int32[0] = OD_DIV_R0(in_int32[0] - ref_int32[0], pvq_dc_quant);
}
- ac_dc_coded =
- od_pvq_encode(daala_enc, ref_int32, in_int32, out_int32,
- OD_MAXI(1, quant[0] << (OD_COEFF_SHIFT - 3) >>
- hbd_downshift), // scale/quantizer
- OD_MAXI(1, quant[1] << (OD_COEFF_SHIFT - 3) >>
- hbd_downshift), // scale/quantizer
- plane,
- tx_size, OD_PVQ_BETA[use_activity_masking][plane][tx_size],
- 0, // is_keyframe,
- daala_enc->state.qm + off, daala_enc->state.qm_inv + off,
- speed, // speed
- pvq_info);
+ ac_dc_coded = od_pvq_encode(
+ daala_enc, ref_int32, in_int32, out_int32,
+ OD_MAXI(1,
+ quant[0] << (OD_COEFF_SHIFT - 3) >>
+ hbd_downshift), // scale/quantizer
+ OD_MAXI(1,
+ quant[1] << (OD_COEFF_SHIFT - 3) >>
+ hbd_downshift), // scale/quantizer
+ plane, tx_size, OD_PVQ_BETA[use_activity_masking][plane][tx_size],
+ 0, // is_keyframe,
+ daala_enc->state.qm + off, daala_enc->state.qm_inv + off,
+ speed, // speed
+ pvq_info);
// Encode residue of DC coeff, if required.
if (!has_dc_skip || out_int32[0]) {
diff --git a/third_party/aom/av1/encoder/encodemb.h b/third_party/aom/av1/encoder/encodemb.h
index 65476bcae..c817a94f0 100644
--- a/third_party/aom/av1/encoder/encodemb.h
+++ b/third_party/aom/av1/encoder/encodemb.h
@@ -56,15 +56,17 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
int blk_col, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
- const ENTROPY_CONTEXT *l);
+ const ENTROPY_CONTEXT *l, int fast_mode);
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
int blk_col, int blk_row, TX_SIZE tx_size);
void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
+#if !CONFIG_PVQ
void av1_set_txb_context(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l);
+#endif
void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg);
@@ -79,7 +81,7 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
tran_low_t *ref_coeff,
tran_low_t *const dqcoeff, uint16_t *eob,
const int16_t *quant, int plane,
- int tx_size, TX_TYPE tx_type, int *rate,
+ TX_SIZE tx_size, TX_TYPE tx_type, int *rate,
int speed, PVQ_INFO *pvq_info);
void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k,
@@ -87,15 +89,6 @@ void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k,
int *size, int skip_rest, int skip_dir, int bs);
#endif
-#if CONFIG_DPCM_INTRA
-void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x,
- PREDICTION_MODE mode, int plane, int block,
- int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- TX_TYPE tx_type, ENTROPY_CONTEXT *ta,
- ENTROPY_CONTEXT *tl, int8_t *skip);
-#endif // CONFIG_DPCM_INTRA
-
#ifdef __cplusplus
} // extern "C"
#endif
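av1_optimize_b() grows a trailing fast_mode flag in this header, and every call site updated by this patch passes 0. A usage sketch; the meaning of a nonzero value is inferred from the parameter name, not stated in the diff:

/* 0 = full coefficient optimization, as in the updated call sites;
 * nonzero presumably selects a cheaper, less exhaustive search. */
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
               a, l, /*fast_mode=*/0);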
diff --git a/third_party/aom/av1/encoder/encodemv.c b/third_party/aom/av1/encoder/encodemv.c
index fd61fe6b2..f8a546999 100644
--- a/third_party/aom/av1/encoder/encodemv.c
+++ b/third_party/aom/av1/encoder/encodemv.c
@@ -62,17 +62,22 @@ static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
} else {
int i;
const int n = mv_class + CLASS0_BITS - 1; // number of bits
+#if CONFIG_NEW_MULTISYMBOL
+ for (i = 0; i < n; ++i)
+ aom_write_symbol(w, (d >> i) & 1, mvcomp->bits_cdf[(i + 1) / 2], 2);
+#else
for (i = 0; i < n; ++i) aom_write(w, (d >> i) & 1, mvcomp->bits[i]);
+#endif
}
-
// Fractional bits
-#if CONFIG_INTRABC
+#if CONFIG_INTRABC || CONFIG_AMVR
if (precision > MV_SUBPEL_NONE)
-#endif // CONFIG_INTRABC
+#endif // CONFIG_INTRABC || CONFIG_AMVR
{
- aom_write_symbol(w, fr, mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d]
- : mvcomp->fp_cdf,
- MV_FP_SIZE);
+ aom_write_symbol(
+ w, fr,
+ mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
+ MV_FP_SIZE);
}
// High precision bit
@@ -129,9 +134,9 @@ static void build_nmv_component_cost_table(int *mvcost,
const int b = c + CLASS0_BITS - 1; /* number of bits */
for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)];
}
-#if CONFIG_INTRABC
+#if CONFIG_INTRABC || CONFIG_AMVR
if (precision > MV_SUBPEL_NONE)
-#endif // CONFIG_INTRABC
+#endif // CONFIG_INTRABC || CONFIG_AMVR
{
if (c == MV_CLASS_0) {
cost += class0_fp_cost[d][f];
@@ -165,6 +170,11 @@ void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w,
nmv_context_counts *const nmv_counts) {
int i;
int nmv_ctx = 0;
+#if CONFIG_AMVR
+ if (cm->cur_frame_mv_precision_level) {
+ return;
+ }
+#endif
for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
nmv_context *const mvc = &cm->fc->nmvc[nmv_ctx];
nmv_context_counts *const counts = &nmv_counts[nmv_ctx];
@@ -184,6 +194,11 @@ void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
nmv_context *mvctx, int usehp) {
const MV diff = { mv->row - ref->row, mv->col - ref->col };
const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
+#if CONFIG_AMVR
+ if (cpi->common.cur_frame_mv_precision_level) {
+ usehp = MV_SUBPEL_NONE;
+ }
+#endif
aom_write_symbol(w, j, mvctx->joint_cdf, MV_JOINTS);
if (mv_joint_vertical(j))
encode_mv_component(w, diff.row, &mvctx->comps[0], usehp);
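With CONFIG_AMVR, av1_encode_mv() clamps usehp to MV_SUBPEL_NONE whenever the frame signals integer-pel motion, so neither fractional nor high-precision bits are written. A hedged sketch of the rule, keeping the enum value abstract:

typedef int MvSubpelPrecision; /* stand-in for the real enum type */

static MvSubpelPrecision effective_mv_precision_sketch(
    MvSubpelPrecision requested, int cur_frame_mv_precision_level,
    MvSubpelPrecision mv_subpel_none) {
  /* A nonzero frame-level precision level forces integer-pel MVs. */
  return cur_frame_mv_precision_level ? mv_subpel_none : requested;
}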
@@ -222,10 +237,14 @@ void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
}
-#if CONFIG_EXT_INTER
static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
const int_mv mvs[2], const int_mv pred_mvs[2],
- nmv_context_counts *nmv_counts) {
+ nmv_context_counts *nmv_counts
+#if CONFIG_AMVR
+ ,
+ MvSubpelPrecision precision
+#endif
+ ) {
int i;
PREDICTION_MODE mode = mbmi->mode;
@@ -240,7 +259,11 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
(void)pred_mvs;
+#if CONFIG_AMVR
+ av1_inc_mv(&diff, counts, precision);
+#else
av1_inc_mv(&diff, counts, 1);
+#endif
}
} else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv;
@@ -251,7 +274,11 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#if CONFIG_AMVR
+ av1_inc_mv(&diff, counts, precision);
+#else
av1_inc_mv(&diff, counts, 1);
+#endif
} else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
const MV diff = { mvs[0].as_mv.row - ref->row,
@@ -261,7 +288,11 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#if CONFIG_AMVR
+ av1_inc_mv(&diff, counts, precision);
+#else
av1_inc_mv(&diff, counts, 1);
+#endif
#if CONFIG_COMPOUND_SINGLEREF
} else {
assert( // mode == SR_NEAREST_NEWMV ||
@@ -288,7 +319,12 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2],
const MB_MODE_INFO_EXT *mbmi_ext,
- nmv_context_counts *nmv_counts) {
+ nmv_context_counts *nmv_counts
+#if CONFIG_AMVR
+ ,
+ MvSubpelPrecision precision
+#endif
+ ) {
int i;
PREDICTION_MODE mode = mi->bmi[block].as_mode;
const MB_MODE_INFO *mbmi = &mi->mbmi;
@@ -303,7 +339,11 @@ static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2],
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#if CONFIG_AMVR
+ av1_inc_mv(&diff, counts, precision);
+#else
av1_inc_mv(&diff, counts, 1);
+#endif
}
} else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
const MV *ref = &mi->bmi[block].ref_mv[1].as_mv;
@@ -314,7 +354,11 @@ static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2],
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#if CONFIG_AMVR
+ av1_inc_mv(&diff, counts, precision);
+#else
av1_inc_mv(&diff, counts, 1);
+#endif
} else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
const MV *ref = &mi->bmi[block].ref_mv[0].as_mv;
const MV diff = { mvs[0].as_mv.row - ref->row,
@@ -324,28 +368,13 @@ static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2],
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#if CONFIG_AMVR
+ av1_inc_mv(&diff, counts, precision);
+#else
av1_inc_mv(&diff, counts, 1);
+#endif
}
}
-#else // !CONFIG_EXT_INTER
-static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
- const int_mv mvs[2], const int_mv pred_mvs[2],
- nmv_context_counts *nmv_counts) {
- int i;
-
- for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
- int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame);
- int nmv_ctx =
- av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type],
- mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx);
- nmv_context_counts *counts = &nmv_counts[nmv_ctx];
- const MV *ref = &pred_mvs[i].as_mv;
- const MV diff = { mvs[i].as_mv.row - ref->row,
- mvs[i].as_mv.col - ref->col };
- av1_inc_mv(&diff, counts, 1);
- }
-}
-#endif // CONFIG_EXT_INTER
void av1_update_mv_count(ThreadData *td) {
const MACROBLOCKD *xd = &td->mb.e_mbd;
@@ -357,6 +386,12 @@ void av1_update_mv_count(ThreadData *td) {
#else
const int unify_bsize = 0;
#endif
+#if CONFIG_AMVR
+ MvSubpelPrecision precision = 1;
+ if (xd->cur_frame_mv_precision_level) {
+ precision = MV_SUBPEL_NONE;
+ }
+#endif
if (mbmi->sb_type < BLOCK_8X8 && !unify_bsize) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type];
@@ -367,22 +402,24 @@ void av1_update_mv_count(ThreadData *td) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int i = idy * 2 + idx;
-#if CONFIG_EXT_INTER
if (have_newmv_in_inter_mode(mi->bmi[i].as_mode))
- inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, mbmi_ext, td->counts->mv);
+
+#if CONFIG_AMVR
+ inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, mbmi_ext, td->counts->mv,
+ precision);
#else
- if (mi->bmi[i].as_mode == NEWMV)
- inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, mi->bmi[i].pred_mv,
- td->counts->mv);
-#endif // CONFIG_EXT_INTER
+ inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, mbmi_ext, td->counts->mv);
+#endif
}
}
} else {
-#if CONFIG_EXT_INTER
if (have_newmv_in_inter_mode(mbmi->mode))
+
+#if CONFIG_AMVR
+ inc_mvs(mbmi, mbmi_ext, mbmi->mv, mbmi->pred_mv, td->counts->mv,
+ precision);
#else
- if (mbmi->mode == NEWMV)
-#endif // CONFIG_EXT_INTER
inc_mvs(mbmi, mbmi_ext, mbmi->mv, mbmi->pred_mv, td->counts->mv);
+#endif
}
}
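The same frame-level precision is resolved once in av1_update_mv_count() and threaded through inc_mvs()/inc_mvs_sub8x8(), so the counted symbols match what the writer emits. A fragment mirroring the hunk (the default of 1 is taken verbatim from it):

/* Stats side must mirror the bitstream side: resolve the precision once,
 * then pass it to every av1_inc_mv() call. */
MvSubpelPrecision precision = 1;
if (xd->cur_frame_mv_precision_level) precision = MV_SUBPEL_NONE;
av1_inc_mv(&diff, counts, precision);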
diff --git a/third_party/aom/av1/encoder/encoder.c b/third_party/aom/av1/encoder/encoder.c
index 943e2c6a0..e9ab3c87f 100644
--- a/third_party/aom/av1/encoder/encoder.c
+++ b/third_party/aom/av1/encoder/encoder.c
@@ -45,11 +45,18 @@
#endif
#include "av1/encoder/ethread.h"
#include "av1/encoder/firstpass.h"
+#if CONFIG_HASH_ME
+#include "av1/encoder/hash_motion.h"
+#endif
#include "av1/encoder/mbgraph.h"
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+#include "av1/common/ncobmc_kernels.h"
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#include "av1/encoder/picklpf.h"
#if CONFIG_LOOP_RESTORATION
#include "av1/encoder/pickrst.h"
#endif // CONFIG_LOOP_RESTORATION
+#include "av1/encoder/random.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/segmentation.h"
@@ -90,6 +97,7 @@ FRAME_COUNTS aggregate_fc_per_type[FRAME_CONTEXTS];
// mv. Choose a very high value for
// now so that HIGH_PRECISION is always
// chosen.
+
// #define OUTPUT_YUV_REC
#ifdef OUTPUT_YUV_DENOISED
FILE *yuv_denoised_file = NULL;
@@ -172,14 +180,37 @@ static void apply_active_map(AV1_COMP *cpi) {
if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
av1_enable_segmentation(seg);
av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
+#if CONFIG_LOOPFILTER_LEVEL
+ av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H);
+ av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V);
+ av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U);
+ av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V);
+
+ av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H,
+ -MAX_LOOP_FILTER);
+ av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V,
+ -MAX_LOOP_FILTER);
+ av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U,
+ -MAX_LOOP_FILTER);
+ av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V,
+ -MAX_LOOP_FILTER);
+#else
av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
// Setting the data to -MAX_LOOP_FILTER will result in the computed loop
// filter level being zero regardless of the value of seg->abs_delta.
av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF,
-MAX_LOOP_FILTER);
+#endif // CONFIG_LOOPFILTER_LEVEL
} else {
av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
+#if CONFIG_LOOPFILTER_LEVEL
+ av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H);
+ av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V);
+ av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U);
+ av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V);
+#else
av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
+#endif // CONFIG_LOOPFILTER_LEVEL
if (seg->enabled) {
seg->update_data = 1;
seg->update_map = 1;
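Under CONFIG_LOOPFILTER_LEVEL the single SEG_LVL_ALT_LF feature splits into four (Y horizontal edges, Y vertical edges, U, V), so the inactive segment must pin all four to -MAX_LOOP_FILTER to keep its filter level at zero. The repeated calls above could equally be written as a loop; a sketch using only identifiers from the hunk:

static const int kAltLfFeatures[] = { SEG_LVL_ALT_LF_Y_H, SEG_LVL_ALT_LF_Y_V,
                                      SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_V };
for (int f = 0; f < 4; ++f) {
  av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, kAltLfFeatures[f]);
  /* -MAX_LOOP_FILTER forces the computed loop filter level to zero
   * regardless of seg->abs_delta, as the original comment notes. */
  av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, kAltLfFeatures[f],
                  -MAX_LOOP_FILTER);
}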
@@ -246,11 +277,21 @@ int av1_get_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows,
}
}
-void av1_set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv) {
+static void set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv
+#if CONFIG_AMVR
+ ,
+ int cur_frame_mv_precision_level
+#endif
+ ) {
MACROBLOCK *const mb = &cpi->td.mb;
cpi->common.allow_high_precision_mv = allow_high_precision_mv;
+#if CONFIG_AMVR
+ if (cpi->common.allow_high_precision_mv &&
+ cur_frame_mv_precision_level == 0) {
+#else
if (cpi->common.allow_high_precision_mv) {
+#endif
int i;
for (i = 0; i < NMV_CONTEXTS; ++i) {
mb->mv_cost_stack[i] = mb->nmvcost_hp[i];
@@ -296,13 +337,17 @@ static void setup_frame(AV1_COMP *cpi) {
if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
av1_setup_past_independence(cm);
} else {
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+// Just use frame context from first signaled reference frame.
+// This will always be LAST_FRAME for now.
+#else
#if CONFIG_EXT_REFS
const GF_GROUP *gf_group = &cpi->twopass.gf_group;
- if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
+ if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE)
cm->frame_context_idx = EXT_ARF_FRAME;
else if (cpi->refresh_alt_ref_frame)
cm->frame_context_idx = ARF_FRAME;
-#else
+#else // !CONFIG_EXT_REFS
if (cpi->refresh_alt_ref_frame) cm->frame_context_idx = ARF_FRAME;
#endif // CONFIG_EXT_REFS
else if (cpi->rc.is_src_frame_alt_ref)
@@ -315,32 +360,56 @@ static void setup_frame(AV1_COMP *cpi) {
#endif // CONFIG_EXT_REFS
else
cm->frame_context_idx = REGULAR_FRAME;
+#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
}
if (cm->frame_type == KEY_FRAME) {
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
av1_zero(cpi->interp_filter_selected);
+ set_sb_size(cm, select_sb_size(cpi));
+#if CONFIG_REFERENCE_BUFFER
+ set_use_reference_buffer(cm, 0);
+#endif // CONFIG_REFERENCE_BUFFER
} else {
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
+ cm->frame_refs[0].idx < 0) {
+ *cm->fc = cm->frame_contexts[FRAME_CONTEXT_DEFAULTS];
+ } else {
+ *cm->fc = cm->frame_contexts[cm->frame_refs[0].idx];
+ }
+#else
*cm->fc = cm->frame_contexts[cm->frame_context_idx];
+#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
av1_zero(cpi->interp_filter_selected[0]);
}
#if CONFIG_EXT_REFS
-#if CONFIG_ONE_SIDED_COMPOUND // No change to bitstream
+#if CONFIG_ONE_SIDED_COMPOUND && \
+ !CONFIG_EXT_COMP_REFS // No change to bitstream
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
cpi->refresh_bwd_ref_frame = cpi->refresh_last_frame;
cpi->rc.is_bipred_frame = 1;
}
-#endif
-#endif
+#endif // CONFIG_ONE_SIDED_COMPOUND && !CONFIG_EXT_COMP_REFS
+#endif // CONFIG_EXT_REFS
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
+ cm->frame_refs[0].idx < 0) {
+ // Use default frame context values.
+ cm->pre_fc = &cm->frame_contexts[FRAME_CONTEXT_DEFAULTS];
+ } else {
+ *cm->fc = cm->frame_contexts[cm->frame_refs[0].idx];
+ cm->pre_fc = &cm->frame_contexts[cm->frame_refs[0].idx];
+ }
+#else
cm->pre_fc = &cm->frame_contexts[cm->frame_context_idx];
+#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
cpi->vaq_refresh = 0;
-
- set_sb_size(cm, select_sb_size(cpi));
}
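CONFIG_NO_FRAME_CONTEXT_SIGNALING drops the coded context index: the encoder inherits the entropy context stored with the first signaled reference frame (LAST_FRAME for now) and falls back to defaults when no reference is usable. A hedged sketch of the selection rule read off the hunk:

static int frame_context_index_sketch(int intra_only, int error_resilient,
                                      int first_ref_buf_idx) {
  /* No decodable reference: both encoder and decoder must start from the
   * default context, or they would diverge. */
  if (intra_only || error_resilient || first_ref_buf_idx < 0)
    return FRAME_CONTEXT_DEFAULTS;
  return first_ref_buf_idx; /* inherit the context saved with frame_refs[0] */
}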
-static void av1_enc_setup_mi(AV1_COMMON *cm) {
+static void enc_setup_mi(AV1_COMMON *cm) {
int i;
cm->mi = cm->mip + cm->mi_stride + 1;
memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
@@ -350,7 +419,6 @@ static void av1_enc_setup_mi(AV1_COMMON *cm) {
// Clear left border column
for (i = 1; i < cm->mi_rows + 1; ++i)
memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
-
cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
@@ -358,7 +426,7 @@ static void av1_enc_setup_mi(AV1_COMMON *cm) {
cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
}
-static int av1_enc_alloc_mi(AV1_COMMON *cm, int mi_size) {
+static int enc_alloc_mi(AV1_COMMON *cm, int mi_size) {
cm->mip = aom_calloc(mi_size, sizeof(*cm->mip));
if (!cm->mip) return 1;
cm->prev_mip = aom_calloc(mi_size, sizeof(*cm->prev_mip));
@@ -374,7 +442,7 @@ static int av1_enc_alloc_mi(AV1_COMMON *cm, int mi_size) {
return 0;
}
-static void av1_enc_free_mi(AV1_COMMON *cm) {
+static void enc_free_mi(AV1_COMMON *cm) {
aom_free(cm->mip);
cm->mip = NULL;
aom_free(cm->prev_mip);
@@ -383,9 +451,10 @@ static void av1_enc_free_mi(AV1_COMMON *cm) {
cm->mi_grid_base = NULL;
aom_free(cm->prev_mi_grid_base);
cm->prev_mi_grid_base = NULL;
+ cm->mi_alloc_size = 0;
}
-static void av1_swap_mi_and_prev_mi(AV1_COMMON *cm) {
+static void swap_mi_and_prev_mi(AV1_COMMON *cm) {
// Current mip will be the prev_mip for the next frame.
MODE_INFO **temp_base = cm->prev_mi_grid_base;
MODE_INFO *temp = cm->prev_mip;
@@ -416,18 +485,31 @@ void av1_initialize_enc(void) {
#endif
av1_entropy_mv_init();
av1_encode_token_init();
-#if CONFIG_EXT_INTER
av1_init_wedge_masks();
-#endif
init_done = 1;
}
}
+static void dealloc_context_buffers_ext(AV1_COMP *cpi) {
+ if (cpi->mbmi_ext_base) {
+ aom_free(cpi->mbmi_ext_base);
+ cpi->mbmi_ext_base = NULL;
+ }
+}
+
+static void alloc_context_buffers_ext(AV1_COMP *cpi) {
+ AV1_COMMON *cm = &cpi->common;
+ int mi_size = cm->mi_cols * cm->mi_rows;
+
+ dealloc_context_buffers_ext(cpi);
+ CHECK_MEM_ERROR(cm, cpi->mbmi_ext_base,
+ aom_calloc(mi_size, sizeof(*cpi->mbmi_ext_base)));
+}
+
static void dealloc_compressor_data(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
- aom_free(cpi->mbmi_ext_base);
- cpi->mbmi_ext_base = NULL;
+ dealloc_context_buffers_ext(cpi);
#if CONFIG_PVQ
if (cpi->oxcf.pass != 1) {
@@ -498,9 +580,7 @@ static void dealloc_compressor_data(AV1_COMP *cpi) {
av1_free_pc_tree(&cpi->td);
-#if CONFIG_PALETTE
aom_free(cpi->td.mb.palette_buffer);
-#endif // CONFIG_PALETTE
#if CONFIG_ANS
aom_buf_ans_free(&cpi->buf_ans);
@@ -593,10 +673,22 @@ static void configure_static_seg_features(AV1_COMP *cpi) {
qi_delta =
av1_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, cm->bit_depth);
av1_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
+#if CONFIG_LOOPFILTER_LEVEL
+ av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_H, -2);
+ av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_V, -2);
+ av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_U, -2);
+ av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_V, -2);
+
+ av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_H);
+ av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_V);
+ av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_U);
+ av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_V);
+#else
av1_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
+ av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
+#endif // CONFIG_LOOPFILTER_LEVEL
av1_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
- av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
// Where relevant assume segment data is delta data
seg->abs_delta = SEGMENT_DELTADATA;
@@ -617,8 +709,20 @@ static void configure_static_seg_features(AV1_COMP *cpi) {
av1_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
av1_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
+#if CONFIG_LOOPFILTER_LEVEL
+ av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_H, -2);
+ av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_V, -2);
+ av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_U, -2);
+ av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_V, -2);
+
+ av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_H);
+ av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_V);
+ av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_U);
+ av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_V);
+#else
av1_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
+#endif // CONFIG_LOOPFILTER_LEVEL
// Segment coding disabled for compred testing
if (high_q || (cpi->static_mb_pct == 100)) {
@@ -781,15 +885,7 @@ static void alloc_util_frame_buffers(AV1_COMP *cpi) {
"Failed to allocate scaled last source buffer");
}
-static void alloc_context_buffers_ext(AV1_COMP *cpi) {
- AV1_COMMON *cm = &cpi->common;
- int mi_size = cm->mi_cols * cm->mi_rows;
-
- CHECK_MEM_ERROR(cm, cpi->mbmi_ext_base,
- aom_calloc(mi_size, sizeof(*cpi->mbmi_ext_base)));
-}
-
-void av1_alloc_compressor_data(AV1_COMP *cpi) {
+static void alloc_compressor_data(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
av1_alloc_context_buffers(cm, cm->width, cm->height);
@@ -806,9 +902,6 @@ void av1_alloc_compressor_data(AV1_COMP *cpi) {
unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
aom_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
-#if CONFIG_ANS && !ANS_MAX_SYMBOLS
- aom_buf_ans_alloc(&cpi->buf_ans, &cm->error, (int)tokens);
-#endif // CONFIG_ANS
}
av1_setup_pc_tree(&cpi->common, &cpi->td);
@@ -821,10 +914,61 @@ void av1_new_framerate(AV1_COMP *cpi, double framerate) {
cpi->od_rc.framerate = cpi->framerate;
od_enc_rc_resize(&cpi->od_rc);
#else
- av1_rc_update_framerate(cpi);
+ av1_rc_update_framerate(cpi, cpi->common.width, cpi->common.height);
#endif
}
+#if CONFIG_MAX_TILE
+
+static void set_tile_info_max_tile(AV1_COMP *cpi) {
+ AV1_COMMON *const cm = &cpi->common;
+ int i, start_sb;
+
+ av1_get_tile_limits(cm);
+
+ // configure tile columns
+ if (cpi->oxcf.tile_width_count == 0 || cpi->oxcf.tile_height_count == 0) {
+ cm->uniform_tile_spacing_flag = 1;
+ cm->log2_tile_cols = AOMMAX(cpi->oxcf.tile_columns, cm->min_log2_tile_cols);
+ cm->log2_tile_cols = AOMMIN(cm->log2_tile_cols, cm->max_log2_tile_cols);
+ } else {
+ int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+ int sb_cols = mi_cols >> MAX_MIB_SIZE_LOG2;
+ int size_sb, j = 0;
+ cm->uniform_tile_spacing_flag = 0;
+ for (i = 0, start_sb = 0; start_sb < sb_cols && i < MAX_TILE_COLS; i++) {
+ cm->tile_col_start_sb[i] = start_sb;
+ size_sb = cpi->oxcf.tile_widths[j++];
+ if (j >= cpi->oxcf.tile_width_count) j = 0;
+ start_sb += AOMMIN(size_sb, MAX_TILE_WIDTH_SB);
+ }
+ cm->tile_cols = i;
+ cm->tile_col_start_sb[i] = sb_cols;
+ }
+ av1_calculate_tile_cols(cm);
+
+ // configure tile rows
+ if (cm->uniform_tile_spacing_flag) {
+ cm->log2_tile_rows = AOMMAX(cpi->oxcf.tile_rows, cm->min_log2_tile_rows);
+ cm->log2_tile_rows = AOMMIN(cm->log2_tile_rows, cm->max_log2_tile_rows);
+ } else {
+ int mi_rows = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+ int sb_rows = mi_rows >> MAX_MIB_SIZE_LOG2;
+ int size_sb, j = 0;
+ for (i = 0, start_sb = 0; start_sb < sb_rows && i < MAX_TILE_ROWS; i++) {
+ cm->tile_row_start_sb[i] = start_sb;
+ size_sb = cpi->oxcf.tile_heights[j++];
+ if (j >= cpi->oxcf.tile_height_count) j = 0;
+ start_sb += AOMMIN(size_sb, cm->max_tile_height_sb);
+ }
+ cm->tile_rows = i;
+ cm->tile_row_start_sb[i] = sb_rows;
+ }
+ av1_calculate_tile_rows(cm);
+}
+
+#endif
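Under CONFIG_MAX_TILE, explicit tile widths/heights are consumed cyclically from the encoder config until the frame is covered, recording each tile's starting superblock and terminating with a sentinel. A condensed sketch of the loop shared by the column and row passes (generic parameter names; the logic is per the hunk):

static int assign_tile_starts_sketch(int *start_sb, int sb_total,
                                     const int *sizes_sb, int size_count,
                                     int max_tiles, int max_size_sb) {
  int i, j = 0, sb = 0;
  for (i = 0; sb < sb_total && i < max_tiles; ++i) {
    start_sb[i] = sb;
    int size = sizes_sb[j++];
    if (j >= size_count) j = 0;               /* wrap around the size list */
    sb += (size < max_size_sb) ? size : max_size_sb;
  }
  start_sb[i] = sb_total; /* sentinel: one past the last tile's start */
  return i;               /* number of tiles actually laid out */
}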
+
static void set_tile_info(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
#if CONFIG_DEPENDENT_HORZTILES
@@ -866,24 +1010,22 @@ static void set_tile_info(AV1_COMP *cpi) {
while (cm->tile_rows * cm->tile_height < cm->mi_rows) ++cm->tile_rows;
} else {
#endif // CONFIG_EXT_TILE
- int min_log2_tile_cols, max_log2_tile_cols;
- av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
-
- cm->log2_tile_cols =
- clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
- cm->log2_tile_rows = cpi->oxcf.tile_rows;
-
- cm->tile_cols = 1 << cm->log2_tile_cols;
- cm->tile_rows = 1 << cm->log2_tile_rows;
- cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
- cm->tile_width >>= cm->log2_tile_cols;
- cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
- cm->tile_height >>= cm->log2_tile_rows;
-
- // round to integer multiples of max superblock size
- cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2);
- cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2);
+#if CONFIG_MAX_TILE
+ set_tile_info_max_tile(cpi);
+#else
+ int min_log2_tile_cols, max_log2_tile_cols;
+ av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+
+ cm->log2_tile_cols =
+ clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
+ cm->log2_tile_rows = cpi->oxcf.tile_rows;
+
+ cm->tile_width =
+ get_tile_size(cm->mi_cols, cm->log2_tile_cols, &cm->tile_cols);
+ cm->tile_height =
+ get_tile_size(cm->mi_rows, cm->log2_tile_rows, &cm->tile_rows);
+#endif // CONFIG_MAX_TILE
#if CONFIG_EXT_TILE
}
#endif // CONFIG_EXT_TILE
@@ -952,7 +1094,6 @@ static void update_frame_size(AV1_COMP *cpi) {
NULL);
memset(cpi->mbmi_ext_base, 0,
cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
-
set_tile_info(cpi);
}
@@ -963,14 +1104,21 @@ static void init_buffer_indices(AV1_COMP *cpi) {
cpi->lst_fb_idxes[fb_idx] = fb_idx;
cpi->gld_fb_idx = LAST_REF_FRAMES;
cpi->bwd_fb_idx = LAST_REF_FRAMES + 1;
- cpi->alt_fb_idx = LAST_REF_FRAMES + 2;
+ cpi->alt2_fb_idx = LAST_REF_FRAMES + 2;
+ cpi->alt_fb_idx = LAST_REF_FRAMES + 3;
+ cpi->ext_fb_idx = LAST_REF_FRAMES + 4;
for (fb_idx = 0; fb_idx < MAX_EXT_ARFS + 1; ++fb_idx)
cpi->arf_map[fb_idx] = LAST_REF_FRAMES + 2 + fb_idx;
-#else
+#else // !CONFIG_EXT_REFS
cpi->lst_fb_idx = 0;
cpi->gld_fb_idx = 1;
cpi->alt_fb_idx = 2;
#endif // CONFIG_EXT_REFS
+#if CONFIG_AMVR
+ cpi->rate_index = 0;
+ cpi->rate_size = 0;
+ cpi->cur_poc = -1;
+#endif
}
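The new ALTREF2 slot shifts the virtual reference buffer layout. Read off the assignments above, the EXT_REFS mapping is now:

/* [0 .. LAST_REF_FRAMES-1] -> LAST, LAST2, LAST3 (lst_fb_idxes)
 * [LAST_REF_FRAMES + 0]    -> GOLDEN  (gld_fb_idx)
 * [LAST_REF_FRAMES + 1]    -> BWDREF  (bwd_fb_idx)
 * [LAST_REF_FRAMES + 2]    -> ALTREF2 (alt2_fb_idx)
 * [LAST_REF_FRAMES + 3]    -> ALTREF  (alt_fb_idx)
 * [LAST_REF_FRAMES + 4]    -> ext_fb_idx
 * arf_map[] still starts at LAST_REF_FRAMES + 2, i.e. at the ALTREF2 slot. */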
static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
@@ -993,7 +1141,7 @@ static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
cm->width = oxcf->width;
cm->height = oxcf->height;
- av1_alloc_compressor_data(cpi);
+ alloc_compressor_data(cpi);
// Single thread case: use counts in common.
cpi->td.counts = &cm->counts;
@@ -1004,6 +1152,10 @@ static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
cpi->static_mb_pct = 0;
cpi->ref_frame_flags = 0;
+ // Reset resize pending flags
+ cpi->resize_pending_width = 0;
+ cpi->resize_pending_height = 0;
+
init_buffer_indices(cpi);
}
@@ -1212,9 +1364,22 @@ MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x32x4d)
MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x8)
MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x8_avg)
MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x8x4d)
-#endif
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x64)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x64_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x64x4d)
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x16)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x16_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x16x4d)
+#if CONFIG_EXT_PARTITION
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x128)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x128_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x128x4d)
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad128x32)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad128x32_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad128x32x4d)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
-#if CONFIG_EXT_INTER
#define HIGHBD_MBFP(BT, MCSDF, MCSVF) \
cpi->fn_ptr[BT].msdf = MCSDF; \
cpi->fn_ptr[BT].msvf = MCSVF;
@@ -1268,8 +1433,13 @@ MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x16)
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x4)
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x32)
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x8)
-#endif
-#endif // CONFIG_EXT_INTER
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x64)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x16)
+#if CONFIG_EXT_PARTITION
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x128)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_MOTION_VAR
#define HIGHBD_OBFP(BT, OSDF, OVF, OSVF) \
@@ -1318,7 +1488,13 @@ MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x16)
MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x4)
MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x32)
MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x8)
-#endif
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x64)
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x16)
+#if CONFIG_EXT_PARTITION
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x128)
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad128x32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
#endif // CONFIG_MOTION_VAR
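The MAKE_BFP_SAD_WRAPPER family generates _bits8/_bits10/_bits12 entry points for each high-bitdepth SAD. The macro bodies are not shown in this hunk; my reading of the pattern (the >>2 / >>4 rescaling to 8-bit units is an assumption, not quoted from this diff) is:

#define MAKE_BFP_SAD_WRAPPER_SKETCH(fnname)                                 \
  static unsigned int fnname##_bits8(const uint8_t *src, int src_stride,    \
                                     const uint8_t *ref, int ref_stride) {  \
    return fnname(src, src_stride, ref, ref_stride);                        \
  }                                                                         \
  static unsigned int fnname##_bits10(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride) { \
    return fnname(src, src_stride, ref, ref_stride) >> 2;                   \
  }                                                                         \
  static unsigned int fnname##_bits12(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride) { \
    return fnname(src, src_stride, ref, ref_stride) >> 4;                   \
  }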
static void highbd_set_var_fns(AV1_COMP *const cpi) {
@@ -1327,6 +1503,32 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
switch (cm->bit_depth) {
case AOM_BITS_8:
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+ HIGHBD_BFP(BLOCK_128X32, aom_highbd_sad128x32_bits8,
+ aom_highbd_sad128x32_avg_bits8, aom_highbd_8_variance128x32,
+ aom_highbd_8_sub_pixel_variance128x32,
+ aom_highbd_8_sub_pixel_avg_variance128x32, NULL, NULL,
+ aom_highbd_sad128x32x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_32X128, aom_highbd_sad32x128_bits8,
+ aom_highbd_sad32x128_avg_bits8, aom_highbd_8_variance32x128,
+ aom_highbd_8_sub_pixel_variance32x128,
+ aom_highbd_8_sub_pixel_avg_variance32x128, NULL, NULL,
+ aom_highbd_sad32x128x4d_bits8)
+#endif // CONFIG_EXT_PARTITION
+
+ HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits8,
+ aom_highbd_sad64x16_avg_bits8, aom_highbd_8_variance64x16,
+ aom_highbd_8_sub_pixel_variance64x16,
+ aom_highbd_8_sub_pixel_avg_variance64x16, NULL, NULL,
+ aom_highbd_sad64x16x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits8,
+ aom_highbd_sad16x64_avg_bits8, aom_highbd_8_variance16x64,
+ aom_highbd_8_sub_pixel_variance16x64,
+ aom_highbd_8_sub_pixel_avg_variance16x64, NULL, NULL,
+ aom_highbd_sad16x64x4d_bits8)
+
HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits8,
aom_highbd_sad32x8_avg_bits8, aom_highbd_8_variance32x8,
aom_highbd_8_sub_pixel_variance32x8,
@@ -1464,7 +1666,6 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_sad64x128x4d_bits8)
#endif // CONFIG_EXT_PARTITION
-#if CONFIG_EXT_INTER
#if CONFIG_EXT_PARTITION
HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits8,
aom_highbd_8_masked_sub_pixel_variance128x128)
@@ -1500,6 +1701,20 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits8,
aom_highbd_8_masked_sub_pixel_variance4x4)
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+ HIGHBD_MBFP(BLOCK_128X32, aom_highbd_masked_sad128x32_bits8,
+ aom_highbd_8_masked_sub_pixel_variance128x32)
+
+ HIGHBD_MBFP(BLOCK_32X128, aom_highbd_masked_sad32x128_bits8,
+ aom_highbd_8_masked_sub_pixel_variance32x128)
+#endif // CONFIG_EXT_PARTITION
+
+ HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits8,
+ aom_highbd_8_masked_sub_pixel_variance64x16)
+
+ HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits8,
+ aom_highbd_8_masked_sub_pixel_variance16x64)
+
HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits8,
aom_highbd_8_masked_sub_pixel_variance32x8)
@@ -1512,7 +1727,6 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits8,
aom_highbd_8_masked_sub_pixel_variance4x16)
#endif
-#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
#if CONFIG_EXT_PARTITION
HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits8,
@@ -1565,6 +1779,24 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_obmc_variance4x4,
aom_highbd_obmc_sub_pixel_variance4x4)
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_128X32, aom_highbd_obmc_sad128x32_bits8,
+ aom_highbd_obmc_variance128x32,
+ aom_highbd_obmc_sub_pixel_variance128x32)
+
+ HIGHBD_OBFP(BLOCK_32X128, aom_highbd_obmc_sad32x128_bits8,
+ aom_highbd_obmc_variance32x128,
+ aom_highbd_obmc_sub_pixel_variance32x128)
+#endif // CONFIG_EXT_PARTITION
+
+ HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits8,
+ aom_highbd_obmc_variance64x16,
+ aom_highbd_obmc_sub_pixel_variance64x16)
+
+ HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits8,
+ aom_highbd_obmc_variance16x64,
+ aom_highbd_obmc_sub_pixel_variance16x64)
+
HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits8,
aom_highbd_obmc_variance32x8,
aom_highbd_obmc_sub_pixel_variance32x8)
@@ -1586,6 +1818,34 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
case AOM_BITS_10:
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+ HIGHBD_BFP(BLOCK_128X32, aom_highbd_sad128x32_bits10,
+ aom_highbd_sad128x32_avg_bits10,
+ aom_highbd_10_variance128x32,
+ aom_highbd_10_sub_pixel_variance128x32,
+ aom_highbd_10_sub_pixel_avg_variance128x32, NULL, NULL,
+ aom_highbd_sad128x32x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_32X128, aom_highbd_sad32x128_bits10,
+ aom_highbd_sad32x128_avg_bits10,
+ aom_highbd_10_variance32x128,
+ aom_highbd_10_sub_pixel_variance32x128,
+ aom_highbd_10_sub_pixel_avg_variance32x128, NULL, NULL,
+ aom_highbd_sad32x128x4d_bits10)
+#endif // CONFIG_EXT_PARTITION
+
+ HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits10,
+ aom_highbd_sad64x16_avg_bits10, aom_highbd_10_variance64x16,
+ aom_highbd_10_sub_pixel_variance64x16,
+ aom_highbd_10_sub_pixel_avg_variance64x16, NULL, NULL,
+ aom_highbd_sad64x16x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits10,
+ aom_highbd_sad16x64_avg_bits10, aom_highbd_10_variance16x64,
+ aom_highbd_10_sub_pixel_variance16x64,
+ aom_highbd_10_sub_pixel_avg_variance16x64, NULL, NULL,
+ aom_highbd_sad16x64x4d_bits10)
+
HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits10,
aom_highbd_sad32x8_avg_bits10, aom_highbd_10_variance32x8,
aom_highbd_10_sub_pixel_variance32x8,
@@ -1727,7 +1987,6 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_sad64x128x4d_bits10)
#endif // CONFIG_EXT_PARTITION
-#if CONFIG_EXT_INTER
#if CONFIG_EXT_PARTITION
HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits10,
aom_highbd_10_masked_sub_pixel_variance128x128)
@@ -1763,6 +2022,20 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits10,
aom_highbd_10_masked_sub_pixel_variance4x4)
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+ HIGHBD_MBFP(BLOCK_128X32, aom_highbd_masked_sad128x32_bits10,
+ aom_highbd_10_masked_sub_pixel_variance128x32)
+
+ HIGHBD_MBFP(BLOCK_32X128, aom_highbd_masked_sad32x128_bits10,
+ aom_highbd_10_masked_sub_pixel_variance32x128)
+#endif // CONFIG_EXT_PARTITION
+
+ HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits10,
+ aom_highbd_10_masked_sub_pixel_variance64x16)
+
+ HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits10,
+ aom_highbd_10_masked_sub_pixel_variance16x64)
+
HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits10,
aom_highbd_10_masked_sub_pixel_variance32x8)
@@ -1775,7 +2048,6 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits10,
aom_highbd_10_masked_sub_pixel_variance4x16)
#endif
-#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
#if CONFIG_EXT_PARTITION
HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits10,
@@ -1828,6 +2100,24 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_10_obmc_variance4x4,
aom_highbd_10_obmc_sub_pixel_variance4x4)
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_128X32, aom_highbd_obmc_sad128x32_bits10,
+ aom_highbd_10_obmc_variance128x32,
+ aom_highbd_10_obmc_sub_pixel_variance128x32)
+
+ HIGHBD_OBFP(BLOCK_32X128, aom_highbd_obmc_sad32x128_bits10,
+ aom_highbd_10_obmc_variance32x128,
+ aom_highbd_10_obmc_sub_pixel_variance32x128)
+#endif // CONFIG_EXT_PARTITION
+
+ HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits10,
+ aom_highbd_10_obmc_variance64x16,
+ aom_highbd_10_obmc_sub_pixel_variance64x16)
+
+ HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits10,
+ aom_highbd_10_obmc_variance16x64,
+ aom_highbd_10_obmc_sub_pixel_variance16x64)
+
HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits10,
aom_highbd_10_obmc_variance32x8,
aom_highbd_10_obmc_sub_pixel_variance32x8)
@@ -1849,6 +2139,34 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
case AOM_BITS_12:
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+ HIGHBD_BFP(BLOCK_128X32, aom_highbd_sad128x32_bits12,
+ aom_highbd_sad128x32_avg_bits12,
+ aom_highbd_12_variance128x32,
+ aom_highbd_12_sub_pixel_variance128x32,
+ aom_highbd_12_sub_pixel_avg_variance128x32, NULL, NULL,
+ aom_highbd_sad128x32x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_32X128, aom_highbd_sad32x128_bits12,
+ aom_highbd_sad32x128_avg_bits12,
+ aom_highbd_12_variance32x128,
+ aom_highbd_12_sub_pixel_variance32x128,
+ aom_highbd_12_sub_pixel_avg_variance32x128, NULL, NULL,
+ aom_highbd_sad32x128x4d_bits12)
+#endif // CONFIG_EXT_PARTITION
+
+ HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits12,
+ aom_highbd_sad64x16_avg_bits12, aom_highbd_12_variance64x16,
+ aom_highbd_12_sub_pixel_variance64x16,
+ aom_highbd_12_sub_pixel_avg_variance64x16, NULL, NULL,
+ aom_highbd_sad64x16x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits12,
+ aom_highbd_sad16x64_avg_bits12, aom_highbd_12_variance16x64,
+ aom_highbd_12_sub_pixel_variance16x64,
+ aom_highbd_12_sub_pixel_avg_variance16x64, NULL, NULL,
+ aom_highbd_sad16x64x4d_bits12)
+
HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits12,
aom_highbd_sad32x8_avg_bits12, aom_highbd_12_variance32x8,
aom_highbd_12_sub_pixel_variance32x8,
@@ -1990,7 +2308,6 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_sad64x128x4d_bits12)
#endif // CONFIG_EXT_PARTITION
-#if CONFIG_EXT_INTER
#if CONFIG_EXT_PARTITION
HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits12,
aom_highbd_12_masked_sub_pixel_variance128x128)
@@ -2026,6 +2343,20 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits12,
aom_highbd_12_masked_sub_pixel_variance4x4)
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+ HIGHBD_MBFP(BLOCK_128X32, aom_highbd_masked_sad128x32_bits12,
+ aom_highbd_12_masked_sub_pixel_variance128x32)
+
+ HIGHBD_MBFP(BLOCK_32X128, aom_highbd_masked_sad32x128_bits12,
+ aom_highbd_12_masked_sub_pixel_variance32x128)
+#endif // CONFIG_EXT_PARTITION
+
+ HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits12,
+ aom_highbd_12_masked_sub_pixel_variance64x16)
+
+ HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits12,
+ aom_highbd_12_masked_sub_pixel_variance16x64)
+
HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits12,
aom_highbd_12_masked_sub_pixel_variance32x8)
@@ -2038,7 +2369,6 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits12,
aom_highbd_12_masked_sub_pixel_variance4x16)
#endif
-#endif // CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR
#if CONFIG_EXT_PARTITION
@@ -2092,6 +2422,24 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) {
aom_highbd_12_obmc_variance4x4,
aom_highbd_12_obmc_sub_pixel_variance4x4)
#if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_128X32, aom_highbd_obmc_sad128x32_bits12,
+ aom_highbd_12_obmc_variance128x32,
+ aom_highbd_12_obmc_sub_pixel_variance128x32)
+
+ HIGHBD_OBFP(BLOCK_32X128, aom_highbd_obmc_sad32x128_bits12,
+ aom_highbd_12_obmc_variance32x128,
+ aom_highbd_12_obmc_sub_pixel_variance32x128)
+#endif // CONFIG_EXT_PARTITION
+
+ HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits12,
+ aom_highbd_12_obmc_variance64x16,
+ aom_highbd_12_obmc_sub_pixel_variance64x16)
+
+ HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits12,
+ aom_highbd_12_obmc_variance16x64,
+ aom_highbd_12_obmc_sub_pixel_variance16x64)
+
HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits12,
aom_highbd_12_obmc_variance32x8,
aom_highbd_12_obmc_sub_pixel_variance32x8)
@@ -2139,7 +2487,6 @@ static void realloc_segmentation_maps(AV1_COMP *cpi) {
aom_calloc(cm->mi_rows * cm->mi_cols, 1));
}
-#if CONFIG_EXT_INTER
void set_compound_tools(AV1_COMMON *cm) {
(void)cm;
#if CONFIG_INTERINTRA
@@ -2149,7 +2496,6 @@ void set_compound_tools(AV1_COMMON *cm) {
cm->allow_masked_compound = 1;
#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
}
-#endif // CONFIG_EXT_INTER
void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
AV1_COMMON *const cm = &cpi->common;
@@ -2186,25 +2532,28 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
cpi->refresh_golden_frame = 0;
#if CONFIG_EXT_REFS
cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
#endif // CONFIG_EXT_REFS
cm->refresh_frame_context =
(oxcf->error_resilient_mode || oxcf->frame_parallel_decoding_mode)
? REFRESH_FRAME_CONTEXT_FORWARD
: REFRESH_FRAME_CONTEXT_BACKWARD;
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
+#endif
-#if CONFIG_PALETTE
if (x->palette_buffer == NULL) {
CHECK_MEM_ERROR(cm, x->palette_buffer,
aom_memalign(16, sizeof(*x->palette_buffer)));
}
-#endif // CONFIG_PALETTE
-#if CONFIG_EXT_INTER
set_compound_tools(cm);
-#endif // CONFIG_EXT_INTER
av1_reset_segment_features(cm);
- av1_set_high_precision_mv(cpi, 0);
+#if CONFIG_AMVR
+ set_high_precision_mv(cpi, 0, 0);
+#else
+ set_high_precision_mv(cpi, 0);
+#endif
set_rc_buffer_sizes(rc, &cpi->oxcf);
@@ -2235,7 +2584,8 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
if (cpi->initial_width) {
if (cm->width > cpi->initial_width || cm->height > cpi->initial_height) {
av1_free_context_buffers(cm);
- av1_alloc_compressor_data(cpi);
+ av1_free_pc_tree(&cpi->td);
+ alloc_compressor_data(cpi);
realloc_segmentation_maps(cpi);
cpi->initial_width = cpi->initial_height = 0;
}
@@ -2265,15 +2615,12 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
#if CONFIG_HIGHBITDEPTH
highbd_set_var_fns(cpi);
#endif
-
#if CONFIG_ANS && ANS_MAX_SYMBOLS
cpi->common.ans_window_size_log2 = cpi->oxcf.ans_window_size_log2;
- if (cpi->buf_ans.size != (1 << cpi->common.ans_window_size_log2)) {
- aom_buf_ans_free(&cpi->buf_ans);
- aom_buf_ans_alloc(&cpi->buf_ans, &cpi->common.error,
- 1 << cpi->common.ans_window_size_log2);
- }
#endif // CONFIG_ANS && ANS_MAX_SYMBOLS
+#if CONFIG_AMVR
+ cm->seq_mv_precision_level = 2;
+#endif
}
AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
@@ -2293,9 +2640,13 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
}
cm->error.setjmp = 1;
- cm->alloc_mi = av1_enc_alloc_mi;
- cm->free_mi = av1_enc_free_mi;
- cm->setup_mi = av1_enc_setup_mi;
+ cm->alloc_mi = enc_alloc_mi;
+ cm->free_mi = enc_free_mi;
+ cm->setup_mi = enc_setup_mi;
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ get_default_ncobmc_kernels(cm);
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
CHECK_MEM_ERROR(cm, cm->fc,
(FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
@@ -2467,12 +2818,14 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
#endif
CHECK_MEM_ERROR(
cm, cpi->td.mb.above_pred_buf,
- (uint8_t *)aom_memalign(16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*cpi->td.mb.above_pred_buf)));
+ (uint8_t *)aom_memalign(16,
+ buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*cpi->td.mb.above_pred_buf)));
CHECK_MEM_ERROR(
cm, cpi->td.mb.left_pred_buf,
- (uint8_t *)aom_memalign(16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*cpi->td.mb.left_pred_buf)));
+ (uint8_t *)aom_memalign(16,
+ buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*cpi->td.mb.left_pred_buf)));
CHECK_MEM_ERROR(cm, cpi->td.mb.wsrc_buf,
(int32_t *)aom_memalign(
@@ -2513,7 +2866,25 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
BFP(BLOCK_32X8, aom_sad32x8, aom_sad32x8_avg, aom_variance32x8,
aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8, NULL, NULL,
aom_sad32x8x4d)
-#endif
+
+ BFP(BLOCK_16X64, aom_sad16x64, aom_sad16x64_avg, aom_variance16x64,
+ aom_sub_pixel_variance16x64, aom_sub_pixel_avg_variance16x64, NULL, NULL,
+ aom_sad16x64x4d)
+
+ BFP(BLOCK_64X16, aom_sad64x16, aom_sad64x16_avg, aom_variance64x16,
+ aom_sub_pixel_variance64x16, aom_sub_pixel_avg_variance64x16, NULL, NULL,
+ aom_sad64x16x4d)
+
+#if CONFIG_EXT_PARTITION
+ BFP(BLOCK_32X128, aom_sad32x128, aom_sad32x128_avg, aom_variance32x128,
+ aom_sub_pixel_variance32x128, aom_sub_pixel_avg_variance32x128, NULL,
+ NULL, aom_sad32x128x4d)
+
+ BFP(BLOCK_128X32, aom_sad128x32, aom_sad128x32_avg, aom_variance128x32,
+ aom_sub_pixel_variance128x32, aom_sub_pixel_avg_variance128x32, NULL,
+ NULL, aom_sad128x32x4d)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_EXT_PARTITION
BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128,
@@ -2640,10 +3011,23 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
OBFP(BLOCK_32X8, aom_obmc_sad32x8, aom_obmc_variance32x8,
aom_obmc_sub_pixel_variance32x8)
-#endif
+
+ OBFP(BLOCK_16X64, aom_obmc_sad16x64, aom_obmc_variance16x64,
+ aom_obmc_sub_pixel_variance16x64)
+
+ OBFP(BLOCK_64X16, aom_obmc_sad64x16, aom_obmc_variance64x16,
+ aom_obmc_sub_pixel_variance64x16)
+
+#if CONFIG_EXT_PARTITION
+ OBFP(BLOCK_32X128, aom_obmc_sad32x128, aom_obmc_variance32x128,
+ aom_obmc_sub_pixel_variance32x128)
+
+ OBFP(BLOCK_128X32, aom_obmc_sad128x32, aom_obmc_variance128x32,
+ aom_obmc_sub_pixel_variance128x32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
#endif // CONFIG_MOTION_VAR
-#if CONFIG_EXT_INTER
#define MBFP(BT, MCSDF, MCSVF) \
cpi->fn_ptr[BT].msdf = MCSDF; \
cpi->fn_ptr[BT].msvf = MCSVF;
@@ -2676,8 +3060,17 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
MBFP(BLOCK_8X32, aom_masked_sad8x32, aom_masked_sub_pixel_variance8x32)
MBFP(BLOCK_32X8, aom_masked_sad32x8, aom_masked_sub_pixel_variance32x8)
-#endif
-#endif // CONFIG_EXT_INTER
+
+ MBFP(BLOCK_16X64, aom_masked_sad16x64, aom_masked_sub_pixel_variance16x64)
+
+ MBFP(BLOCK_64X16, aom_masked_sad64x16, aom_masked_sub_pixel_variance64x16)
+
+#if CONFIG_EXT_PARTITION
+ MBFP(BLOCK_32X128, aom_masked_sad32x128, aom_masked_sub_pixel_variance32x128)
+
+ MBFP(BLOCK_128X32, aom_masked_sad128x32, aom_masked_sub_pixel_variance128x32)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_HIGHBITDEPTH
highbd_set_var_fns(cpi);
@@ -2695,7 +3088,7 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf,
av1_loop_filter_init(cm);
#if CONFIG_FRAME_SUPERRES
- cm->superres_scale_numerator = SCALE_DENOMINATOR;
+ cm->superres_scale_denominator = SCALE_NUMERATOR;
cm->superres_upscaled_width = oxcf->width;
cm->superres_upscaled_height = oxcf->height;
#endif // CONFIG_FRAME_SUPERRES
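This hunk fixes a swapped name/value pair: superres scaling is expressed as num/den with the numerator fixed at SCALE_NUMERATOR, so the per-frame field is a denominator, and initializing it to SCALE_NUMERATOR (assumed to be 8) means 8/8, i.e. no downscaling. A hedged sketch of the relation:

static int superres_scaled_width_sketch(int upscaled_width, int denominator) {
  const int kScaleNumerator = 8; /* assumed value of SCALE_NUMERATOR */
  /* denominator ranges upward from kScaleNumerator; equality = unscaled.
   * Rounding here is illustrative, not necessarily the library's. */
  return (upscaled_width * kScaleNumerator + denominator / 2) / denominator;
}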
@@ -2815,9 +3208,7 @@ void av1_remove_compressor(AV1_COMP *cpi) {
// Deallocate allocated thread data.
if (t < cpi->num_workers - 1) {
-#if CONFIG_PALETTE
aom_free(thread_data->td->palette_buffer);
-#endif // CONFIG_PALETTE
#if CONFIG_MOTION_VAR
aom_free(thread_data->td->above_pred_buf);
aom_free(thread_data->td->left_pred_buf);
@@ -2862,7 +3253,6 @@ void av1_remove_compressor(AV1_COMP *cpi) {
#ifdef OUTPUT_YUV_REC
fclose(yuv_rec_file);
#endif
-
#if 0
if (keyfile)
@@ -2911,31 +3301,9 @@ void av1_update_reference(AV1_COMP *cpi, int ref_frame_flags) {
cpi->ext_refresh_frame_flags_pending = 1;
}
-static YV12_BUFFER_CONFIG *get_av1_ref_frame_buffer(
- AV1_COMP *cpi, AOM_REFFRAME ref_frame_flag) {
- MV_REFERENCE_FRAME ref_frame = NONE_FRAME;
- if (ref_frame_flag == AOM_LAST_FLAG) ref_frame = LAST_FRAME;
-#if CONFIG_EXT_REFS
- else if (ref_frame_flag == AOM_LAST2_FLAG)
- ref_frame = LAST2_FRAME;
- else if (ref_frame_flag == AOM_LAST3_FLAG)
- ref_frame = LAST3_FRAME;
-#endif // CONFIG_EXT_REFS
- else if (ref_frame_flag == AOM_GOLD_FLAG)
- ref_frame = GOLDEN_FRAME;
-#if CONFIG_EXT_REFS
- else if (ref_frame_flag == AOM_BWD_FLAG)
- ref_frame = BWDREF_FRAME;
-#endif // CONFIG_EXT_REFS
- else if (ref_frame_flag == AOM_ALT_FLAG)
- ref_frame = ALTREF_FRAME;
-
- return ref_frame == NONE_FRAME ? NULL : get_ref_frame_buffer(cpi, ref_frame);
-}
-
-int av1_copy_reference_enc(AV1_COMP *cpi, AOM_REFFRAME ref_frame_flag,
- YV12_BUFFER_CONFIG *sd) {
- YV12_BUFFER_CONFIG *cfg = get_av1_ref_frame_buffer(cpi, ref_frame_flag);
+int av1_copy_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd) {
+ AV1_COMMON *const cm = &cpi->common;
+ YV12_BUFFER_CONFIG *cfg = get_ref_frame(cm, idx);
if (cfg) {
aom_yv12_copy_frame(cfg, sd);
return 0;
@@ -2944,9 +3312,9 @@ int av1_copy_reference_enc(AV1_COMP *cpi, AOM_REFFRAME ref_frame_flag,
}
}
-int av1_set_reference_enc(AV1_COMP *cpi, AOM_REFFRAME ref_frame_flag,
- YV12_BUFFER_CONFIG *sd) {
- YV12_BUFFER_CONFIG *cfg = get_av1_ref_frame_buffer(cpi, ref_frame_flag);
+int av1_set_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd) {
+ AV1_COMMON *const cm = &cpi->common;
+ YV12_BUFFER_CONFIG *cfg = get_ref_frame(cm, idx);
if (cfg) {
aom_yv12_copy_frame(sd, cfg);
return 0;
@@ -2994,7 +3362,53 @@ void aom_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f) {
#endif
#if CONFIG_EXT_REFS && !CONFIG_XIPHRC
+#if USE_GF16_MULTI_LAYER
+static void check_show_existing_frame_gf16(AV1_COMP *cpi) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ AV1_COMMON *const cm = &cpi->common;
+ const FRAME_UPDATE_TYPE next_frame_update_type =
+ gf_group->update_type[gf_group->index];
+
+ if (cm->show_existing_frame == 1) {
+ cm->show_existing_frame = 0;
+ } else if (cpi->rc.is_last_bipred_frame) {
+ cpi->rc.is_last_bipred_frame = 0;
+ cm->show_existing_frame = 1;
+ cpi->existing_fb_idx_to_show = cpi->bwd_fb_idx;
+ } else if (next_frame_update_type == OVERLAY_UPDATE ||
+ next_frame_update_type == INTNL_OVERLAY_UPDATE) {
+ // Check the temporal filtering status for the next OVERLAY frame
+ const int num_arfs_in_gf = cpi->num_extra_arfs + 1;
+ int which_arf = 0, arf_idx;
+ // Identify the index to the next overlay frame.
+ for (arf_idx = 0; arf_idx < num_arfs_in_gf; arf_idx++) {
+ if (gf_group->index == cpi->arf_pos_for_ovrly[arf_idx]) {
+ which_arf = arf_idx;
+ break;
+ }
+ }
+ assert(arf_idx < num_arfs_in_gf);
+ if (cpi->is_arf_filter_off[which_arf]) {
+ cm->show_existing_frame = 1;
+ cpi->rc.is_src_frame_alt_ref = 1;
+ cpi->existing_fb_idx_to_show = (next_frame_update_type == OVERLAY_UPDATE)
+ ? cpi->alt_fb_idx
+ : cpi->bwd_fb_idx;
+ cpi->is_arf_filter_off[which_arf] = 0;
+ }
+ }
+ cpi->rc.is_src_frame_ext_arf = 0;
+}
+#endif // USE_GF16_MULTI_LAYER
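The GF16 variant decides whether the next display frame can be served from an existing buffer instead of being coded. Condensed flow, read off the function above:

/* 1. If a frame was just shown from an existing buffer, clear the flag.
 * 2. After the last bi-predictive frame, show the BWDREF buffer next.
 * 3. Before an overlay: if the corresponding ARF was never temporally
 *    filtered, skip coding the overlay and show the existing buffer
 *    directly -- ALTREF for a plain OVERLAY_UPDATE, BWDREF for an
 *    INTNL_OVERLAY_UPDATE. */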
+
static void check_show_existing_frame(AV1_COMP *cpi) {
+#if USE_GF16_MULTI_LAYER
+ if (cpi->rc.baseline_gf_interval == 16) {
+ check_show_existing_frame_gf16(cpi);
+ return;
+ }
+#endif // USE_GF16_MULTI_LAYER
+
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
AV1_COMMON *const cm = &cpi->common;
const FRAME_UPDATE_TYPE next_frame_update_type =
@@ -3004,9 +3418,9 @@ static void check_show_existing_frame(AV1_COMP *cpi) {
if (cm->show_existing_frame == 1) {
cm->show_existing_frame = 0;
} else if (cpi->rc.is_last_bipred_frame) {
- // NOTE(zoeliu): If the current frame is a last bi-predictive frame, it is
- // needed next to show the BWDREF_FRAME, which is pointed by
- // the last_fb_idxes[0] after reference frame buffer update
+ // NOTE: If the current frame is a last bi-predictive frame, it is
+ // needed next to show the BWDREF_FRAME, which is pointed by
+ // the last_fb_idxes[0] after reference frame buffer update
cpi->rc.is_last_bipred_frame = 0;
cm->show_existing_frame = 1;
cpi->existing_fb_idx_to_show = cpi->lst_fb_idxes[0];
@@ -3017,7 +3431,9 @@ static void check_show_existing_frame(AV1_COMP *cpi) {
// in av1_rc_get_second_pass_params(cpi)
cm->show_existing_frame = 1;
cpi->rc.is_src_frame_alt_ref = 1;
- cpi->existing_fb_idx_to_show = cpi->alt_fb_idx;
+ cpi->existing_fb_idx_to_show = (next_frame_update_type == OVERLAY_UPDATE)
+ ? cpi->alt_fb_idx
+ : cpi->alt2_fb_idx;
cpi->is_arf_filter_off[which_arf] = 0;
}
cpi->rc.is_src_frame_ext_arf = 0;
@@ -3028,7 +3444,7 @@ static void check_show_existing_frame(AV1_COMP *cpi) {
void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) {
uint8_t *src = s->y_buffer;
int h = cm->height;
-
+ if (yuv_rec_file == NULL) return;
#if CONFIG_HIGHBITDEPTH
if (s->flags & YV12_FLAG_HIGHBITDEPTH) {
uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer);
@@ -3095,7 +3511,8 @@ static int recode_loop_test_global_motion(AV1_COMP *cpi) {
if (cm->global_motion[i].wmtype != IDENTITY &&
rdc->global_motion_used[i] * GM_RECODE_LOOP_NUM4X4_FACTOR <
cpi->gmparams_cost[i]) {
- set_default_warp_params(&cm->global_motion[i]);
+ cm->global_motion[i] = default_warp_params;
+ assert(cm->global_motion[i].wmtype == IDENTITY);
cpi->gmparams_cost[i] = 0;
recode = 1;
recode |= (rdc->global_motion_used[i] > 0);
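The recode test weighs a global motion model's header cost against how often it was actually used; the change above resets a losing model with default_warp_params (whose type is IDENTITY, hence the new assert). A sketch of the keep/drop rule:

static int keep_global_model_sketch(int used_4x4_units, int header_cost,
                                    int recode_factor) {
  /* recode_factor corresponds to GM_RECODE_LOOP_NUM4X4_FACTOR above:
   * the model pays for itself only if enough 4x4 units used it. */
  return used_4x4_units * recode_factor >= header_cost;
}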
@@ -3242,14 +3659,69 @@ static void enc_check_valid_ref_frames(AV1_COMP *const cpi) {
}
#endif // CONFIG_VAR_REFS
-void av1_update_reference_frames(AV1_COMP *cpi) {
+#if CONFIG_EXT_REFS
+#if USE_GF16_MULTI_LAYER
+static void update_reference_frames_gf16(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
BufferPool *const pool = cm->buffer_pool;
+ if (cm->frame_type == KEY_FRAME) {
+ for (int ref_frame = 0; ref_frame < LAST_REF_FRAMES; ++ref_frame) {
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->lst_fb_idxes[ref_frame]],
+ cm->new_fb_idx);
+ }
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->new_fb_idx);
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->bwd_fb_idx],
+ cm->new_fb_idx);
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt2_fb_idx],
+ cm->new_fb_idx);
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
+ cm->new_fb_idx);
+ } else {
+ if (cpi->refresh_last_frame || cpi->refresh_golden_frame ||
+ cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame ||
+ cpi->refresh_alt_ref_frame) {
+ assert(cpi->refresh_fb_idx >= 0 && cpi->refresh_fb_idx < REF_FRAMES);
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->refresh_fb_idx],
+ cm->new_fb_idx);
+ }
+
+ // TODO(zoeliu): Handle cpi->interp_filter_selected[].
+
+ // For GF of 16, an additional ref frame index mapping needs to be handled
+ // if this is the last frame to encode in the current GF group.
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ if (gf_group->update_type[gf_group->index + 1] == OVERLAY_UPDATE)
+ av1_ref_frame_map_idx_updates(cpi, gf_group->index + 1);
+ }
+
+#if DUMP_REF_FRAME_IMAGES == 1
+ // Dump out all reference frame images.
+ dump_ref_frame_images(cpi);
+#endif // DUMP_REF_FRAME_IMAGES
+}
+#endif // USE_GF16_MULTI_LAYER
+#endif // CONFIG_EXT_REFS
+
+static void update_reference_frames(AV1_COMP *cpi) {
+ AV1_COMMON *const cm = &cpi->common;
+
// NOTE: Save the new show frame buffer index for --test-code=warn, i.e.,
// to verify that there is no mismatch between encoder and decoder.
if (cm->show_frame) cpi->last_show_frame_buf_idx = cm->new_fb_idx;
+#if CONFIG_EXT_REFS
+#if USE_GF16_MULTI_LAYER
+ if (cpi->rc.baseline_gf_interval == 16) {
+ update_reference_frames_gf16(cpi);
+ return;
+ }
+#endif // USE_GF16_MULTI_LAYER
+#endif // CONFIG_EXT_REFS
+
+ BufferPool *const pool = cm->buffer_pool;
// At this point the new frame has been encoded.
// If any buffer copy / swapping is signaled it should be done here.
if (cm->frame_type == KEY_FRAME) {
@@ -3258,6 +3730,8 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
#if CONFIG_EXT_REFS
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->bwd_fb_idx],
cm->new_fb_idx);
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt2_fb_idx],
+ cm->new_fb_idx);
#endif // CONFIG_EXT_REFS
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
cm->new_fb_idx);
@@ -3281,7 +3755,7 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
#if CONFIG_EXT_REFS
// We need to modify the mapping accordingly
cpi->arf_map[0] = cpi->alt_fb_idx;
-#endif
+#endif // CONFIG_EXT_REFS
// TODO(zoeliu): Do we need to copy cpi->interp_filter_selected[0] over to
// cpi->interp_filter_selected[GOLDEN_FRAME]?
#if CONFIG_EXT_REFS
@@ -3290,36 +3764,32 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
// Refresh the LAST_FRAME with the ALTREF2_FRAME and retire the LAST3_FRAME
// by updating the virtual indices.
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- int which_arf = gf_group->arf_ref_idx[gf_group->index];
- int tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
+ const int which_arf = gf_group->arf_ref_idx[gf_group->index];
+ assert(gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE);
+ const int tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
shift_last_ref_frames(cpi);
- cpi->lst_fb_idxes[0] = cpi->alt_fb_idx;
- cpi->alt_fb_idx = tmp;
+ cpi->lst_fb_idxes[0] = cpi->alt2_fb_idx;
+ cpi->alt2_fb_idx = tmp;
// We need to modify the mapping accordingly
- cpi->arf_map[which_arf] = cpi->alt_fb_idx;
+ cpi->arf_map[which_arf] = cpi->alt2_fb_idx;
memcpy(cpi->interp_filter_selected[LAST_FRAME],
- cpi->interp_filter_selected[ALTREF_FRAME + which_arf],
- sizeof(cpi->interp_filter_selected[ALTREF_FRAME + which_arf]));
+ cpi->interp_filter_selected[ALTREF2_FRAME],
+ sizeof(cpi->interp_filter_selected[ALTREF2_FRAME]));
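+ // Illustrative trace of the swap above (hypothetical buffer indices):
+ // before, lst_fb_idxes = {3, 4, 5} and alt2_fb_idx = 6; after,
+ // lst_fb_idxes = {6, 3, 4} and alt2_fb_idx = 5, i.e. ALTREF2's buffer
+ // becomes the new LAST and LAST3's buffer is handed to ALTREF2.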
#endif // CONFIG_EXT_REFS
} else { /* For non key/golden frames */
+ // === ALTREF_FRAME ===
if (cpi->refresh_alt_ref_frame) {
int arf_idx = cpi->alt_fb_idx;
int which_arf = 0;
-#if CONFIG_EXT_REFS
- if (cpi->oxcf.pass == 2) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- which_arf = gf_group->arf_update_idx[gf_group->index];
- arf_idx = cpi->arf_map[which_arf];
- }
-#else
+#if !CONFIG_EXT_REFS
if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
arf_idx = gf_group->arf_update_idx[gf_group->index];
}
-#endif // CONFIG_EXT_REFS
+#endif // !CONFIG_EXT_REFS
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
memcpy(cpi->interp_filter_selected[ALTREF_FRAME + which_arf],
@@ -3327,6 +3797,7 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
sizeof(cpi->interp_filter_selected[0]));
}
+ // === GOLDEN_FRAME ===
if (cpi->refresh_golden_frame) {
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
cm->new_fb_idx);
@@ -3340,18 +3811,8 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
}
#if CONFIG_EXT_REFS
+ // === BWDREF_FRAME ===
if (cpi->refresh_bwd_ref_frame) {
- if (cpi->rc.is_bwd_ref_frame && cpi->num_extra_arfs) {
- // We have swapped the virtual indices to allow bwd_ref_frame to use
- // ALT0 as reference frame. We need to swap them back.
- // NOTE: The ALT_REFs' are indexed reversely, and ALT0 refers to the
- // farthest ALT_REF from the first frame in the gf group.
- int tmp = cpi->arf_map[0];
- cpi->arf_map[0] = cpi->alt_fb_idx;
- cpi->alt_fb_idx = cpi->bwd_fb_idx;
- cpi->bwd_fb_idx = tmp;
- }
-
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->bwd_fb_idx],
cm->new_fb_idx);
@@ -3359,6 +3820,16 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
cpi->interp_filter_selected[0],
sizeof(cpi->interp_filter_selected[0]));
}
+
+ // === ALTREF2_FRAME ===
+ if (cpi->refresh_alt2_ref_frame) {
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt2_fb_idx],
+ cm->new_fb_idx);
+
+ memcpy(cpi->interp_filter_selected[ALTREF2_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
#endif // CONFIG_EXT_REFS
}
@@ -3396,15 +3867,6 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
// lst_fb_idxes[2], lst_fb_idxes[0], lst_fb_idxes[1]
int ref_frame;
- if (cpi->rc.is_bwd_ref_frame && cpi->num_extra_arfs) {
- // We have swapped the virtual indices to use ALT0 as BWD_REF
- // and we need to swap them back.
- int tmp = cpi->arf_map[0];
- cpi->arf_map[0] = cpi->alt_fb_idx;
- cpi->alt_fb_idx = cpi->bwd_fb_idx;
- cpi->bwd_fb_idx = tmp;
- }
-
if (cm->frame_type == KEY_FRAME) {
for (ref_frame = 0; ref_frame < LAST_REF_FRAMES; ++ref_frame) {
ref_cnt_fb(pool->frame_bufs,
@@ -3448,7 +3910,7 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
}
}
-#else
+#else // !CONFIG_EXT_REFS
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
cm->new_fb_idx);
if (!cpi->rc.is_src_frame_alt_ref) {
@@ -3468,18 +3930,12 @@ void av1_update_reference_frames(AV1_COMP *cpi) {
static INLINE void alloc_frame_mvs(AV1_COMMON *const cm, int buffer_idx) {
assert(buffer_idx != INVALID_IDX);
RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
- if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
- new_fb_ptr->mi_cols < cm->mi_cols) {
- aom_free(new_fb_ptr->mvs);
- CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
- (MV_REF *)aom_calloc(cm->mi_rows * cm->mi_cols,
- sizeof(*new_fb_ptr->mvs)));
- new_fb_ptr->mi_rows = cm->mi_rows;
- new_fb_ptr->mi_cols = cm->mi_cols;
- }
+ ensure_mv_buffer(new_fb_ptr, cm);
+ new_fb_ptr->width = cm->width;
+ new_fb_ptr->height = cm->height;
}
-void av1_scale_references(AV1_COMP *cpi) {
+static void scale_references(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
MV_REFERENCE_FRAME ref_frame;
const AOM_REFFRAME ref_mask[INTER_REFS_PER_FRAME] = {
@@ -3491,6 +3947,7 @@ void av1_scale_references(AV1_COMP *cpi) {
AOM_GOLD_FLAG,
#if CONFIG_EXT_REFS
AOM_BWD_FLAG,
+ AOM_ALT2_FLAG,
#endif // CONFIG_EXT_REFS
AOM_ALT_FLAG
};
@@ -3581,8 +4038,9 @@ static void release_scaled_references(AV1_COMP *cpi) {
refresh[1] = refresh[2] = 0;
refresh[3] = (cpi->refresh_golden_frame) ? 1 : 0;
refresh[4] = (cpi->refresh_bwd_ref_frame) ? 1 : 0;
- refresh[5] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
-#else
+ refresh[5] = (cpi->refresh_alt2_ref_frame) ? 1 : 0;
+ refresh[6] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
+#else // !CONFIG_EXT_REFS
refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
#endif // CONFIG_EXT_REFS
@@ -3611,28 +4069,6 @@ static void release_scaled_references(AV1_COMP *cpi) {
}
}
-static void full_to_model_count(unsigned int *model_count,
- unsigned int *full_count) {
- int n;
- model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
- model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
- model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
- for (n = THREE_TOKEN; n < EOB_TOKEN; ++n)
- model_count[TWO_TOKEN] += full_count[n];
- model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
-}
-
-void av1_full_to_model_counts(av1_coeff_count_model *model_count,
- av1_coeff_count *full_count) {
- int i, j, k, l;
-
- for (i = 0; i < PLANE_TYPES; ++i)
- for (j = 0; j < REF_TYPES; ++j)
- for (k = 0; k < COEF_BANDS; ++k)
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
- full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]);
-}
-
#if 0 && CONFIG_INTERNAL_STATS
static void output_frame_level_debug_stats(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
@@ -3731,7 +4167,7 @@ static void set_size_independent_vars(AV1_COMP *cpi) {
#if CONFIG_GLOBAL_MOTION
int i;
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
- set_default_warp_params(&cpi->common.global_motion[i]);
+ cpi->common.global_motion[i] = default_warp_params;
}
cpi->global_motion_search_done = 0;
#endif // CONFIG_GLOBAL_MOTION
@@ -3739,9 +4175,7 @@ static void set_size_independent_vars(AV1_COMP *cpi) {
av1_set_rd_speed_thresholds(cpi);
av1_set_rd_speed_thresholds_sub8x8(cpi);
cpi->common.interp_filter = cpi->sf.default_interp_filter;
-#if CONFIG_EXT_INTER
if (!frame_is_intra_only(&cpi->common)) set_compound_tools(&cpi->common);
-#endif // CONFIG_EXT_INTER
}
static void set_size_dependent_vars(AV1_COMP *cpi, int *q, int *bottom_index,
@@ -3759,11 +4193,17 @@ static void set_size_dependent_vars(AV1_COMP *cpi, int *q, int *bottom_index,
&cpi->od_rc, cpi->refresh_golden_frame, cpi->refresh_alt_ref_frame,
frame_type, bottom_index, top_index);
#else
- *q = av1_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
+ *q = av1_rc_pick_q_and_bounds(cpi, cm->width, cm->height, bottom_index,
+ top_index);
#endif
if (!frame_is_intra_only(cm)) {
- av1_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
+#if CONFIG_AMVR
+ set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH,
+ cpi->common.cur_frame_mv_precision_level);
+#else
+ set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
+#endif
}
// Configure experimental use of segmentation for enhanced coding of
@@ -3801,9 +4241,89 @@ static void set_restoration_tilesize(int width, int height, int sx, int sy,
rst[0].restoration_tilesize = (RESTORATION_TILESIZE_MAX >> 1);
rst[1].restoration_tilesize = rst[0].restoration_tilesize >> s;
rst[2].restoration_tilesize = rst[1].restoration_tilesize;
+
+ rst[0].procunit_width = rst[0].procunit_height = RESTORATION_PROC_UNIT_SIZE;
+ rst[1].procunit_width = rst[2].procunit_width =
+ RESTORATION_PROC_UNIT_SIZE >> sx;
+ rst[1].procunit_height = rst[2].procunit_height =
+ RESTORATION_PROC_UNIT_SIZE >> sy;
}
#endif // CONFIG_LOOP_RESTORATION
+static void init_ref_frame_bufs(AV1_COMMON *cm) {
+ int i;
+ BufferPool *const pool = cm->buffer_pool;
+ cm->new_fb_idx = INVALID_IDX;
+ for (i = 0; i < REF_FRAMES; ++i) {
+ cm->ref_frame_map[i] = INVALID_IDX;
+ pool->frame_bufs[i].ref_count = 0;
+ }
+#if CONFIG_HASH_ME
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
+ av1_hash_table_init(&pool->frame_bufs[i].hash_table);
+ }
+#endif
+}
+
+static void check_initial_width(AV1_COMP *cpi,
+#if CONFIG_HIGHBITDEPTH
+ int use_highbitdepth,
+#endif
+ int subsampling_x, int subsampling_y) {
+ AV1_COMMON *const cm = &cpi->common;
+
+ if (!cpi->initial_width ||
+#if CONFIG_HIGHBITDEPTH
+ cm->use_highbitdepth != use_highbitdepth ||
+#endif
+ cm->subsampling_x != subsampling_x ||
+ cm->subsampling_y != subsampling_y) {
+ cm->subsampling_x = subsampling_x;
+ cm->subsampling_y = subsampling_y;
+#if CONFIG_HIGHBITDEPTH
+ cm->use_highbitdepth = use_highbitdepth;
+#endif
+
+ alloc_raw_frame_buffers(cpi);
+ init_ref_frame_bufs(cm);
+ alloc_util_frame_buffers(cpi);
+
+ init_motion_estimation(cpi); // TODO(agrange) This can be removed.
+
+ cpi->initial_width = cm->width;
+ cpi->initial_height = cm->height;
+ cpi->initial_mbs = cm->MBs;
+ }
+}
+
+// Returns 1 if the requested width or height was <= 0.
+static int set_size_literal(AV1_COMP *cpi, int width, int height) {
+ AV1_COMMON *cm = &cpi->common;
+#if CONFIG_HIGHBITDEPTH
+ check_initial_width(cpi, cm->use_highbitdepth, cm->subsampling_x,
+ cm->subsampling_y);
+#else
+ check_initial_width(cpi, cm->subsampling_x, cm->subsampling_y);
+#endif // CONFIG_HIGHBITDEPTH
+
+ if (width <= 0 || height <= 0) return 1;
+
+ cm->width = width;
+ cm->height = height;
+
+ if (cpi->initial_width && cpi->initial_height &&
+ (cm->width > cpi->initial_width || cm->height > cpi->initial_height)) {
+ av1_free_context_buffers(cm);
+ av1_free_pc_tree(&cpi->td);
+ alloc_compressor_data(cpi);
+ realloc_segmentation_maps(cpi);
+ cpi->initial_width = cpi->initial_height = 0;
+ }
+ update_frame_size(cpi);
+
+ return 0;
+}
+
static void set_frame_size(AV1_COMP *cpi, int width, int height) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
@@ -3811,13 +4331,13 @@ static void set_frame_size(AV1_COMP *cpi, int width, int height) {
if (width != cm->width || height != cm->height) {
// There has been a change in the encoded frame size
- av1_set_size_literal(cpi, width, height);
+ set_size_literal(cpi, width, height);
set_mv_search_params(cpi);
}
#if !CONFIG_XIPHRC
if (cpi->oxcf.pass == 2) {
- av1_set_target_rate(cpi);
+ av1_set_target_rate(cpi, cm->width, cm->height);
}
#endif
@@ -3848,6 +4368,8 @@ static void set_frame_size(AV1_COMP *cpi, int width, int height) {
for (int i = 0; i < MAX_MB_PLANE; ++i) {
cpi->rst_search[i].restoration_tilesize =
cm->rst_info[i].restoration_tilesize;
+ cpi->rst_search[i].procunit_width = cm->rst_info[i].procunit_width;
+ cpi->rst_search[i].procunit_height = cm->rst_info[i].procunit_height;
av1_alloc_restoration_struct(cm, &cpi->rst_search[i],
#if CONFIG_FRAME_SUPERRES
cm->superres_upscaled_width,
@@ -3903,26 +4425,189 @@ static void set_frame_size(AV1_COMP *cpi, int width, int height) {
set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
}
-static void setup_frame_size(AV1_COMP *cpi) {
- int encode_width = cpi->oxcf.width;
- int encode_height = cpi->oxcf.height;
+static uint8_t calculate_next_resize_scale(const AV1_COMP *cpi) {
+ // Arbitrary seed for the pseudo-random scale selection.
+ static unsigned int seed = 56789;
+ const AV1EncoderConfig *oxcf = &cpi->oxcf;
+ if (oxcf->pass == 1) return SCALE_NUMERATOR;
+ uint8_t new_denom = SCALE_NUMERATOR;
+
+ switch (oxcf->resize_mode) {
+ case RESIZE_NONE: new_denom = SCALE_NUMERATOR; break;
+ case RESIZE_FIXED:
+ if (cpi->common.frame_type == KEY_FRAME)
+ new_denom = oxcf->resize_kf_scale_denominator;
+ else
+ new_denom = oxcf->resize_scale_denominator;
+ break;
+ case RESIZE_RANDOM: new_denom = lcg_rand16(&seed) % 9 + 8; break;
+ default: assert(0);
+ }
+ return new_denom;
+}
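+// Worked example (assuming SCALE_NUMERATOR == 8): RESIZE_RANDOM picks
+// new_denom = lcg_rand16(&seed) % 9 + 8, i.e. a value in [8, 16], so each
+// dimension is coded anywhere from full size (8/8) down to half size (8/16).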
+
+#if CONFIG_FRAME_SUPERRES
+
+static uint8_t calculate_next_superres_scale(AV1_COMP *cpi) {
+ // Arbitrary seed for the pseudo-random scale selection.
+ static unsigned int seed = 34567;
+ const AV1EncoderConfig *oxcf = &cpi->oxcf;
+ if (oxcf->pass == 1) return SCALE_NUMERATOR;
+ uint8_t new_denom = SCALE_NUMERATOR;
+ int bottom_index, top_index, q, qthresh;
+
+ switch (oxcf->superres_mode) {
+ case SUPERRES_NONE: new_denom = SCALE_NUMERATOR; break;
+ case SUPERRES_FIXED:
+ if (cpi->common.frame_type == KEY_FRAME)
+ new_denom = oxcf->superres_kf_scale_denominator;
+ else
+ new_denom = oxcf->superres_scale_denominator;
+ break;
+ case SUPERRES_RANDOM: new_denom = lcg_rand16(&seed) % 9 + 8; break;
+ case SUPERRES_QTHRESH:
+ qthresh = (cpi->common.frame_type == KEY_FRAME ? oxcf->superres_kf_qthresh
+ : oxcf->superres_qthresh);
+ av1_set_target_rate(cpi, cpi->oxcf.width, cpi->oxcf.height);
+ q = av1_rc_pick_q_and_bounds(cpi, cpi->oxcf.width, cpi->oxcf.height,
+ &bottom_index, &top_index);
+ if (q < qthresh) {
+ new_denom = SCALE_NUMERATOR;
+ } else {
+ new_denom = SCALE_NUMERATOR + 1 + ((q - qthresh) >> 3);
+ new_denom = AOMMIN(SCALE_NUMERATOR << 1, new_denom);
+ // printf("SUPERRES: q %d, qthresh %d: denom %d\n", q, qthresh,
+ // new_denom);
+ }
+ break;
+ default: assert(0);
+ }
+ return new_denom;
+}
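+// Worked example for SUPERRES_QTHRESH (assuming SCALE_NUMERATOR == 8): with
+// qthresh = 47 and a picked q of 63, new_denom = 8 + 1 + ((63 - 47) >> 3)
+// = 11, clamped by AOMMIN(16, 11) to 11, so the frame is coded at 8/11 of
+// its upscaled width and super-resolved after decoding.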
+
+static int dimension_is_ok(int orig_dim, int resized_dim, int denom) {
+ return (resized_dim * SCALE_NUMERATOR >= orig_dim * denom / 2);
+}
+
+// TODO(now): Fix?
+static int dimensions_are_ok(int owidth, int oheight, size_params_type *rsz) {
+ return dimension_is_ok(owidth, rsz->resize_width, rsz->superres_denom) &&
+ (CONFIG_HORZONLY_FRAME_SUPERRES ||
+ dimension_is_ok(oheight, rsz->resize_height, rsz->superres_denom));
+}
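+// Worked example of the conformance check (assuming SCALE_NUMERATOR == 8):
+// owidth = 1280 resized to 640 combined with superres_denom = 16 gives
+// 640 * 8 = 5120 < 1280 * 16 / 2 = 10240, so dimensions_are_ok() fails and
+// validate_size_scales() below must relax one of the two denominators.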
+
+#define DIVIDE_AND_ROUND(x, y) (((x) + ((y) >> 1)) / (y))
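+// e.g. DIVIDE_AND_ROUND(5, 2) == 3 and DIVIDE_AND_ROUND(7, 4) == 2:
+// integer division rounded to the nearest whole number.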
+
+static int validate_size_scales(RESIZE_MODE resize_mode,
+ SUPERRES_MODE superres_mode, int owidth,
+ int oheight, size_params_type *rsz) {
+ if (dimensions_are_ok(owidth, oheight, rsz)) { // Nothing to do.
+ return 1;
+ }
+
+ // Calculate current resize scale.
+ int resize_denom =
+ AOMMAX(DIVIDE_AND_ROUND(owidth * SCALE_NUMERATOR, rsz->resize_width),
+ DIVIDE_AND_ROUND(oheight * SCALE_NUMERATOR, rsz->resize_height));
+
+ if (resize_mode != RESIZE_RANDOM && superres_mode == SUPERRES_RANDOM) {
+ // Alter superres scale as needed to enforce conformity.
+ rsz->superres_denom =
+ (2 * SCALE_NUMERATOR * SCALE_NUMERATOR) / resize_denom;
+ if (!dimensions_are_ok(owidth, oheight, rsz)) {
+ if (rsz->superres_denom > SCALE_NUMERATOR) --rsz->superres_denom;
+ }
+ } else if (resize_mode == RESIZE_RANDOM && superres_mode != SUPERRES_RANDOM) {
+ // Alter resize scale as needed to enforce conformity.
+ resize_denom =
+ (2 * SCALE_NUMERATOR * SCALE_NUMERATOR) / rsz->superres_denom;
+ rsz->resize_width = owidth;
+ rsz->resize_height = oheight;
+ av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height,
+ resize_denom);
+ if (!dimensions_are_ok(owidth, oheight, rsz)) {
+ if (resize_denom > SCALE_NUMERATOR) {
+ --resize_denom;
+ rsz->resize_width = owidth;
+ rsz->resize_height = oheight;
+ av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height,
+ resize_denom);
+ }
+ }
+ } else if (resize_mode == RESIZE_RANDOM && superres_mode == SUPERRES_RANDOM) {
+ // Alter both resize and superres scales as needed to enforce conformity.
+ do {
+ if (resize_denom > rsz->superres_denom)
+ --resize_denom;
+ else
+ --rsz->superres_denom;
+ rsz->resize_width = owidth;
+ rsz->resize_height = oheight;
+ av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height,
+ resize_denom);
+ } while (!dimensions_are_ok(owidth, oheight, rsz) &&
+ (resize_denom > SCALE_NUMERATOR ||
+ rsz->superres_denom > SCALE_NUMERATOR));
+ } else { // We are allowed to alter neither resize scale nor superres scale.
+ return 0;
+ }
+ return dimensions_are_ok(owidth, oheight, rsz);
+}
+#undef DIVIDE_AND_ROUND
+#endif // CONFIG_FRAME_SUPERRES
+
+// Calculates the resize and superres params for the next frame.
+size_params_type av1_calculate_next_size_params(AV1_COMP *cpi) {
+ const AV1EncoderConfig *oxcf = &cpi->oxcf;
+ size_params_type rsz = {
+ oxcf->width,
+ oxcf->height,
+#if CONFIG_FRAME_SUPERRES
+ SCALE_NUMERATOR
+#endif // CONFIG_FRAME_SUPERRES
+ };
+ int resize_denom;
+ if (oxcf->pass == 1) return rsz;
+ if (cpi->resize_pending_width && cpi->resize_pending_height) {
+ rsz.resize_width = cpi->resize_pending_width;
+ rsz.resize_height = cpi->resize_pending_height;
+ cpi->resize_pending_width = cpi->resize_pending_height = 0;
+ } else {
+ resize_denom = calculate_next_resize_scale(cpi);
+ rsz.resize_width = cpi->oxcf.width;
+ rsz.resize_height = cpi->oxcf.height;
+ av1_calculate_scaled_size(&rsz.resize_width, &rsz.resize_height,
+ resize_denom);
+ }
+#if CONFIG_FRAME_SUPERRES
+ rsz.superres_denom = calculate_next_superres_scale(cpi);
+ if (!validate_size_scales(oxcf->resize_mode, oxcf->superres_mode, oxcf->width,
+ oxcf->height, &rsz))
+ assert(0 && "Invalid scale parameters");
+#endif // CONFIG_FRAME_SUPERRES
+ return rsz;
+}
- uint8_t resize_num = av1_calculate_next_resize_scale(cpi);
- av1_calculate_scaled_size(&encode_width, &encode_height, resize_num);
+static void setup_frame_size_from_params(AV1_COMP *cpi, size_params_type *rsz) {
+ int encode_width = rsz->resize_width;
+ int encode_height = rsz->resize_height;
#if CONFIG_FRAME_SUPERRES
AV1_COMMON *cm = &cpi->common;
cm->superres_upscaled_width = encode_width;
cm->superres_upscaled_height = encode_height;
- cm->superres_scale_numerator =
- av1_calculate_next_superres_scale(cpi, encode_width, encode_width);
- av1_calculate_scaled_size(&encode_width, &encode_height,
- cm->superres_scale_numerator);
+ cm->superres_scale_denominator = rsz->superres_denom;
+ av1_calculate_scaled_superres_size(&encode_width, &encode_height,
+ rsz->superres_denom);
#endif // CONFIG_FRAME_SUPERRES
-
set_frame_size(cpi, encode_width, encode_height);
}
+static void setup_frame_size(AV1_COMP *cpi) {
+ size_params_type rsz = av1_calculate_next_size_params(cpi);
+ setup_frame_size_from_params(cpi, &rsz);
+}
+
#if CONFIG_FRAME_SUPERRES
static void superres_post_encode(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
@@ -3978,7 +4663,12 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
#endif // CONFIG_EXT_TILE
if (no_loopfilter) {
+#if CONFIG_LOOPFILTER_LEVEL
+ lf->filter_level[0] = 0;
+ lf->filter_level[1] = 0;
+#else
lf->filter_level = 0;
+#endif
} else {
struct aom_usec_timer timer;
@@ -3992,15 +4682,31 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer);
}
- if (lf->filter_level > 0) {
+#if !CONFIG_LPF_SB
+#if CONFIG_LOOPFILTER_LEVEL
+ if (lf->filter_level[0] || lf->filter_level[1])
+#else
+ if (lf->filter_level > 0)
+#endif
+#endif // CONFIG_LPF_SB
+ {
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4
-#if CONFIG_UV_LVL
- av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
- av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_u, 1, 0);
- av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_v, 2, 0);
+#if CONFIG_LPF_SB
+ av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0, 0,
+ 0);
+#else
+#if CONFIG_LOOPFILTER_LEVEL
+ av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level[0],
+ lf->filter_level[1], 0, 0);
+ av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_u,
+ lf->filter_level_u, 1, 0);
+ av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level_v,
+ lf->filter_level_v, 2, 0);
+
#else
av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
-#endif // CONFIG_UV_LVL
+#endif // CONFIG_LOOPFILTER_LEVEL
+#endif // CONFIG_LPF_SB
#else
if (cpi->num_workers > 1)
av1_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
@@ -4010,13 +4716,18 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
#endif
}
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm);
+#endif
+
#if CONFIG_CDEF
if (is_lossless_requested(&cpi->oxcf)) {
cm->cdef_bits = 0;
cm->cdef_strengths[0] = 0;
cm->nb_cdef_strengths = 1;
} else {
- // Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v
+ // Find CDEF parameters
av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd,
cpi->oxcf.speed > 0);
@@ -4030,6 +4741,7 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
#endif // CONFIG_FRAME_SUPERRES
#if CONFIG_LOOP_RESTORATION
+ aom_extend_frame_borders(cm->frame_to_show);
av1_pick_filter_restoration(cpi->source, cpi, cpi->sf.lpf_pick);
if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
@@ -4049,7 +4761,9 @@ static void encode_without_recode_loop(AV1_COMP *cpi) {
aom_clear_system_state();
set_size_independent_vars(cpi);
+
setup_frame_size(cpi);
+
assert(cm->width == cpi->scaled_source.y_crop_width);
assert(cm->height == cpi->scaled_source.y_crop_height);
@@ -4060,9 +4774,12 @@ static void encode_without_recode_loop(AV1_COMP *cpi) {
if (cpi->unscaled_last_source != NULL)
cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source,
&cpi->scaled_last_source);
+#if CONFIG_HIGHBITDEPTH && CONFIG_GLOBAL_MOTION
+ cpi->source->buf_8bit_valid = 0;
+#endif
if (frame_is_intra_only(cm) == 0) {
- av1_scale_references(cpi);
+ scale_references(cpi);
}
av1_set_quantizer(cm, q);
@@ -4113,14 +4830,18 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
set_size_independent_vars(cpi);
+#if CONFIG_HIGHBITDEPTH && CONFIG_GLOBAL_MOTION
+ cpi->source->buf_8bit_valid = 0;
+#endif
+
+ aom_clear_system_state();
+ setup_frame_size(cpi);
+ set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
+
do {
aom_clear_system_state();
- setup_frame_size(cpi);
-
if (loop_count == 0) {
- set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
-
// TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
set_mv_search_params(cpi);
@@ -4143,6 +4864,13 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
&frame_over_shoot_limit);
}
+#if CONFIG_GLOBAL_MOTION
+ // If the frame was scaled, redo the global motion search if it has
+ // already been done.
+ if (loop_count > 0 && cpi->source && cpi->global_motion_search_done)
+ if (cpi->source->y_crop_width != cm->width ||
+ cpi->source->y_crop_height != cm->height)
+ cpi->global_motion_search_done = 0;
+#endif // CONFIG_GLOBAL_MOTION
cpi->source =
av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source);
if (cpi->unscaled_last_source != NULL)
@@ -4153,9 +4881,8 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
if (loop_count > 0) {
release_scaled_references(cpi);
}
- av1_scale_references(cpi);
+ scale_references(cpi);
}
-
av1_set_quantizer(cm, q);
if (loop_count == 0) setup_frame(cpi);
@@ -4170,7 +4897,13 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL) {
for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
} else if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT) {
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ if (cm->frame_refs[0].idx >= 0) {
+ cm->frame_contexts[cm->frame_refs[0].idx] = *cm->fc;
+ }
+#else
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+#endif
}
}
#endif // CONFIG_Q_ADAPT_PROBS
@@ -4184,6 +4917,7 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
}
// transform / motion compensation build reconstruction frame
+ save_coding_context(cpi);
av1_encode_frame(cpi);
// Update the skip mb flag probabilities based on the distribution
@@ -4196,8 +4930,7 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
// accurate estimate of output frame size to determine if we need
// to recode.
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
- save_coding_context(cpi);
-
+ restore_coding_context(cpi);
av1_pack_bitstream(cpi, dest, size);
rc->projected_frame_size = (int)(*size) << 3;
@@ -4279,20 +5012,22 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
if (undershoot_seen || loop_at_this_size > 1) {
// Update rate_correction_factor.
- av1_rc_update_rate_correction_factors(cpi);
+ av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
q = (q_high + q_low + 1) / 2;
} else {
// Update rate_correction_factor.
- av1_rc_update_rate_correction_factors(cpi);
+ av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
- AOMMAX(q_high, top_index));
+ AOMMAX(q_high, top_index), cm->width,
+ cm->height);
while (q < q_low && retries < 10) {
- av1_rc_update_rate_correction_factors(cpi);
+ av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
- AOMMAX(q_high, top_index));
+ AOMMAX(q_high, top_index), cm->width,
+ cm->height);
retries++;
}
}
@@ -4303,12 +5038,12 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
q_high = q > q_low ? q - 1 : q_low;
if (overshoot_seen || loop_at_this_size > 1) {
- av1_rc_update_rate_correction_factors(cpi);
+ av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
q = (q_high + q_low) / 2;
} else {
- av1_rc_update_rate_correction_factors(cpi);
+ av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
- top_index);
+ top_index, cm->width, cm->height);
// Special case reset for qlow for constrained quality.
// This should only trigger where there is very substantial
// undershoot on a frame and the auto cq level is above
// the value passed in by the user.
if (cpi->oxcf.rc_mode == AOM_CQ && q < q_low) q_low = q;
@@ -4318,9 +5053,9 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size,
}
while (q > q_high && retries < 10) {
- av1_rc_update_rate_correction_factors(cpi);
+ av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
- top_index);
+ top_index, cm->width, cm->height);
retries++;
}
}
@@ -4369,13 +5104,13 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) {
const int last3_is_last =
map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[0]];
const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[0]];
-#if CONFIG_ONE_SIDED_COMPOUND
+#if CONFIG_ONE_SIDED_COMPOUND && !CONFIG_EXT_COMP_REFS
const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[0]];
const int last3_is_last2 =
map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[1]];
const int gld_is_last2 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[1]];
const int gld_is_last3 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[2]];
-#else // !CONFIG_ONE_SIDED_COMPOUND
+#else // !CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS
const int bwd_is_last = map[cpi->bwd_fb_idx] == map[cpi->lst_fb_idxes[0]];
const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[0]];
@@ -4388,12 +5123,19 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) {
const int bwd_is_last3 = map[cpi->bwd_fb_idx] == map[cpi->lst_fb_idxes[2]];
const int bwd_is_gld = map[cpi->bwd_fb_idx] == map[cpi->gld_fb_idx];
+#endif // CONFIG_ONE_SIDED_COMPOUND && !CONFIG_EXT_COMP_REFS
+
+ const int alt2_is_last = map[cpi->alt2_fb_idx] == map[cpi->lst_fb_idxes[0]];
+ const int alt2_is_last2 = map[cpi->alt2_fb_idx] == map[cpi->lst_fb_idxes[1]];
+ const int alt2_is_last3 = map[cpi->alt2_fb_idx] == map[cpi->lst_fb_idxes[2]];
+ const int alt2_is_gld = map[cpi->alt2_fb_idx] == map[cpi->gld_fb_idx];
+ const int alt2_is_bwd = map[cpi->alt2_fb_idx] == map[cpi->bwd_fb_idx];
-#endif // CONFIG_ONE_SIDED_COMPOUND
const int last2_is_alt = map[cpi->lst_fb_idxes[1]] == map[cpi->alt_fb_idx];
const int last3_is_alt = map[cpi->lst_fb_idxes[2]] == map[cpi->alt_fb_idx];
const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
const int bwd_is_alt = map[cpi->bwd_fb_idx] == map[cpi->alt_fb_idx];
+ const int alt2_is_alt = map[cpi->alt2_fb_idx] == map[cpi->alt_fb_idx];
#else // !CONFIG_EXT_REFS
const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
@@ -4402,13 +5144,6 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) {
int flags = AOM_REFFRAME_ALL;
-#if CONFIG_EXT_REFS
- // Disable the use of BWDREF_FRAME for non-bipredictive frames.
- if (!(cpi->rc.is_bipred_frame || cpi->rc.is_last_bipred_frame ||
- (cpi->rc.is_bwd_ref_frame && cpi->num_extra_arfs)))
- flags &= ~AOM_BWD_FLAG;
-#endif // CONFIG_EXT_REFS
-
if (gld_is_last || gld_is_alt) flags &= ~AOM_GOLD_FLAG;
if (cpi->rc.frames_till_gf_update_due == INT_MAX) flags &= ~AOM_GOLD_FLAG;
@@ -4422,15 +5157,21 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) {
if (gld_is_last2 || gld_is_last3) flags &= ~AOM_GOLD_FLAG;
-#if CONFIG_ONE_SIDED_COMPOUND // Changes LL & HL bitstream
+#if CONFIG_ONE_SIDED_COMPOUND && \
+ !CONFIG_EXT_COMP_REFS // Changes LL & HL bitstream
/* Allow biprediction between two identical frames (e.g. bwd_is_last = 1) */
if (bwd_is_alt && (flags & AOM_BWD_FLAG)) flags &= ~AOM_BWD_FLAG;
-#else
+#else // !CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS
if ((bwd_is_last || bwd_is_last2 || bwd_is_last3 || bwd_is_gld ||
bwd_is_alt) &&
(flags & AOM_BWD_FLAG))
flags &= ~AOM_BWD_FLAG;
-#endif
+#endif // CONFIG_ONE_SIDED_COMPOUND && !CONFIG_EXT_COMP_REFS
+
+ if ((alt2_is_last || alt2_is_last2 || alt2_is_last3 || alt2_is_gld ||
+ alt2_is_bwd || alt2_is_alt) &&
+ (flags & AOM_ALT2_FLAG))
+ flags &= ~AOM_ALT2_FLAG;
#endif // CONFIG_EXT_REFS
return flags;
@@ -4453,6 +5194,7 @@ static void set_ext_overrides(AV1_COMP *cpi) {
}
}
+#if !CONFIG_FRAME_SIGN_BIAS
static void set_arf_sign_bias(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
int arf_sign_bias;
@@ -4461,8 +5203,8 @@ static void set_arf_sign_bias(AV1_COMP *cpi) {
// The arf_sign_bias will be one for internal ARFs.
arf_sign_bias = cpi->rc.source_alt_ref_active &&
(!cpi->refresh_alt_ref_frame ||
- (gf_group->rf_level[gf_group->index] == GF_ARF_LOW));
-#else
+ gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE);
+#else // !CONFIG_EXT_REFS
if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
arf_sign_bias = cpi->rc.source_alt_ref_active &&
@@ -4477,8 +5219,11 @@ static void set_arf_sign_bias(AV1_COMP *cpi) {
cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias;
#if CONFIG_EXT_REFS
cm->ref_frame_sign_bias[BWDREF_FRAME] = cm->ref_frame_sign_bias[ALTREF_FRAME];
+ cm->ref_frame_sign_bias[ALTREF2_FRAME] =
+ cm->ref_frame_sign_bias[ALTREF_FRAME];
#endif // CONFIG_EXT_REFS
}
+#endif // !CONFIG_FRAME_SIGN_BIAS
static int setup_interp_filter_search_mask(AV1_COMP *cpi) {
InterpFilter ifilter;
@@ -4488,26 +5233,16 @@ static int setup_interp_filter_search_mask(AV1_COMP *cpi) {
int arf_idx = ALTREF_FRAME;
#if CONFIG_EXT_REFS
- // Get which arf used as ALTREF_FRAME
- if (cpi->oxcf.pass == 2)
- arf_idx += cpi->twopass.gf_group.arf_ref_idx[cpi->twopass.gf_group.index];
-#endif // CONFIG_EXT_REFS
-
+ if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
+ cpi->refresh_alt2_ref_frame)
+#else // !CONFIG_EXT_REFS
if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
+#endif // CONFIG_EXT_REFS
return mask;
-#if CONFIG_EXT_REFS
- for (ref = LAST_FRAME; ref < ALTREF_FRAME; ++ref)
- for (ifilter = EIGHTTAP_REGULAR; ifilter < SWITCHABLE_FILTERS; ++ifilter)
- ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
-
- for (ifilter = EIGHTTAP_REGULAR; ifilter < SWITCHABLE_FILTERS; ++ifilter)
- ref_total[ref] += cpi->interp_filter_selected[arf_idx][ifilter];
-#else
for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
for (ifilter = EIGHTTAP_REGULAR; ifilter < SWITCHABLE_FILTERS; ++ifilter)
ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
-#endif // CONFIG_EXT_REFS
for (ifilter = EIGHTTAP_REGULAR; ifilter < SWITCHABLE_FILTERS; ++ifilter) {
if ((ref_total[LAST_FRAME] &&
@@ -4527,6 +5262,9 @@ static int setup_interp_filter_search_mask(AV1_COMP *cpi) {
(ref_total[BWDREF_FRAME] == 0 ||
cpi->interp_filter_selected[BWDREF_FRAME][ifilter] * 50 <
ref_total[BWDREF_FRAME]) &&
+ (ref_total[ALTREF2_FRAME] == 0 ||
+ cpi->interp_filter_selected[ALTREF2_FRAME][ifilter] * 50 <
+ ref_total[ALTREF2_FRAME]) &&
#endif // CONFIG_EXT_REFS
(ref_total[ALTREF_FRAME] == 0 ||
cpi->interp_filter_selected[arf_idx][ifilter] * 50 <
@@ -4574,6 +5312,19 @@ static void dump_filtered_recon_frames(AV1_COMP *cpi) {
cpi->refresh_alt_ref_frame,
cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index],
recon_buf->y_stride, recon_buf->uv_stride, cm->width, cm->height);
+#if 0
+ int ref_frame;
+ printf("get_ref_frame_map_idx: [");
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
+ printf(" %d", get_ref_frame_map_idx(cpi, ref_frame));
+ printf(" ]\n");
+ printf("cm->new_fb_idx = %d\n", cm->new_fb_idx);
+ printf("cm->ref_frame_map = [");
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ printf(" %d", cm->ref_frame_map[ref_frame - LAST_FRAME]);
+ }
+ printf(" ]\n");
+#endif // 0
// --- Y ---
for (h = 0; h < cm->height; ++h) {
@@ -4609,7 +5360,6 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
AV1_COMMON *const cm = &cpi->common;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
struct segmentation *const seg = &cm->seg;
- TX_SIZE t;
FRAME_CONTEXT **tile_ctxs = aom_malloc(cm->tile_rows * cm->tile_cols *
sizeof(&cpi->tile_data[0].tctx));
aom_cdf_prob **cdf_ptrs =
@@ -4622,8 +5372,11 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
set_ext_overrides(cpi);
aom_clear_system_state();
+#if !CONFIG_FRAME_SIGN_BIAS
// Set the arf sign bias for this frame.
set_arf_sign_bias(cpi);
+#endif // !CONFIG_FRAME_SIGN_BIAS
+
#if CONFIG_TEMPMV_SIGNALING
// frame type has been decided outside of this function call
cm->cur_frame->intra_only = cm->frame_type == KEY_FRAME || cm->intra_only;
@@ -4654,12 +5407,14 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 0;
cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
cpi->refresh_alt_ref_frame = 0;
cpi->rc.is_bwd_ref_frame = 0;
cpi->rc.is_last_bipred_frame = 0;
cpi->rc.is_bipred_frame = 0;
+ restore_coding_context(cpi);
// Build the bitstream
av1_pack_bitstream(cpi, dest, size);
@@ -4672,7 +5427,16 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
#endif // DUMP_RECON_FRAMES
// Update the LAST_FRAME in the reference frame buffer.
- av1_update_reference_frames(cpi);
+ // NOTE:
+ // (1) For BWDREF_FRAME as the show_existing_frame, the reference frame
+ // update has been done previously when handling the LAST_BIPRED_FRAME
+ // right before BWDREF_FRAME (in the display order);
+ // (2) For INTNL_OVERLAY as the show_existing_frame, the reference frame
+ // update will be done when the following is called, which will exchange
+ // the virtual indices between LAST_FRAME and ALTREF2_FRAME, so that
+ // LAST3 will get retired, LAST2 becomes LAST3, LAST becomes LAST2, and
+ // ALTREF2_FRAME will serve as the new LAST_FRAME.
+ update_reference_frames(cpi);
// Update frame flags
cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
@@ -4687,7 +5451,7 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
// Since we allocate a spot for the OVERLAY frame in the gf group, we need
// to do post-encoding update accordingly.
if (cpi->rc.is_src_frame_alt_ref) {
- av1_set_target_rate(cpi);
+ av1_set_target_rate(cpi, cm->width, cm->height);
#if CONFIG_XIPHRC
frame_type = cm->frame_type == INTER_FRAME ? OD_P_FRAME : OD_I_FRAME;
drop_this_frame = od_enc_rc_update_state(
@@ -4728,6 +5492,7 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cm->error_resilient_mode = oxcf->error_resilient_mode;
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
// By default, encoder assumes decoder can use prev_mi.
if (cm->error_resilient_mode) {
cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
@@ -4736,6 +5501,7 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
// Only reset the current context.
cm->reset_frame_context = RESET_FRAME_CONTEXT_CURRENT;
}
+#endif
}
if (cpi->oxcf.mtu == 0) {
cm->num_tg = cpi->oxcf.num_tile_groups;
@@ -4781,7 +5547,7 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
#endif
#if CONFIG_REFERENCE_BUFFER
- {
+ if (cm->seq_params.frame_id_numbers_present_flag) {
/* Non-normative definition of current_frame_id ("frame counter" with
* wraparound) */
const int frame_id_length = FRAME_ID_LENGTH_MINUS7 + 7;
@@ -4806,11 +5572,14 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
(1 << frame_id_length);
}
}
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
#if CONFIG_EXT_DELTA_Q
cm->delta_q_present_flag = cpi->oxcf.deltaq_mode != NO_DELTA_Q;
cm->delta_lf_present_flag = cpi->oxcf.deltaq_mode == DELTA_Q_LF;
+#if CONFIG_LOOPFILTER_LEVEL
+ cm->delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
+#endif // CONFIG_LOOPFILTER_LEVEL
#endif
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
@@ -4819,6 +5588,9 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
encode_with_recode_loop(cpi, size, dest);
}
+ cm->last_tile_cols = cm->tile_cols;
+ cm->last_tile_rows = cm->tile_rows;
+
#ifdef OUTPUT_YUV_SKINMAP
if (cpi->common.current_video_frame > 1) {
av1_compute_skin_map(cpi, yuv_skinmap_file);
@@ -4864,6 +5636,10 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
// Pick the loop filter level for the frame.
loopfilter_frame(cpi, cm);
+#ifdef OUTPUT_YUV_REC
+ aom_write_one_yuv_frame(cm, cm->frame_to_show);
+#endif
+
// Build the bitstream
av1_pack_bitstream(cpi, dest, size);
@@ -4874,7 +5650,7 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
}
#if CONFIG_REFERENCE_BUFFER
- {
+ if (cm->seq_params.frame_id_numbers_present_flag) {
int i;
/* Update reference frame id values based on the value of refresh_mask */
for (i = 0; i < REF_FRAMES; i++) {
@@ -4883,7 +5659,7 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
}
}
}
-#endif
+#endif // CONFIG_REFERENCE_BUFFER
#if DUMP_RECON_FRAMES == 1
// NOTE(zoeliu): For debug - Output the filtered reconstructed video.
@@ -4896,11 +5672,8 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
release_scaled_references(cpi);
}
- av1_update_reference_frames(cpi);
+ update_reference_frames(cpi);
- for (t = 0; t < TX_SIZES; t++)
- av1_full_to_model_counts(cpi->td.counts->coef[t],
- cpi->td.rd_counts.coef_counts[t]);
#if CONFIG_ENTROPY_STATS
av1_accumulate_frame_counts(&aggregate_fc, &cm->counts);
assert(cm->frame_context_idx < FRAME_CONTEXTS);
@@ -4908,7 +5681,9 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
&cm->counts);
#endif // CONFIG_ENTROPY_STATS
if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+#if CONFIG_LV_MAP
av1_adapt_coef_probs(cm);
+#endif // CONFIG_LV_MAP
av1_adapt_intra_frame_probs(cm);
make_update_tile_list_enc(cpi, cm->tile_rows, cm->tile_cols, tile_ctxs);
av1_average_tile_coef_cdfs(cpi->common.fc, tile_ctxs, cdf_ptrs,
@@ -4997,7 +5772,7 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
// TODO(zoeliu): We may only swap mi and prev_mi for those frames that are
// being used as reference.
#endif // CONFIG_EXT_REFS
- av1_swap_mi_and_prev_mi(cm);
+ swap_mi_and_prev_mi(cm);
// Don't increment frame counters if this was an altref buffer
// update, not a real frame.
++cm->current_video_frame;
@@ -5017,6 +5792,7 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
#if CONFIG_EXT_REFS
}
#endif // CONFIG_EXT_REFS
+
aom_free(tile_ctxs);
aom_free(cdf_ptrs);
}
@@ -5077,47 +5853,6 @@ static void Pass2Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest,
}
#endif
-static void init_ref_frame_bufs(AV1_COMMON *cm) {
- int i;
- BufferPool *const pool = cm->buffer_pool;
- cm->new_fb_idx = INVALID_IDX;
- for (i = 0; i < REF_FRAMES; ++i) {
- cm->ref_frame_map[i] = INVALID_IDX;
- pool->frame_bufs[i].ref_count = 0;
- }
-}
-
-static void check_initial_width(AV1_COMP *cpi,
-#if CONFIG_HIGHBITDEPTH
- int use_highbitdepth,
-#endif
- int subsampling_x, int subsampling_y) {
- AV1_COMMON *const cm = &cpi->common;
-
- if (!cpi->initial_width ||
-#if CONFIG_HIGHBITDEPTH
- cm->use_highbitdepth != use_highbitdepth ||
-#endif
- cm->subsampling_x != subsampling_x ||
- cm->subsampling_y != subsampling_y) {
- cm->subsampling_x = subsampling_x;
- cm->subsampling_y = subsampling_y;
-#if CONFIG_HIGHBITDEPTH
- cm->use_highbitdepth = use_highbitdepth;
-#endif
-
- alloc_raw_frame_buffers(cpi);
- init_ref_frame_bufs(cm);
- alloc_util_frame_buffers(cpi);
-
- init_motion_estimation(cpi); // TODO(agrange) This can be removed.
-
- cpi->initial_width = cm->width;
- cpi->initial_height = cm->height;
- cpi->initial_mbs = cm->MBs;
- }
-}
-
int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time) {
@@ -5169,7 +5904,7 @@ static int frame_is_reference(const AV1_COMP *cpi) {
return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
cpi->refresh_golden_frame ||
#if CONFIG_EXT_REFS
- cpi->refresh_bwd_ref_frame ||
+ cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame ||
#endif // CONFIG_EXT_REFS
cpi->refresh_alt_ref_frame || !cm->error_resilient_mode ||
cm->lf.mode_ref_delta_update || cm->seg.update_map ||
@@ -5252,6 +5987,21 @@ static int get_brf_src_index(AV1_COMP *cpi) {
return brf_src_index;
}
+
+// Returns 0 if this is not an alt-ref; otherwise returns the offset of the
+// source frame used as the arf midpoint.
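+// (Illustrative: this mirrors get_arf_src_index() for the second-level ARF;
+// it returns the arf_src_offset of an INTNL_ARF_UPDATE slot in the GF
+// group.)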
+static int get_arf2_src_index(AV1_COMP *cpi) {
+ int arf2_src_index = 0;
+ if (is_altref_enabled(cpi) && cpi->num_extra_arfs) {
+ if (cpi->oxcf.pass == 2) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
+ arf2_src_index = gf_group->arf_src_offset[gf_group->index];
+ }
+ }
+ }
+ return arf2_src_index;
+}
#endif // CONFIG_EXT_REFS
static void check_src_altref(AV1_COMP *cpi,
@@ -5268,6 +6018,10 @@ static void check_src_altref(AV1_COMP *cpi,
(gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE) ||
#endif // CONFIG_EXT_REFS
(gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
+#if CONFIG_EXT_REFS
+ rc->is_src_frame_ext_arf =
+ gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE;
+#endif // CONFIG_EXT_REFS
} else {
rc->is_src_frame_alt_ref =
cpi->alt_ref_source && (source == cpi->alt_ref_source);
@@ -5277,9 +6031,20 @@ static void check_src_altref(AV1_COMP *cpi,
// Current frame is an ARF overlay frame.
cpi->alt_ref_source = NULL;
- // Don't refresh the last buffer for an ARF overlay frame. It will
- // become the GF so preserve last as an alternative prediction option.
- cpi->refresh_last_frame = 0;
+#if CONFIG_EXT_REFS
+ if (rc->is_src_frame_ext_arf && !cpi->common.show_existing_frame) {
+ // For INTNL_OVERLAY, when show_existing_frame == 0, we do need to
+ // refresh the LAST_FRAME, i.e. LAST3 gets retired, LAST2 becomes LAST3,
+ // LAST becomes LAST2, and INTNL_OVERLAY becomes LAST.
+ cpi->refresh_last_frame = 1;
+ } else {
+#endif // CONFIG_EXT_REFS
+ // Don't refresh the last buffer for an ARF overlay frame. It will
+ // become the GF so preserve last as an alternative prediction option.
+ cpi->refresh_last_frame = 0;
+#if CONFIG_EXT_REFS
+ }
+#endif // CONFIG_EXT_REFS
}
}
@@ -5402,6 +6167,123 @@ static void compute_internal_stats(AV1_COMP *cpi, int frame_bytes) {
}
#endif // CONFIG_INTERNAL_STATS
+#if CONFIG_AMVR
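+// Heuristic sketch for the function below: each 8x8 block of the current
+// picture is classified as C (identical to its collocated block in the last
+// picture), S (horizontally or vertically "perfect", i.e. smooth), or M (an
+// exact hash match elsewhere in the last picture). Integer-pel MVs are
+// forced only when (C + S + M) / T and M / T clear fixed thresholds, both
+// for this frame and averaged over up to 32 recent frames; e.g. a static
+// screen capture where C == T returns 1 immediately.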
+static int is_integer_mv(AV1_COMP *cpi, const YV12_BUFFER_CONFIG *cur_picture,
+ const YV12_BUFFER_CONFIG *last_picture,
+ hash_table *last_hash_table) {
+ aom_clear_system_state();
+ // check use hash ME
+ int k;
+ uint32_t hash_value_1;
+ uint32_t hash_value_2;
+
+ const int block_size = 8;
+ const double threshold_current = 0.8;
+ const double threshold_average = 0.95;
+ const int max_history_size = 32;
+ int T = 0; // total number of blocks tested
+ int C = 0; // blocks that match their collocated block
+ int S = 0; // smooth blocks that do not match their collocated block
+ int M = 0; // blocks that match some other block via hash
+
+ const int pic_width = cur_picture->y_width;
+ const int pic_height = cur_picture->y_height;
+ for (int i = 0; i + block_size <= pic_height; i += block_size) {
+ for (int j = 0; j + block_size <= pic_width; j += block_size) {
+ const int x_pos = j;
+ const int y_pos = i;
+ int match = 1;
+ T++;
+
+ // Check whether the collocated block matches the current block.
+ uint8_t *p_cur = cur_picture->y_buffer;
+ uint8_t *p_ref = last_picture->y_buffer;
+ int stride_cur = cur_picture->y_stride;
+ int stride_ref = last_picture->y_stride;
+ p_cur += (y_pos * stride_cur + x_pos);
+ p_ref += (y_pos * stride_ref + x_pos);
+
+ for (int tmpY = 0; tmpY < block_size && match; tmpY++) {
+ for (int tmpX = 0; tmpX < block_size && match; tmpX++) {
+ if (p_cur[tmpX] != p_ref[tmpX]) {
+ match = 0;
+ }
+ }
+ p_cur += stride_cur;
+ p_ref += stride_ref;
+ }
+
+ if (match) {
+ C++;
+ continue;
+ }
+
+ if (av1_hash_is_horizontal_perfect(cur_picture, block_size, x_pos,
+ y_pos) ||
+ av1_hash_is_vertical_perfect(cur_picture, block_size, x_pos, y_pos)) {
+ S++;
+ continue;
+ }
+
+ av1_get_block_hash_value(
+ cur_picture->y_buffer + y_pos * stride_cur + x_pos, stride_cur,
+ block_size, &hash_value_1, &hash_value_2);
+
+ if (av1_has_exact_match(last_hash_table, hash_value_1, hash_value_2)) {
+ M++;
+ }
+ }
+ }
+
+ assert(T > 0);
+ double csm_rate = ((double)(C + S + M)) / ((double)(T));
+ double m_rate = ((double)(M)) / ((double)(T));
+
+ cpi->csm_rate_array[cpi->rate_index] = csm_rate;
+ cpi->m_rate_array[cpi->rate_index] = m_rate;
+
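+ // rate_index / rate_size implement a ring buffer over the statistics of
+ // the last max_history_size (32) frames.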
+ cpi->rate_index = (cpi->rate_index + 1) % max_history_size;
+ cpi->rate_size++;
+ cpi->rate_size = AOMMIN(cpi->rate_size, max_history_size);
+
+ if (csm_rate < threshold_current) {
+ return 0;
+ }
+
+ if (C == T) {
+ return 1;
+ }
+
+ double csm_average = 0.0;
+ double m_average = 0.0;
+
+ for (k = 0; k < cpi->rate_size; k++) {
+ csm_average += cpi->csm_rate_array[k];
+ m_average += cpi->m_rate_array[k];
+ }
+ csm_average /= cpi->rate_size;
+ m_average /= cpi->rate_size;
+
+ if (csm_average < threshold_average) {
+ return 0;
+ }
+
+ if (M > (T - C - S) / 3) {
+ return 1;
+ }
+
+ if (csm_rate > 0.99 && m_rate > 0.01) {
+ return 1;
+ }
+
+ if (csm_average + m_average > 1.01) {
+ return 1;
+ }
+
+ return 0;
+}
+#endif
+
int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest, int64_t *time_stamp,
int64_t *time_end, int flush) {
@@ -5432,7 +6314,11 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
aom_usec_timer_start(&cmptimer);
- av1_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
+#if CONFIG_AMVR
+ set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV, 0);
+#else
+ set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
+#endif
// Is multi-arf enabled.
// Note that at the moment multi_arf is only configured for 2 pass VBR
@@ -5441,8 +6327,10 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
else
cpi->multi_arf_allowed = 0;
- // Normal defaults
+// Normal defaults
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
+#endif
cm->refresh_frame_context =
(oxcf->error_resilient_mode || oxcf->frame_parallel_decoding_mode)
? REFRESH_FRAME_CONTEXT_FORWARD
@@ -5452,6 +6340,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
cpi->refresh_golden_frame = 0;
#if CONFIG_EXT_REFS
cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
#endif // CONFIG_EXT_REFS
cpi->refresh_alt_ref_frame = 0;
@@ -5537,7 +6426,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
#endif // CONFIG_BGSPRITE
av1_temporal_filter(cpi,
#if CONFIG_BGSPRITE
- NULL,
+ NULL, &cpi->alt_ref_buffer,
#endif // CONFIG_BGSPRITE
arf_src_index);
aom_extend_frame_borders(&cpi->alt_ref_buffer);
@@ -5547,14 +6436,64 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
cm->show_frame = 0;
cm->intra_only = 0;
cpi->refresh_alt_ref_frame = 1;
- cpi->refresh_golden_frame = 0;
cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+#if CONFIG_EXT_REFS
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
+#endif // CONFIG_EXT_REFS
rc->is_src_frame_alt_ref = 0;
}
rc->source_alt_ref_pending = 0;
}
#if CONFIG_EXT_REFS
+ // Should we encode an arf2 frame?
+ arf_src_index = get_arf2_src_index(cpi);
+ if (arf_src_index) {
+ for (i = 0; i <= arf_src_index; ++i) {
+ struct lookahead_entry *e = av1_lookahead_peek(cpi->lookahead, i);
+ // Avoid creating an alt-ref if there's a forced keyframe pending.
+ if (e == NULL) {
+ break;
+ } else if (e->flags == AOM_EFLAG_FORCE_KF) {
+ arf_src_index = 0;
+ flush = 1;
+ break;
+ }
+ }
+ }
+
+ if (arf_src_index) {
+ assert(arf_src_index <= rc->frames_to_key);
+
+ if ((source = av1_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
+ cpi->alt_ref_source = source;
+
+ if (oxcf->arnr_max_frames > 0) {
+ // Produce the filtered ARF frame.
+ av1_temporal_filter(cpi,
+#if CONFIG_BGSPRITE
+ NULL, NULL,
+#endif // CONFIG_BGSPRITE
+ arf_src_index);
+ aom_extend_frame_borders(&cpi->alt_ref_buffer);
+ force_src_buffer = &cpi->alt_ref_buffer;
+ }
+
+ cm->show_frame = 0;
+ cm->intra_only = 0;
+ cpi->refresh_alt2_ref_frame = 1;
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ rc->is_src_frame_alt_ref = 0;
+ rc->is_src_frame_ext_arf = 0;
+ }
+ rc->source_alt_ref_pending = 0;
+ }
+
rc->is_bwd_ref_frame = 0;
brf_src_index = get_brf_src_index(cpi);
if (brf_src_index) {
@@ -5566,6 +6505,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
cpi->refresh_bwd_ref_frame = 1;
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
cpi->refresh_alt_ref_frame = 0;
rc->is_bwd_ref_frame = 1;
@@ -5634,13 +6574,10 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
if (cm->new_fb_idx == INVALID_IDX) return -1;
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
-
-#if CONFIG_EXT_REFS
- if (oxcf->pass == 2) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- cpi->alt_fb_idx = cpi->arf_map[gf_group->arf_ref_idx[gf_group->index]];
- }
-#else
+#if CONFIG_HIGHBITDEPTH && CONFIG_GLOBAL_MOTION
+ cm->cur_frame->buf.buf_8bit_valid = 0;
+#endif
+#if !CONFIG_EXT_REFS
if (cpi->multi_arf_allowed) {
if (cm->frame_type == KEY_FRAME) {
init_buffer_indices(cpi);
@@ -5649,7 +6586,7 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
cpi->alt_fb_idx = gf_group->arf_ref_idx[gf_group->index];
}
}
-#endif // CONFIG_EXT_REFS
+#endif // !CONFIG_EXT_REFS
// Start with a 0 size frame.
*size = 0;
@@ -5679,8 +6616,26 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
#endif
#if CONFIG_REFERENCE_BUFFER
- if (*time_stamp == 0) {
- cpi->common.current_frame_id = -1;
+ if (cm->seq_params.frame_id_numbers_present_flag) {
+ if (*time_stamp == 0) {
+ cpi->common.current_frame_id = -1;
+ }
+ }
+#endif // CONFIG_REFERENCE_BUFFER
+#if CONFIG_AMVR
+ cpi->cur_poc++;
+ if (oxcf->pass != 1 && cpi->common.allow_screen_content_tools) {
+ if (cpi->common.seq_mv_precision_level == 2) {
+ struct lookahead_entry *previous_entry =
+ cpi->lookahead->buf + cpi->previsous_index;
+ cpi->common.cur_frame_mv_precision_level = is_integer_mv(
+ cpi, cpi->source, &previous_entry->img, cpi->previsou_hash_table);
+ } else {
+ cpi->common.cur_frame_mv_precision_level =
+ cpi->common.seq_mv_precision_level;
+ }
+ } else {
+ cpi->common.cur_frame_mv_precision_level = 0;
}
#endif
@@ -5711,9 +6666,35 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
Pass0Encode(cpi, size, dest, 0, frame_flags);
}
#endif
+#if CONFIG_HASH_ME
+ if (oxcf->pass != 1 && cpi->common.allow_screen_content_tools) {
+#if CONFIG_AMVR
+ cpi->previsou_hash_table = &cm->cur_frame->hash_table;
+ {
+ int l;
+ for (l = -MAX_PRE_FRAMES; l < cpi->lookahead->max_sz; l++) {
+ if ((cpi->lookahead->buf + l) == source) {
+ cpi->previsous_index = l;
+ break;
+ }
+ }
+
+ if (l == cpi->lookahead->max_sz) {
+ aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ "Failed to find last frame original buffer");
+ }
+ }
+#endif
+ }
+
+#endif
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+ cm->frame_contexts[cm->new_fb_idx] = *cm->fc;
+#else
if (!cm->error_resilient_mode)
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+#endif // CONFIG_NO_FRAME_CONTEXT_SIGNALING
// No frame encoded, or frame was dropped, release scaled references.
if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
@@ -5776,7 +6757,6 @@ int av1_get_last_show_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *frame) {
int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode,
AOM_SCALING vert_mode) {
- AV1_COMMON *cm = &cpi->common;
int hr = 0, hs = 0, vr = 0, vs = 0;
if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
@@ -5785,43 +6765,8 @@ int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode,
Scale2Ratio(vert_mode, &vr, &vs);
// Always round up to the next whole number.
- cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
- cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
- assert(cm->width <= cpi->initial_width);
- assert(cm->height <= cpi->initial_height);
-
- update_frame_size(cpi);
-
- return 0;
-}
-
-int av1_set_size_literal(AV1_COMP *cpi, int width, int height) {
- AV1_COMMON *cm = &cpi->common;
-#if CONFIG_HIGHBITDEPTH
- check_initial_width(cpi, cm->use_highbitdepth, cm->subsampling_x,
- cm->subsampling_y);
-#else
- check_initial_width(cpi, cm->subsampling_x, cm->subsampling_y);
-#endif // CONFIG_HIGHBITDEPTH
-
- if (width <= 0 || height <= 0) return 1;
-
- cm->width = width;
- if (cm->width > cpi->initial_width) {
- cm->width = cpi->initial_width;
- printf("Warning: Desired width too large, changed to %d\n", cm->width);
- }
-
- cm->height = height;
- if (cm->height > cpi->initial_height) {
- cm->height = cpi->initial_height;
- printf("Warning: Desired height too large, changed to %d\n", cm->height);
- }
-
- assert(cm->width <= cpi->initial_width);
- assert(cm->height <= cpi->initial_height);
-
- update_frame_size(cpi);
+ cpi->resize_pending_width = (hs - 1 + cpi->oxcf.width * hr) / hs;
+ cpi->resize_pending_height = (vs - 1 + cpi->oxcf.height * vr) / vs;
return 0;
}
diff --git a/third_party/aom/av1/encoder/encoder.h b/third_party/aom/av1/encoder/encoder.h
index 9b98975b7..eb779a3cd 100644
--- a/third_party/aom/av1/encoder/encoder.h
+++ b/third_party/aom/av1/encoder/encoder.h
@@ -53,23 +53,20 @@
extern "C" {
#endif
-#if CONFIG_SPEED_REFS
-#define MIN_SPEED_REFS_BLKSIZE BLOCK_16X16
-#endif // CONFIG_SPEED_REFS
-
typedef struct {
int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
// 0 = Intra, Last, GF, ARF
- signed char last_ref_lf_deltas[TOTAL_REFS_PER_FRAME];
+ int8_t last_ref_lf_deltas[TOTAL_REFS_PER_FRAME];
// 0 = ZERO_MV, MV
- signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
+ int8_t last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
FRAME_CONTEXT fc;
} CODING_CONTEXT;
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
typedef enum {
// regular inter frame
REGULAR_FRAME = 0,
@@ -86,6 +83,7 @@ typedef enum {
EXT_ARF_FRAME = 5
#endif
} FRAME_CONTEXT_INDEX;
+#endif
typedef enum {
NORMAL = 0,
@@ -105,8 +103,9 @@ typedef enum {
FRAMEFLAGS_GOLDEN = 1 << 1,
#if CONFIG_EXT_REFS
FRAMEFLAGS_BWDREF = 1 << 2,
+ // TODO(zoeliu): To determine whether a frame flag is needed for ALTREF2_FRAME
FRAMEFLAGS_ALTREF = 1 << 3,
-#else
+#else // !CONFIG_EXT_REFS
FRAMEFLAGS_ALTREF = 1 << 2,
#endif // CONFIG_EXT_REFS
} FRAMETYPE_FLAGS;
@@ -116,7 +115,7 @@ typedef enum {
VARIANCE_AQ = 1,
COMPLEXITY_AQ = 2,
CYCLIC_REFRESH_AQ = 3,
-#if CONFIG_DELTA_Q && !CONFIG_EXT_DELTA_Q
+#if !CONFIG_EXT_DELTA_Q
DELTA_AQ = 4,
#endif
AQ_MODE_COUNT // This should always be the last member of the enum
@@ -131,14 +130,20 @@ typedef enum {
#endif
typedef enum {
RESIZE_NONE = 0, // No frame resizing allowed.
- RESIZE_FIXED = 1, // All frames are coded at the specified dimension.
- RESIZE_DYNAMIC = 2 // Coded size of each frame is determined by the codec.
+ RESIZE_FIXED = 1, // All frames are coded at the specified scale.
+ RESIZE_RANDOM = 2, // All frames are coded at a random scale.
+ RESIZE_MODES
} RESIZE_MODE;
#if CONFIG_FRAME_SUPERRES
typedef enum {
- SUPERRES_NONE = 0,
- SUPERRES_FIXED = 1,
- SUPERRES_DYNAMIC = 2
+ SUPERRES_NONE = 0, // No frame superres allowed
+ SUPERRES_FIXED = 1, // All frames are coded at the specified scale,
+ // and super-resolved.
+ SUPERRES_RANDOM = 2, // All frames are coded at a random scale,
+ // and super-resolved.
+ SUPERRES_QTHRESH = 3, // Superres scale for a frame is determined based on
+ // q_index
+ SUPERRES_MODES
} SUPERRES_MODE;
#endif // CONFIG_FRAME_SUPERRES
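+// With SUPERRES_QTHRESH, the per-frame denominator is presumably chosen by
+// comparing the frame's q_index against superres_qthresh (or
+// superres_kf_qthresh for key frames); the other modes fix or randomize the
+// scale per frame.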
@@ -202,6 +207,9 @@ typedef struct AV1EncoderConfig {
int qm_minlevel;
int qm_maxlevel;
#endif
+#if CONFIG_DIST_8X8
+ int using_dist_8x8;
+#endif
unsigned int num_tile_groups;
unsigned int mtu;
@@ -210,14 +218,16 @@ typedef struct AV1EncoderConfig {
#endif
// Internal frame size scaling.
RESIZE_MODE resize_mode;
- uint8_t resize_scale_numerator;
- uint8_t resize_kf_scale_numerator;
+ uint8_t resize_scale_denominator;
+ uint8_t resize_kf_scale_denominator;
#if CONFIG_FRAME_SUPERRES
// Frame Super-Resolution size scaling.
SUPERRES_MODE superres_mode;
- uint8_t superres_scale_numerator;
- uint8_t superres_kf_scale_numerator;
+ uint8_t superres_scale_denominator;
+ uint8_t superres_kf_scale_denominator;
+ int superres_qthresh;
+ int superres_kf_qthresh;
#endif // CONFIG_FRAME_SUPERRES
// Enable feature to reduce the frame quantization every x frames.
@@ -255,6 +265,12 @@ typedef struct AV1EncoderConfig {
int tile_columns;
int tile_rows;
+#if CONFIG_MAX_TILE
+ int tile_width_count;
+ int tile_height_count;
+ int tile_widths[MAX_TILE_COLS];
+ int tile_heights[MAX_TILE_ROWS];
+#endif
#if CONFIG_DEPENDENT_HORZTILES
int dependent_horz_tiles;
#endif
@@ -277,10 +293,8 @@ typedef struct AV1EncoderConfig {
int use_highbitdepth;
#endif
aom_color_space_t color_space;
-#if CONFIG_COLORSPACE_HEADERS
aom_transfer_function_t transfer_function;
aom_chroma_sample_position_t chroma_sample_position;
-#endif
int color_range;
int render_width;
int render_height;
@@ -320,7 +334,6 @@ typedef struct TileDataEnc {
} TileDataEnc;
typedef struct RD_COUNTS {
- av1_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
int64_t comp_pred_diff[REFERENCE_MODES];
#if CONFIG_GLOBAL_MOTION
// Stores number of 4x4 blocks using global motion per reference frame.
@@ -334,8 +347,9 @@ typedef struct ThreadData {
MACROBLOCK mb;
RD_COUNTS rd_counts;
FRAME_COUNTS *counts;
-
+#if !CONFIG_CB4X4
PICK_MODE_CONTEXT *leaf_tree;
+#endif
PC_TREE *pc_tree;
PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
#if CONFIG_MOTION_VAR
@@ -345,9 +359,7 @@ typedef struct ThreadData {
uint8_t *left_pred_buf;
#endif
-#if CONFIG_PALETTE
PALETTE_BUFFER *palette_buffer;
-#endif // CONFIG_PALETTE
} ThreadData;
struct EncWorkerData;
@@ -381,6 +393,9 @@ typedef struct AV1_COMP {
QUANTS quants;
ThreadData td;
MB_MODE_INFO_EXT *mbmi_ext_base;
+#if CONFIG_LV_MAP
+ CB_COEFF_BUFFER *coeff_buffer_base;
+#endif
Dequants dequants;
AV1_COMMON common;
AV1EncoderConfig oxcf;
@@ -396,6 +411,15 @@ typedef struct AV1_COMP {
// For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame;
+#if CONFIG_AMVR
+ double csm_rate_array[32];
+ double m_rate_array[32];
+ int rate_size;
+ int rate_index;
+ hash_table *previsou_hash_table;
+ int previsous_index;
+ int cur_poc; // DebugInfo
+#endif
int scaled_ref_idx[TOTAL_REFS_PER_FRAME];
#if CONFIG_EXT_REFS
@@ -405,9 +429,14 @@ typedef struct AV1_COMP {
#endif // CONFIG_EXT_REFS
int gld_fb_idx;
#if CONFIG_EXT_REFS
- int bwd_fb_idx; // BWD_REF_FRAME
-#endif // CONFIG_EXT_REFS
+ int bwd_fb_idx; // BWDREF_FRAME
+ int alt2_fb_idx; // ALTREF2_FRAME
+#endif // CONFIG_EXT_REFS
int alt_fb_idx;
+#if CONFIG_EXT_REFS
+ int ext_fb_idx; // extra ref frame buffer index
+ int refresh_fb_idx; // ref frame buffer index to refresh
+#endif // CONFIG_EXT_REFS
int last_show_frame_buf_idx; // last show frame buffer index
@@ -415,6 +444,7 @@ typedef struct AV1_COMP {
int refresh_golden_frame;
#if CONFIG_EXT_REFS
int refresh_bwd_ref_frame;
+ int refresh_alt2_ref_frame;
#endif // CONFIG_EXT_REFS
int refresh_alt_ref_frame;
@@ -441,6 +471,11 @@ typedef struct AV1_COMP {
CODING_CONTEXT coding_context;
+#if CONFIG_GLOBAL_MOTION
+ int gmtype_cost[TRANS_TYPES];
+ int gmparams_cost[TOTAL_REFS_PER_FRAME];
+#endif // CONFIG_GLOBAL_MOTION
+
int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
@@ -534,77 +569,17 @@ typedef struct AV1_COMP {
// number of MBs in the current frame when the frame is
// scaled.
+  // When resize is triggered through external control, the desired
+  // width/height are stored here until used in the next frame to be coded.
+  // They are effective for only one frame and are reset after use.
+ int resize_pending_width;
+ int resize_pending_height;
+
int frame_flags;
search_site_config ss_cfg;
- int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES];
- int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2];
- int zeromv_mode_cost[ZEROMV_MODE_CONTEXTS][2];
- int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
- int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
-
- unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
-#if CONFIG_EXT_INTER
- unsigned int inter_compound_mode_cost[INTER_MODE_CONTEXTS]
- [INTER_COMPOUND_MODES];
-#if CONFIG_COMPOUND_SINGLEREF
- unsigned int inter_singleref_comp_mode_cost[INTER_MODE_CONTEXTS]
- [INTER_SINGLEREF_COMP_MODES];
-#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_INTERINTRA
- unsigned int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
-#endif // CONFIG_INTERINTRA
-#endif // CONFIG_EXT_INTER
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES];
-#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
- int motion_mode_cost1[BLOCK_SIZES_ALL][2];
-#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
- int ncobmc_mode_cost[ADAPT_OVERLAP_BLOCKS][MAX_NCOBMC_MODES];
-#endif // CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- int intra_uv_mode_cost[INTRA_MODES][UV_INTRA_MODES];
- int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
- int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
-#if CONFIG_EXT_PARTITION_TYPES
- int partition_cost[PARTITION_CONTEXTS + CONFIG_UNPOISON_PARTITION_CTX]
- [EXT_PARTITION_TYPES];
-#else
- int partition_cost[PARTITION_CONTEXTS + CONFIG_UNPOISON_PARTITION_CTX]
- [PARTITION_TYPES];
-#endif
-#if CONFIG_PALETTE
- int palette_y_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
- int palette_uv_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
- int palette_y_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
- [PALETTE_COLORS];
- int palette_uv_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
- [PALETTE_COLORS];
-#endif // CONFIG_PALETTE
- int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
-#if CONFIG_EXT_TX
- int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
- int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
- [TX_TYPES];
-#else
- int intra_tx_type_costs[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
- int inter_tx_type_costs[EXT_TX_SIZES][TX_TYPES];
-#endif // CONFIG_EXT_TX
-#if CONFIG_EXT_INTRA
-#if CONFIG_INTRA_INTERP
- int intra_filter_cost[INTRA_FILTERS + 1][INTRA_FILTERS];
-#endif // CONFIG_INTRA_INTERP
-#endif // CONFIG_EXT_INTRA
-#if CONFIG_LOOP_RESTORATION
- int switchable_restore_cost[RESTORE_SWITCHABLE_TYPES];
-#endif // CONFIG_LOOP_RESTORATION
-#if CONFIG_GLOBAL_MOTION
- int gmtype_cost[TRANS_TYPES];
- int gmparams_cost[TOTAL_REFS_PER_FRAME];
-#endif // CONFIG_GLOBAL_MOTION
-
int multi_arf_allowed;
int multi_arf_enabled;
int multi_arf_last_grp_enabled;
@@ -639,25 +614,24 @@ typedef struct AV1_COMP {
int is_arf_filter_off[MAX_EXT_ARFS + 1];
int num_extra_arfs;
int arf_map[MAX_EXT_ARFS + 1];
+ int arf_pos_in_gf[MAX_EXT_ARFS + 1];
+ int arf_pos_for_ovrly[MAX_EXT_ARFS + 1];
#endif // CONFIG_EXT_REFS
#if CONFIG_GLOBAL_MOTION
int global_motion_search_done;
#endif
-#if CONFIG_REFERENCE_BUFFER
- SequenceHeader seq_params;
-#endif
#if CONFIG_LV_MAP
tran_low_t *tcoeff_buf[MAX_MB_PLANE];
#endif
-#if CONFIG_SPEED_REFS
- int sb_scanning_pass_idx;
-#endif // CONFIG_SPEED_REFS
-
-#if CONFIG_FLEX_REFS
+#if CONFIG_EXT_REFS
int extra_arf_allowed;
int bwd_ref_allowed;
-#endif // CONFIG_FLEX_REFS
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_BGSPRITE
+ int bgsprite_allowed;
+#endif // CONFIG_BGSPRITE
} AV1_COMP;
void av1_initialize_enc(void);
@@ -686,11 +660,9 @@ int av1_use_as_reference(AV1_COMP *cpi, int ref_frame_flags);
void av1_update_reference(AV1_COMP *cpi, int ref_frame_flags);
-int av1_copy_reference_enc(AV1_COMP *cpi, AOM_REFFRAME ref_frame_flag,
- YV12_BUFFER_CONFIG *sd);
+int av1_copy_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd);
-int av1_set_reference_enc(AV1_COMP *cpi, AOM_REFFRAME ref_frame_flag,
- YV12_BUFFER_CONFIG *sd);
+int av1_set_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd);
int av1_update_entropy(AV1_COMP *cpi, int update);
@@ -701,14 +673,8 @@ int av1_get_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols);
int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode,
AOM_SCALING vert_mode);
-// Returns 1 if the assigned width or height was <= 0.
-int av1_set_size_literal(AV1_COMP *cpi, int width, int height);
-
int av1_get_quantizer(struct AV1_COMP *cpi);
-void av1_full_to_model_counts(av1_coeff_count_model *model_count,
- av1_coeff_count *full_count);
-
static INLINE int frame_is_kf_gf_arf(const AV1_COMP *cpi) {
return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
@@ -727,6 +693,8 @@ static INLINE int get_ref_frame_map_idx(const AV1_COMP *cpi,
#if CONFIG_EXT_REFS
else if (ref_frame == BWDREF_FRAME)
return cpi->bwd_fb_idx;
+ else if (ref_frame == ALTREF2_FRAME)
+ return cpi->alt2_fb_idx;
#endif // CONFIG_EXT_REFS
else
return cpi->alt_fb_idx;
@@ -739,6 +707,17 @@ static INLINE int get_ref_frame_buf_idx(const AV1_COMP *cpi,
return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX;
}
+#if CONFIG_HASH_ME
+static INLINE hash_table *get_ref_frame_hash_map(const AV1_COMP *cpi,
+ MV_REFERENCE_FRAME ref_frame) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ return buf_idx != INVALID_IDX
+ ? &cm->buffer_pool->frame_bufs[buf_idx].hash_table
+ : NULL;
+}
+#endif
+
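+// A minimal usage sketch for the helper above (assuming CONFIG_HASH_ME):
+//   hash_table *ht = get_ref_frame_hash_map(cpi, LAST_FRAME);
+//   if (ht != NULL) { /* query block hashes against this reference */ }
+// NULL is returned when the reference frame has no allocated buffer.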
static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
const AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
const AV1_COMMON *const cm = &cpi->common;
@@ -781,13 +760,6 @@ static INLINE unsigned int allocated_tokens(TileInfo tile) {
return get_token_alloc(tile_mb_rows, tile_mb_cols);
}
-void av1_alloc_compressor_data(AV1_COMP *cpi);
-
-void av1_scale_references(AV1_COMP *cpi);
-
-void av1_update_reference_frames(AV1_COMP *cpi);
-
-void av1_set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv);
#if CONFIG_TEMPMV_SIGNALING
void av1_set_temporal_mv_prediction(AV1_COMP *cpi, int allow_tempmv_prediction);
#endif
diff --git a/third_party/aom/av1/encoder/encodetxb.c b/third_party/aom/av1/encoder/encodetxb.c
index 3aa4c183e..6209d6fa4 100644
--- a/third_party/aom/av1/encoder/encodetxb.c
+++ b/third_party/aom/av1/encoder/encodetxb.c
@@ -38,7 +38,14 @@ void av1_alloc_txb_buf(AV1_COMP *cpi) {
aom_malloc(sizeof(*cpi->tcoeff_buf[i]) * pixel_stride * pixel_height));
}
#else
- (void)cpi;
+ AV1_COMMON *cm = &cpi->common;
+ int size = ((cm->mi_rows >> MAX_MIB_SIZE_LOG2) + 1) *
+ ((cm->mi_cols >> MAX_MIB_SIZE_LOG2) + 1);
+
+ av1_free_txb_buf(cpi);
+ // TODO(jingning): This should be further reduced.
+ CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base,
+ aom_malloc(sizeof(*cpi->coeff_buffer_base) * size));
#endif
}
@@ -49,10 +56,27 @@ void av1_free_txb_buf(AV1_COMP *cpi) {
aom_free(cpi->tcoeff_buf[i]);
}
#else
- (void)cpi;
+ aom_free(cpi->coeff_buffer_base);
#endif
}
+void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
+ int mi_row, int mi_col) {
+ int stride = (cpi->common.mi_cols >> MAX_MIB_SIZE_LOG2) + 1;
+ int offset =
+ (mi_row >> MAX_MIB_SIZE_LOG2) * stride + (mi_col >> MAX_MIB_SIZE_LOG2);
+ CB_COEFF_BUFFER *coeff_buf = &cpi->coeff_buffer_base[offset];
+ const int txb_offset = x->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
+ for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ x->mbmi_ext->tcoeff[plane] = coeff_buf->tcoeff[plane] + x->cb_offset;
+ x->mbmi_ext->eobs[plane] = coeff_buf->eobs[plane] + txb_offset;
+ x->mbmi_ext->txb_skip_ctx[plane] =
+ coeff_buf->txb_skip_ctx[plane] + txb_offset;
+ x->mbmi_ext->dc_sign_ctx[plane] =
+ coeff_buf->dc_sign_ctx[plane] + txb_offset;
+ }
+}
+
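+// Indexing sketch for av1_set_coeff_buffer() above, assuming
+// MAX_MIB_SIZE_LOG2 == 4 (64x64 superblocks): a frame 100 mi units wide gives
+// stride = (100 >> 4) + 1 = 7, so mi_row = 32, mi_col = 48 maps to
+// offset = (32 >> 4) * 7 + (48 >> 4) = 17. txb_offset rescales cb_offset to
+// per-TX-block units via TX_SIZE_W_MIN * TX_SIZE_H_MIN (presumably the
+// minimum 4x4 transform size).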
static void write_golomb(aom_writer *w, int level) {
int x = level + 1;
int i = x;
@@ -69,12 +93,178 @@ static void write_golomb(aom_writer *w, int level) {
for (i = length - 1; i >= 0; --i) aom_write_bit(w, (x >> i) & 0x01);
}
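+// write_golomb() above emits a zeroth-order Exp-Golomb code: x = level + 1 is
+// written as (length - 1) zero bits followed by the bits of x. For example,
+// level 0 gives x = 1 -> "1", and level 3 gives x = 4 = 0b100 -> "00100". The
+// residual coded this way is level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS
+// (see the call site in av1_write_coeffs_txb() below).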
+static INLINE void write_nz_map(aom_writer *w, const tran_low_t *tcoeff,
+ uint16_t eob, int plane, const int16_t *scan,
+ TX_SIZE tx_size, TX_TYPE tx_type,
+ FRAME_CONTEXT *fc) {
+ const PLANE_TYPE plane_type = get_plane_type(plane);
+ const TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int height = tx_size_high[tx_size];
+#if CONFIG_CTX1D
+ const int width = tx_size_wide[tx_size];
+ const int eob_offset = width + height;
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int seg_eob =
+ (tx_class == TX_CLASS_2D) ? tx_size_2d[tx_size] : eob_offset;
+#else
+ const int seg_eob = tx_size_2d[tx_size];
+#endif
+#if !LV_MAP_PROB
+ aom_prob *nz_map = fc->nz_map[txs_ctx][plane_type];
+ aom_prob *eob_flag = fc->eob_flag[txs_ctx][plane_type];
+#endif
+
+ for (int c = 0; c < eob; ++c) {
+ int coeff_ctx = get_nz_map_ctx(tcoeff, c, scan, bwl, height, tx_type);
+ int eob_ctx = get_eob_ctx(tcoeff, scan[c], txs_ctx, tx_type);
+
+ tran_low_t v = tcoeff[scan[c]];
+ int is_nz = (v != 0);
+
+ if (c == seg_eob - 1) break;
+
+#if LV_MAP_PROB
+ aom_write_bin(w, is_nz, fc->nz_map_cdf[txs_ctx][plane_type][coeff_ctx], 2);
+#else
+ aom_write(w, is_nz, nz_map[coeff_ctx]);
+#endif
+
+ if (is_nz) {
+#if LV_MAP_PROB
+ aom_write_bin(w, c == (eob - 1),
+ fc->eob_flag_cdf[txs_ctx][plane_type][eob_ctx], 2);
+#else
+ aom_write(w, c == (eob - 1), eob_flag[eob_ctx]);
+#endif
+ }
+ }
+}
+
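+// write_nz_map() above codes two binary symbols per scan position: a
+// significance flag for tcoeff[scan[c]] and, only when it is nonzero, an
+// end-of-block flag. The last position before seg_eob is skipped entirely,
+// since its significance is implied by eob.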
+#if CONFIG_CTX1D
+static INLINE void write_nz_map_vert(aom_writer *w, const tran_low_t *tcoeff,
+ uint16_t eob, int plane,
+ const int16_t *scan, const int16_t *iscan,
+ TX_SIZE tx_size, TX_TYPE tx_type,
+ FRAME_CONTEXT *fc) {
+ (void)eob;
+ const TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const PLANE_TYPE plane_type = get_plane_type(plane);
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int width = tx_size_wide[tx_size];
+ const int height = tx_size_high[tx_size];
+ int16_t eob_ls[MAX_HVTX_SIZE];
+ get_eob_vert(eob_ls, tcoeff, width, height);
+#if !LV_MAP_PROB
+ aom_prob *nz_map = fc->nz_map[txs_ctx][plane_type];
+#endif
+ for (int c = 0; c < width; ++c) {
+ int16_t veob = eob_ls[c];
+ assert(veob <= height);
+ int el_ctx = get_empty_line_ctx(c, eob_ls);
+#if LV_MAP_PROB
+ aom_write_bin(w, veob == 0,
+ fc->empty_line_cdf[txs_ctx][plane_type][tx_class][el_ctx], 2);
+#else
+ aom_write(w, veob == 0,
+ fc->empty_line[txs_ctx][plane_type][tx_class][el_ctx]);
+#endif
+ if (veob) {
+ for (int r = 0; r < veob; ++r) {
+ if (r + 1 != height) {
+ int coeff_idx = r * width + c;
+ int scan_idx = iscan[coeff_idx];
+ int is_nz = tcoeff[coeff_idx] != 0;
+ int coeff_ctx =
+ get_nz_map_ctx(tcoeff, scan_idx, scan, bwl, height, tx_type);
+#if LV_MAP_PROB
+ aom_write_bin(w, is_nz,
+ fc->nz_map_cdf[txs_ctx][plane_type][coeff_ctx], 2);
+#else
+ aom_write(w, is_nz, nz_map[coeff_ctx]);
+#endif
+ if (is_nz) {
+ int eob_ctx = get_hv_eob_ctx(c, r, eob_ls);
+#if LV_MAP_PROB
+ aom_write_bin(
+ w, r == veob - 1,
+ fc->hv_eob_cdf[txs_ctx][plane_type][tx_class][eob_ctx], 2);
+#else
+ aom_write(w, r == veob - 1,
+ fc->hv_eob[txs_ctx][plane_type][tx_class][eob_ctx]);
+#endif
+ }
+ }
+ }
+ }
+ }
+}
+
+static INLINE void write_nz_map_horiz(aom_writer *w, const tran_low_t *tcoeff,
+ uint16_t eob, int plane,
+ const int16_t *scan, const int16_t *iscan,
+ TX_SIZE tx_size, TX_TYPE tx_type,
+ FRAME_CONTEXT *fc) {
+ (void)scan;
+ (void)eob;
+ const TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const PLANE_TYPE plane_type = get_plane_type(plane);
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int width = tx_size_wide[tx_size];
+ const int height = tx_size_high[tx_size];
+ int16_t eob_ls[MAX_HVTX_SIZE];
+ get_eob_horiz(eob_ls, tcoeff, width, height);
+#if !LV_MAP_PROB
+ aom_prob *nz_map = fc->nz_map[txs_ctx][plane_type];
+#endif
+ for (int r = 0; r < height; ++r) {
+ int16_t heob = eob_ls[r];
+ int el_ctx = get_empty_line_ctx(r, eob_ls);
+#if LV_MAP_PROB
+ aom_write_bin(w, heob == 0,
+ fc->empty_line_cdf[txs_ctx][plane_type][tx_class][el_ctx], 2);
+#else
+ aom_write(w, heob == 0,
+ fc->empty_line[txs_ctx][plane_type][tx_class][el_ctx]);
+#endif
+ if (heob) {
+ for (int c = 0; c < heob; ++c) {
+ if (c + 1 != width) {
+ int coeff_idx = r * width + c;
+ int scan_idx = iscan[coeff_idx];
+ int is_nz = tcoeff[coeff_idx] != 0;
+ int coeff_ctx =
+ get_nz_map_ctx(tcoeff, scan_idx, scan, bwl, height, tx_type);
+#if LV_MAP_PROB
+ aom_write_bin(w, is_nz,
+ fc->nz_map_cdf[txs_ctx][plane_type][coeff_ctx], 2);
+#else
+ aom_write(w, is_nz, nz_map[coeff_ctx]);
+#endif
+ if (is_nz) {
+ int eob_ctx = get_hv_eob_ctx(r, c, eob_ls);
+#if LV_MAP_PROB
+ aom_write_bin(
+ w, c == heob - 1,
+ fc->hv_eob_cdf[txs_ctx][plane_type][tx_class][eob_ctx], 2);
+#else
+ aom_write(w, c == heob - 1,
+ fc->hv_eob[txs_ctx][plane_type][tx_class][eob_ctx]);
+#endif
+ }
+ }
+ }
+ }
+ }
+}
+#endif
+
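+// Under CONFIG_CTX1D, 1-D transform classes (TX_CLASS_VERT / TX_CLASS_HORIZ)
+// may instead signal the nonzero map per column or row: an empty-line flag
+// per line, then significance plus a per-line eob flag inside each nonempty
+// line, using the eob list precomputed by get_eob_vert()/get_eob_horiz().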
void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
aom_writer *w, int blk_row, int blk_col, int block,
int plane, TX_SIZE tx_size, const tran_low_t *tcoeff,
uint16_t eob, TXB_CTX *txb_ctx) {
- aom_prob *nz_map;
- aom_prob *eob_flag;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const PLANE_TYPE plane_type = get_plane_type(plane);
const TX_SIZE txs_ctx = get_txsize_context(tx_size);
@@ -82,18 +272,21 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
const int16_t *scan = scan_order->scan;
- const int16_t *iscan = scan_order->iscan;
int c;
- int is_nz;
const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
const int height = tx_size_high[tx_size];
- const int seg_eob = tx_size_2d[tx_size];
uint16_t update_eob = 0;
+ FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)blk_row;
(void)blk_col;
- aom_write(w, eob == 0, cm->fc->txb_skip[txs_ctx][txb_ctx->txb_skip_ctx]);
+#if LV_MAP_PROB
+ aom_write_bin(w, eob == 0,
+ ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2);
+#else
+ aom_write(w, eob == 0, ec_ctx->txb_skip[txs_ctx][txb_ctx->txb_skip_ctx]);
+#endif
if (eob == 0) return;
#if CONFIG_TXK_SEL
@@ -101,29 +294,42 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
get_min_tx_size(tx_size), w);
#endif
- nz_map = cm->fc->nz_map[txs_ctx][plane_type];
- eob_flag = cm->fc->eob_flag[txs_ctx][plane_type];
-
- for (c = 0; c < eob; ++c) {
- int coeff_ctx = get_nz_map_ctx(tcoeff, scan[c], bwl, height, iscan);
- int eob_ctx = get_eob_ctx(tcoeff, scan[c], txs_ctx);
-
- tran_low_t v = tcoeff[scan[c]];
- is_nz = (v != 0);
-
- if (c == seg_eob - 1) break;
-
- aom_write(w, is_nz, nz_map[coeff_ctx]);
-
- if (is_nz) {
- aom_write(w, c == (eob - 1), eob_flag[eob_ctx]);
+#if CONFIG_CTX1D
+ TX_CLASS tx_class = get_tx_class(tx_type);
+ if (tx_class == TX_CLASS_2D) {
+ write_nz_map(w, tcoeff, eob, plane, scan, tx_size, tx_type, ec_ctx);
+ } else {
+ const int width = tx_size_wide[tx_size];
+ const int eob_offset = width + height;
+ const int eob_mode = eob > eob_offset;
+#if LV_MAP_PROB
+ aom_write_bin(w, eob_mode,
+ ec_ctx->eob_mode_cdf[txs_ctx][plane_type][tx_class], 2);
+#else
+ aom_write(w, eob_mode, ec_ctx->eob_mode[txs_ctx][plane_type][tx_class]);
+#endif
+ if (eob_mode == 0) {
+ write_nz_map(w, tcoeff, eob, plane, scan, tx_size, tx_type, ec_ctx);
+ } else {
+ const int16_t *iscan = scan_order->iscan;
+ assert(tx_class == TX_CLASS_VERT || tx_class == TX_CLASS_HORIZ);
+ if (tx_class == TX_CLASS_VERT)
+ write_nz_map_vert(w, tcoeff, eob, plane, scan, iscan, tx_size, tx_type,
+ ec_ctx);
+ else
+ write_nz_map_horiz(w, tcoeff, eob, plane, scan, iscan, tx_size, tx_type,
+ ec_ctx);
}
}
+#else
+ write_nz_map(w, tcoeff, eob, plane, scan, tx_size, tx_type, ec_ctx);
+#endif // CONFIG_CTX1D
int i;
for (i = 0; i < NUM_BASE_LEVELS; ++i) {
- aom_prob *coeff_base = cm->fc->coeff_base[txs_ctx][plane_type][i];
-
+#if !LV_MAP_PROB
+ aom_prob *coeff_base = ec_ctx->coeff_base[txs_ctx][plane_type][i];
+#endif
update_eob = 0;
for (c = eob - 1; c >= 0; --c) {
tran_low_t v = tcoeff[scan[c]];
@@ -136,15 +342,32 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
ctx = get_base_ctx(tcoeff, scan[c], bwl, height, i + 1);
if (level == i + 1) {
+#if LV_MAP_PROB
+ aom_write_bin(w, 1, ec_ctx->coeff_base_cdf[txs_ctx][plane_type][i][ctx],
+ 2);
+#else
aom_write(w, 1, coeff_base[ctx]);
+#endif
if (c == 0) {
- aom_write(w, sign, cm->fc->dc_sign[plane_type][txb_ctx->dc_sign_ctx]);
+#if LV_MAP_PROB
+ aom_write_bin(w, sign,
+ ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx],
+ 2);
+#else
+ aom_write(w, sign, ec_ctx->dc_sign[plane_type][txb_ctx->dc_sign_ctx]);
+#endif
} else {
aom_write_bit(w, sign);
}
continue;
}
+
+#if LV_MAP_PROB
+ aom_write_bin(w, 0, ec_ctx->coeff_base_cdf[txs_ctx][plane_type][i][ctx],
+ 2);
+#else
aom_write(w, 0, coeff_base[ctx]);
+#endif
update_eob = AOMMAX(update_eob, c);
}
}
@@ -159,21 +382,70 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
if (level <= NUM_BASE_LEVELS) continue;
if (c == 0) {
- aom_write(w, sign, cm->fc->dc_sign[plane_type][txb_ctx->dc_sign_ctx]);
+#if LV_MAP_PROB
+ aom_write_bin(w, sign,
+ ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx], 2);
+#else
+ aom_write(w, sign, ec_ctx->dc_sign[plane_type][txb_ctx->dc_sign_ctx]);
+#endif
} else {
aom_write_bit(w, sign);
}
// level is above 1.
ctx = get_br_ctx(tcoeff, scan[c], bwl, height);
+
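+      // Under BR_NODE, the base range (level - 1 - NUM_BASE_LEVELS) is coded
+      // in two stages below: one flag per base-range set selects br_set_idx,
+      // then up to (1 << br_extra_bits[idx]) - 1 binary symbols locate
+      // br_offset inside the set; ranges of COEFF_BASE_RANGE or more fall
+      // through to the Golomb tail.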
+#if BR_NODE
+ int base_range = level - 1 - NUM_BASE_LEVELS;
+ int br_set_idx = 0;
+ int br_base = 0;
+ int br_offset = 0;
+
+ if (base_range >= COEFF_BASE_RANGE)
+ br_set_idx = BASE_RANGE_SETS;
+ else
+ br_set_idx = coeff_to_br_index[base_range];
+
+ for (idx = 0; idx < BASE_RANGE_SETS; ++idx) {
+ aom_write_bin(w, idx == br_set_idx,
+ ec_ctx->coeff_br_cdf[txs_ctx][plane_type][idx][ctx], 2);
+ if (idx == br_set_idx) {
+ br_base = br_index_to_coeff[br_set_idx];
+ br_offset = base_range - br_base;
+ int extra_bits = (1 << br_extra_bits[idx]) - 1;
+ for (int tok = 0; tok < extra_bits; ++tok) {
+ if (tok == br_offset) {
+ aom_write_bin(w, 1, ec_ctx->coeff_lps_cdf[txs_ctx][plane_type][ctx],
+ 2);
+ break;
+ }
+ aom_write_bin(w, 0, ec_ctx->coeff_lps_cdf[txs_ctx][plane_type][ctx],
+ 2);
+ }
+ // aom_write_literal(w, br_offset, br_extra_bits[idx]);
+ break;
+ }
+ }
+
+ if (br_set_idx < BASE_RANGE_SETS) continue;
+#else // BR_NODE
for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
if (level == (idx + 1 + NUM_BASE_LEVELS)) {
- aom_write(w, 1, cm->fc->coeff_lps[txs_ctx][plane_type][ctx]);
+#if LV_MAP_PROB
+ aom_write_bin(w, 1, ec_ctx->coeff_lps_cdf[txs_ctx][plane_type][ctx], 2);
+#else
+ aom_write(w, 1, ec_ctx->coeff_lps[txs_ctx][plane_type][ctx]);
+#endif
break;
}
- aom_write(w, 0, cm->fc->coeff_lps[txs_ctx][plane_type][ctx]);
+#if LV_MAP_PROB
+ aom_write_bin(w, 0, ec_ctx->coeff_lps_cdf[txs_ctx][plane_type][ctx], 2);
+#else
+ aom_write(w, 0, ec_ctx->coeff_lps[txs_ctx][plane_type][ctx]);
+#endif
}
if (idx < COEFF_BASE_RANGE) continue;
+#endif // BR_NODE
// use 0-th order Golomb code to handle the residual level.
write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
@@ -247,50 +519,164 @@ static INLINE void get_base_ctx_set(const tran_low_t *tcoeffs,
}
for (i = 0; i < NUM_BASE_LEVELS; ++i) {
- ctx_set[i] = (ctx_set[i] + 1) >> 1;
-
- if (row == 0 && col == 0)
- ctx_set[i] = (ctx_set[i] << 1) + mag[i];
- else if (row == 0)
- ctx_set[i] = 8 + (ctx_set[i] << 1) + mag[i];
- else if (col == 0)
- ctx_set[i] = 18 + (ctx_set[i] << 1) + mag[i];
- else
- ctx_set[i] = 28 + (ctx_set[i] << 1) + mag[i];
+ ctx_set[i] = get_base_ctx_from_count_mag(row, col, ctx_set[i], mag[i]);
}
return;
}
static INLINE int get_br_cost(tran_low_t abs_qc, int ctx,
- const aom_prob *coeff_lps) {
+ const int *coeff_lps) {
const tran_low_t min_level = 1 + NUM_BASE_LEVELS;
const tran_low_t max_level = 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE;
+ (void)ctx;
if (abs_qc >= min_level) {
- const int cost0 = av1_cost_bit(coeff_lps[ctx], 0);
- const int cost1 = av1_cost_bit(coeff_lps[ctx], 1);
+#if BR_NODE
+ if (abs_qc >= max_level)
+ return coeff_lps[COEFF_BASE_RANGE]; // COEFF_BASE_RANGE * cost0;
+ else
+ return coeff_lps[(abs_qc - min_level)]; // * cost0 + cost1;
+#else
+ const int cost0 = coeff_lps[0];
+ const int cost1 = coeff_lps[1];
if (abs_qc >= max_level)
return COEFF_BASE_RANGE * cost0;
else
return (abs_qc - min_level) * cost0 + cost1;
+#endif
} else {
return 0;
}
}
static INLINE int get_base_cost(tran_low_t abs_qc, int ctx,
- aom_prob (*coeff_base)[COEFF_BASE_CONTEXTS],
- int base_idx) {
+ const int coeff_base[2], int base_idx) {
const int level = base_idx + 1;
+ (void)ctx;
if (abs_qc < level)
return 0;
else
- return av1_cost_bit(coeff_base[base_idx][ctx], abs_qc == level);
+ return coeff_base[abs_qc == level];
+}
+
+int get_nz_eob_map_cost(const LV_MAP_COEFF_COST *coeff_costs,
+ const tran_low_t *qcoeff, uint16_t eob, int plane,
+ const int16_t *scan, TX_SIZE tx_size, TX_TYPE tx_type) {
+ (void)plane;
+ TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int height = tx_size_high[tx_size];
+#if CONFIG_CTX1D
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int width = tx_size_wide[tx_size];
+ const int eob_offset = width + height;
+ const int seg_eob =
+ (tx_class == TX_CLASS_2D) ? tx_size_2d[tx_size] : eob_offset;
+#else
+ const int seg_eob = tx_size_2d[tx_size];
+#endif
+ int cost = 0;
+ for (int c = 0; c < eob; ++c) {
+ tran_low_t v = qcoeff[scan[c]];
+ int is_nz = (v != 0);
+ if (c + 1 != seg_eob) {
+ int coeff_ctx = get_nz_map_ctx(qcoeff, c, scan, bwl, height, tx_type);
+ cost += coeff_costs->nz_map_cost[coeff_ctx][is_nz];
+ if (is_nz) {
+ int eob_ctx = get_eob_ctx(qcoeff, scan[c], txs_ctx, tx_type);
+ cost += coeff_costs->eob_cost[eob_ctx][c == (eob - 1)];
+ }
+ }
+ }
+ return cost;
+}
+
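+// get_nz_eob_map_cost() above is the rate-estimation mirror of
+// write_nz_map(): it walks the same scan order and contexts but accumulates
+// precomputed bit costs from the LV_MAP_COEFF_COST tables instead of writing
+// symbols, so the two routines need to stay in sync for RD decisions to match
+// the real bitstream cost.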
+#if CONFIG_CTX1D
+static INLINE int get_nz_eob_map_cost_vert(const LV_MAP_COEFF_COST *coeff_costs,
+ const tran_low_t *qcoeff,
+ uint16_t eob, int plane,
+ const int16_t *scan,
+ const int16_t *iscan,
+ TX_SIZE tx_size, TX_TYPE tx_type) {
+ (void)tx_size;
+ (void)scan;
+ (void)eob;
+ (void)plane;
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int width = tx_size_wide[tx_size];
+ const int height = tx_size_high[tx_size];
+ int16_t eob_ls[MAX_HVTX_SIZE];
+ get_eob_vert(eob_ls, qcoeff, width, height);
+ int cost = 0;
+ for (int c = 0; c < width; ++c) {
+ int16_t veob = eob_ls[c];
+ assert(veob <= height);
+ int el_ctx = get_empty_line_ctx(c, eob_ls);
+ cost += coeff_costs->empty_line_cost[tx_class][el_ctx][veob == 0];
+ if (veob) {
+ for (int r = 0; r < veob; ++r) {
+ if (r + 1 != height) {
+ int coeff_idx = r * width + c;
+ int scan_idx = iscan[coeff_idx];
+ int is_nz = qcoeff[coeff_idx] != 0;
+ int coeff_ctx =
+ get_nz_map_ctx(qcoeff, scan_idx, scan, bwl, height, tx_type);
+ cost += coeff_costs->nz_map_cost[coeff_ctx][is_nz];
+ if (is_nz) {
+ int eob_ctx = get_hv_eob_ctx(c, r, eob_ls);
+ cost += coeff_costs->hv_eob_cost[tx_class][eob_ctx][r == veob - 1];
+ }
+ }
+ }
+ }
+ }
+ return cost;
+}
+
+static INLINE int get_nz_eob_map_cost_horiz(
+ const LV_MAP_COEFF_COST *coeff_costs, const tran_low_t *qcoeff,
+ uint16_t eob, int plane, const int16_t *scan, const int16_t *iscan,
+ TX_SIZE tx_size, TX_TYPE tx_type) {
+ (void)tx_size;
+ (void)scan;
+ (void)eob;
+ (void)plane;
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int width = tx_size_wide[tx_size];
+ const int height = tx_size_high[tx_size];
+ int16_t eob_ls[MAX_HVTX_SIZE];
+ get_eob_horiz(eob_ls, qcoeff, width, height);
+ int cost = 0;
+ for (int r = 0; r < height; ++r) {
+ int16_t heob = eob_ls[r];
+ assert(heob <= width);
+ int el_ctx = get_empty_line_ctx(r, eob_ls);
+ cost += coeff_costs->empty_line_cost[tx_class][el_ctx][heob == 0];
+ if (heob) {
+ for (int c = 0; c < heob; ++c) {
+ if (c + 1 != width) {
+ int coeff_idx = r * width + c;
+ int scan_idx = iscan[coeff_idx];
+ int is_nz = qcoeff[coeff_idx] != 0;
+ int coeff_ctx =
+ get_nz_map_ctx(qcoeff, scan_idx, scan, bwl, height, tx_type);
+ cost += coeff_costs->nz_map_cost[coeff_ctx][is_nz];
+ if (is_nz) {
+ int eob_ctx = get_hv_eob_ctx(r, c, eob_ls);
+ cost += coeff_costs->hv_eob_cost[tx_class][eob_ctx][c == heob - 1];
+ }
+ }
+ }
+ }
+ }
+ return cost;
}
+#endif
-int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
+int av1_cost_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
int blk_row, int blk_col, int block, TX_SIZE tx_size,
TXB_CTX *txb_ctx) {
- const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
TX_SIZE txs_ctx = get_txsize_context(tx_size);
const PLANE_TYPE plane_type = get_plane_type(plane);
@@ -301,43 +687,62 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
const int eob = p->eobs[block];
const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
int c, cost;
- const int seg_eob = AOMMIN(eob, tx_size_2d[tx_size] - 1);
int txb_skip_ctx = txb_ctx->txb_skip_ctx;
- aom_prob *nz_map = xd->fc->nz_map[txs_ctx][plane_type];
const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
const int height = tx_size_high[tx_size];
- aom_prob(*coeff_base)[COEFF_BASE_CONTEXTS] =
- xd->fc->coeff_base[txs_ctx][plane_type];
-
const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
const int16_t *scan = scan_order->scan;
- const int16_t *iscan = scan_order->iscan;
+
+ LV_MAP_COEFF_COST *coeff_costs = &x->coeff_costs[txs_ctx][plane_type];
cost = 0;
if (eob == 0) {
- cost = av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_skip_ctx], 1);
+ cost = coeff_costs->txb_skip_cost[txb_skip_ctx][1];
return cost;
}
-
- cost = av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_skip_ctx], 0);
+ cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
#if CONFIG_TXK_SEL
- cost += av1_tx_type_cost(cpi, xd, mbmi->sb_type, plane, tx_size, tx_type);
+ cost += av1_tx_type_cost(cm, x, xd, mbmi->sb_type, plane, tx_size, tx_type);
#endif
+#if CONFIG_CTX1D
+ TX_CLASS tx_class = get_tx_class(tx_type);
+ if (tx_class == TX_CLASS_2D) {
+ cost += get_nz_eob_map_cost(coeff_costs, qcoeff, eob, plane, scan, tx_size,
+ tx_type);
+ } else {
+ const int width = tx_size_wide[tx_size];
+ const int eob_offset = width + height;
+ const int eob_mode = eob > eob_offset;
+ cost += coeff_costs->eob_mode_cost[tx_class][eob_mode];
+ if (eob_mode == 0) {
+ cost += get_nz_eob_map_cost(coeff_costs, qcoeff, eob, plane, scan,
+ tx_size, tx_type);
+ } else {
+ const int16_t *iscan = scan_order->iscan;
+ assert(tx_class == TX_CLASS_VERT || tx_class == TX_CLASS_HORIZ);
+ if (tx_class == TX_CLASS_VERT)
+ cost += get_nz_eob_map_cost_vert(coeff_costs, qcoeff, eob, plane, scan,
+ iscan, tx_size, tx_type);
+ else
+ cost += get_nz_eob_map_cost_horiz(coeff_costs, qcoeff, eob, plane, scan,
+ iscan, tx_size, tx_type);
+ }
+ }
+#else // CONFIG_CTX1D
+ cost += get_nz_eob_map_cost(coeff_costs, qcoeff, eob, plane, scan, tx_size,
+ tx_type);
+#endif // CONFIG_CTX1D
+
for (c = 0; c < eob; ++c) {
tran_low_t v = qcoeff[scan[c]];
int is_nz = (v != 0);
int level = abs(v);
- if (c < seg_eob) {
- int coeff_ctx = get_nz_map_ctx(qcoeff, scan[c], bwl, height, iscan);
- cost += av1_cost_bit(nz_map[coeff_ctx], is_nz);
- }
-
if (is_nz) {
int ctx_ls[NUM_BASE_LEVELS] = { 0 };
int sign = (v < 0) ? 1 : 0;
@@ -345,8 +750,7 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
// sign bit cost
if (c == 0) {
int dc_sign_ctx = txb_ctx->dc_sign_ctx;
-
- cost += av1_cost_bit(xd->fc->dc_sign[plane_type][dc_sign_ctx], sign);
+ cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign];
} else {
cost += av1_cost_bit(128, sign);
}
@@ -358,28 +762,33 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
if (level <= i) continue;
if (level == i + 1) {
- cost += av1_cost_bit(coeff_base[i][ctx_ls[i]], 1);
+ cost += coeff_costs->base_cost[i][ctx_ls[i]][1];
continue;
}
- cost += av1_cost_bit(coeff_base[i][ctx_ls[i]], 0);
+ cost += coeff_costs->base_cost[i][ctx_ls[i]][0];
}
if (level > NUM_BASE_LEVELS) {
- int idx;
int ctx;
-
ctx = get_br_ctx(qcoeff, scan[c], bwl, height);
+#if BR_NODE
+ int base_range = level - 1 - NUM_BASE_LEVELS;
+ if (base_range < COEFF_BASE_RANGE) {
+ cost += coeff_costs->lps_cost[ctx][base_range];
+ } else {
+ cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE];
+ }
- for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
+#else
+ for (int idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
if (level == (idx + 1 + NUM_BASE_LEVELS)) {
- cost +=
- av1_cost_bit(xd->fc->coeff_lps[txs_ctx][plane_type][ctx], 1);
+ cost += coeff_costs->lps_cost[ctx][1];
break;
}
- cost += av1_cost_bit(xd->fc->coeff_lps[txs_ctx][plane_type][ctx], 0);
+ cost += coeff_costs->lps_cost[ctx][0];
}
-
- if (idx >= COEFF_BASE_RANGE) {
+#endif
+ if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
// residual cost
int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
int ri = r;
@@ -396,12 +805,6 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
cost += av1_cost_bit(128, (r >> ri) & 0x01);
}
}
-
- if (c < seg_eob) {
- int eob_ctx = get_eob_ctx(qcoeff, scan[c], txs_ctx);
- cost += av1_cost_bit(xd->fc->eob_flag[txs_ctx][plane_type][eob_ctx],
- c == (eob - 1));
- }
}
}
@@ -413,118 +816,17 @@ static INLINE int has_base(tran_low_t qc, int base_idx) {
return abs(qc) >= level;
}
-static void gen_base_count_mag_arr(int (*base_count_arr)[MAX_TX_SQUARE],
- int (*base_mag_arr)[2],
- const tran_low_t *qcoeff, int stride,
- int height, int eob, const int16_t *scan) {
- for (int c = 0; c < eob; ++c) {
- const int coeff_idx = scan[c]; // raster order
- if (!has_base(qcoeff[coeff_idx], 0)) continue;
- const int row = coeff_idx / stride;
- const int col = coeff_idx % stride;
- int *mag = base_mag_arr[coeff_idx];
- get_mag(mag, qcoeff, stride, height, row, col, base_ref_offset,
- BASE_CONTEXT_POSITION_NUM);
- for (int i = 0; i < NUM_BASE_LEVELS; ++i) {
- if (!has_base(qcoeff[coeff_idx], i)) continue;
- int *count = base_count_arr[i] + coeff_idx;
- *count = get_level_count(qcoeff, stride, height, row, col, i,
- base_ref_offset, BASE_CONTEXT_POSITION_NUM);
- }
- }
-}
-
-static void gen_nz_count_arr(int(*nz_count_arr), const tran_low_t *qcoeff,
- int stride, int height, int eob,
- const SCAN_ORDER *scan_order) {
- const int16_t *scan = scan_order->scan;
- const int16_t *iscan = scan_order->iscan;
- for (int c = 0; c < eob; ++c) {
- const int coeff_idx = scan[c]; // raster order
- const int row = coeff_idx / stride;
- const int col = coeff_idx % stride;
- nz_count_arr[coeff_idx] =
- get_nz_count(qcoeff, stride, height, row, col, iscan);
- }
-}
-
-static void gen_nz_ctx_arr(int (*nz_ctx_arr)[2], int(*nz_count_arr),
- const tran_low_t *qcoeff, int bwl, int eob,
- const SCAN_ORDER *scan_order) {
- const int16_t *scan = scan_order->scan;
- const int16_t *iscan = scan_order->iscan;
- for (int c = 0; c < eob; ++c) {
- const int coeff_idx = scan[c]; // raster order
- const int count = nz_count_arr[coeff_idx];
- nz_ctx_arr[coeff_idx][0] =
- get_nz_map_ctx_from_count(count, qcoeff, coeff_idx, bwl, iscan);
- }
-}
-
-static void gen_base_ctx_arr(int (*base_ctx_arr)[MAX_TX_SQUARE][2],
- int (*base_count_arr)[MAX_TX_SQUARE],
- int (*base_mag_arr)[2], const tran_low_t *qcoeff,
- int stride, int eob, const int16_t *scan) {
- (void)qcoeff;
- for (int i = 0; i < NUM_BASE_LEVELS; ++i) {
- for (int c = 0; c < eob; ++c) {
- const int coeff_idx = scan[c]; // raster order
- if (!has_base(qcoeff[coeff_idx], i)) continue;
- const int row = coeff_idx / stride;
- const int col = coeff_idx % stride;
- const int count = base_count_arr[i][coeff_idx];
- const int *mag = base_mag_arr[coeff_idx];
- const int level = i + 1;
- base_ctx_arr[i][coeff_idx][0] =
- get_base_ctx_from_count_mag(row, col, count, mag[0], level);
- }
- }
-}
-
static INLINE int has_br(tran_low_t qc) {
return abs(qc) >= 1 + NUM_BASE_LEVELS;
}
-static void gen_br_count_mag_arr(int *br_count_arr, int (*br_mag_arr)[2],
- const tran_low_t *qcoeff, int stride,
- int height, int eob, const int16_t *scan) {
- for (int c = 0; c < eob; ++c) {
- const int coeff_idx = scan[c]; // raster order
- if (!has_br(qcoeff[coeff_idx])) continue;
- const int row = coeff_idx / stride;
- const int col = coeff_idx % stride;
- int *count = br_count_arr + coeff_idx;
- int *mag = br_mag_arr[coeff_idx];
- *count = get_level_count(qcoeff, stride, height, row, col, NUM_BASE_LEVELS,
- br_ref_offset, BR_CONTEXT_POSITION_NUM);
- get_mag(mag, qcoeff, stride, height, row, col, br_ref_offset,
- BR_CONTEXT_POSITION_NUM);
- }
-}
-
-static void gen_br_ctx_arr(int (*br_ctx_arr)[2], const int *br_count_arr,
- int (*br_mag_arr)[2], const tran_low_t *qcoeff,
- int stride, int eob, const int16_t *scan) {
- (void)qcoeff;
- for (int c = 0; c < eob; ++c) {
- const int coeff_idx = scan[c]; // raster order
- if (!has_br(qcoeff[coeff_idx])) continue;
- const int row = coeff_idx / stride;
- const int col = coeff_idx % stride;
- const int count = br_count_arr[coeff_idx];
- const int *mag = br_mag_arr[coeff_idx];
- br_ctx_arr[coeff_idx][0] =
- get_br_ctx_from_count_mag(row, col, count, mag[0]);
- }
-}
-
static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx,
- const aom_prob *dc_sign_prob,
+ const int (*dc_sign_cost)[2],
int dc_sign_ctx) {
const int sign = (qc < 0) ? 1 : 0;
// sign bit cost
if (coeff_idx == 0) {
- return av1_cost_bit(dc_sign_prob[dc_sign_ctx], sign);
+ return dc_sign_cost[dc_sign_ctx][sign];
} else {
return av1_cost_bit(128, sign);
}
@@ -547,42 +849,80 @@ static INLINE int get_golomb_cost(int abs_qc) {
}
}
-// TODO(angiebird): add static once this function is called
void gen_txb_cache(TxbCache *txb_cache, TxbInfo *txb_info) {
+ // gen_nz_count_arr
const int16_t *scan = txb_info->scan_order->scan;
- gen_nz_count_arr(txb_cache->nz_count_arr, txb_info->qcoeff, txb_info->stride,
- txb_info->height, txb_info->eob, txb_info->scan_order);
- gen_nz_ctx_arr(txb_cache->nz_ctx_arr, txb_cache->nz_count_arr,
- txb_info->qcoeff, txb_info->bwl, txb_info->eob,
- txb_info->scan_order);
- gen_base_count_mag_arr(txb_cache->base_count_arr, txb_cache->base_mag_arr,
- txb_info->qcoeff, txb_info->stride, txb_info->height,
- txb_info->eob, scan);
- gen_base_ctx_arr(txb_cache->base_ctx_arr, txb_cache->base_count_arr,
- txb_cache->base_mag_arr, txb_info->qcoeff, txb_info->stride,
- txb_info->eob, scan);
- gen_br_count_mag_arr(txb_cache->br_count_arr, txb_cache->br_mag_arr,
- txb_info->qcoeff, txb_info->stride, txb_info->height,
- txb_info->eob, scan);
- gen_br_ctx_arr(txb_cache->br_ctx_arr, txb_cache->br_count_arr,
- txb_cache->br_mag_arr, txb_info->qcoeff, txb_info->stride,
- txb_info->eob, scan);
+ const int bwl = txb_info->bwl;
+ const int height = txb_info->height;
+ tran_low_t *qcoeff = txb_info->qcoeff;
+ const BASE_CTX_TABLE *base_ctx_table =
+ txb_info->coeff_ctx_table->base_ctx_table;
+ for (int c = 0; c < txb_info->eob; ++c) {
+ const int coeff_idx = scan[c]; // raster order
+ const int row = coeff_idx >> bwl;
+ const int col = coeff_idx - (row << bwl);
+#if REDUCE_CONTEXT_DEPENDENCY
+ int prev_coeff_idx;
+ int prev_row;
+ int prev_col;
+ if (c > MIN_SCAN_IDX_REDUCE_CONTEXT_DEPENDENCY) {
+ prev_coeff_idx = scan[c - 1]; // raster order
+ prev_row = prev_coeff_idx >> bwl;
+ prev_col = prev_coeff_idx - (prev_row << bwl);
+ } else {
+ prev_coeff_idx = -1;
+ prev_row = -1;
+ prev_col = -1;
+ }
+ txb_cache->nz_count_arr[coeff_idx] =
+ get_nz_count(qcoeff, bwl, height, row, col, prev_row, prev_col);
+#else
+ txb_cache->nz_count_arr[coeff_idx] =
+ get_nz_count(qcoeff, bwl, height, row, col);
+#endif
+ const int nz_count = txb_cache->nz_count_arr[coeff_idx];
+ txb_cache->nz_ctx_arr[coeff_idx] =
+ get_nz_map_ctx_from_count(nz_count, coeff_idx, bwl, txb_info->tx_type);
+
+ // gen_base_count_mag_arr
+ if (!has_base(qcoeff[coeff_idx], 0)) continue;
+ int *base_mag = txb_cache->base_mag_arr[coeff_idx];
+ int count[NUM_BASE_LEVELS];
+ get_base_count_mag(base_mag, count, qcoeff, bwl, height, row, col);
+
+ for (int i = 0; i < NUM_BASE_LEVELS; ++i) {
+ if (!has_base(qcoeff[coeff_idx], i)) break;
+ txb_cache->base_count_arr[i][coeff_idx] = count[i];
+ const int level = i + 1;
+ txb_cache->base_ctx_arr[i][coeff_idx] =
+ base_ctx_table[row != 0][col != 0][base_mag[0] > level][count[i]];
+ }
+
+ // gen_br_count_mag_arr
+ if (!has_br(qcoeff[coeff_idx])) continue;
+ int *br_count = txb_cache->br_count_arr + coeff_idx;
+ int *br_mag = txb_cache->br_mag_arr[coeff_idx];
+ *br_count = get_br_count_mag(br_mag, qcoeff, bwl, height, row, col,
+ NUM_BASE_LEVELS);
+ txb_cache->br_ctx_arr[coeff_idx] =
+ get_br_ctx_from_count_mag(row, col, *br_count, br_mag[0]);
+ }
}
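+// gen_txb_cache() fills, for each coefficient in scan order: its
+// nonzero-neighbor count and nz-map context, per-base-level neighbor
+// counts/magnitudes with contexts looked up in base_ctx_table, and the
+// base-range count/magnitude/context. try_level_down() reuses this cache to
+// price candidate coefficient changes without rescanning the neighborhood.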
-static INLINE aom_prob get_level_prob(int level, int coeff_idx,
- const TxbCache *txb_cache,
- const TxbProbs *txb_probs) {
+static INLINE const int *get_level_prob(int level, int coeff_idx,
+ const TxbCache *txb_cache,
+ const LV_MAP_COEFF_COST *txb_costs) {
if (level == 0) {
- const int ctx = txb_cache->nz_ctx_arr[coeff_idx][0];
- return txb_probs->nz_map[ctx];
+ const int ctx = txb_cache->nz_ctx_arr[coeff_idx];
+ return txb_costs->nz_map_cost[ctx];
} else if (level >= 1 && level < 1 + NUM_BASE_LEVELS) {
const int idx = level - 1;
- const int ctx = txb_cache->base_ctx_arr[idx][coeff_idx][0];
- return txb_probs->coeff_base[idx][ctx];
+ const int ctx = txb_cache->base_ctx_arr[idx][coeff_idx];
+ return txb_costs->base_cost[idx][ctx];
} else if (level >= 1 + NUM_BASE_LEVELS &&
level < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
- const int ctx = txb_cache->br_ctx_arr[coeff_idx][0];
- return txb_probs->coeff_lps[ctx];
+ const int ctx = txb_cache->br_ctx_arr[coeff_idx];
+ return txb_costs->lps_cost[ctx];
} else if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
printf("get_level_prob does not support golomb\n");
assert(0);
@@ -657,7 +997,7 @@ static int neighbor_level_down_update(int *new_count, int *new_mag, int count,
static int try_neighbor_level_down_br(int coeff_idx, int nb_coeff_idx,
const TxbCache *txb_cache,
- const TxbProbs *txb_probs,
+ const LV_MAP_COEFF_COST *txb_costs,
const TxbInfo *txb_info) {
const tran_low_t qc = txb_info->qcoeff[coeff_idx];
const tran_low_t abs_qc = abs(qc);
@@ -676,11 +1016,12 @@ static int try_neighbor_level_down_br(int coeff_idx, int nb_coeff_idx,
if (update) {
const int row = coeff_idx >> txb_info->bwl;
const int col = coeff_idx - (row << txb_info->bwl);
- const int ctx = txb_cache->br_ctx_arr[coeff_idx][0];
- const int org_cost = get_br_cost(abs_qc, ctx, txb_probs->coeff_lps);
+ const int ctx = txb_cache->br_ctx_arr[coeff_idx];
+ const int org_cost = get_br_cost(abs_qc, ctx, txb_costs->lps_cost[ctx]);
const int new_ctx = get_br_ctx_from_count_mag(row, col, new_count, new_mag);
- const int new_cost = get_br_cost(abs_qc, new_ctx, txb_probs->coeff_lps);
+ const int new_cost =
+ get_br_cost(abs_qc, new_ctx, txb_costs->lps_cost[new_ctx]);
const int cost_diff = -org_cost + new_cost;
return cost_diff;
} else {
@@ -690,10 +1031,12 @@ static int try_neighbor_level_down_br(int coeff_idx, int nb_coeff_idx,
static int try_neighbor_level_down_base(int coeff_idx, int nb_coeff_idx,
const TxbCache *txb_cache,
- const TxbProbs *txb_probs,
+ const LV_MAP_COEFF_COST *txb_costs,
const TxbInfo *txb_info) {
const tran_low_t qc = txb_info->qcoeff[coeff_idx];
const tran_low_t abs_qc = abs(qc);
+ const BASE_CTX_TABLE *base_ctx_table =
+ txb_info->coeff_ctx_table->base_ctx_table;
int cost_diff = 0;
for (int base_idx = 0; base_idx < NUM_BASE_LEVELS; ++base_idx) {
@@ -713,14 +1056,14 @@ static int try_neighbor_level_down_base(int coeff_idx, int nb_coeff_idx,
if (update) {
const int row = coeff_idx >> txb_info->bwl;
const int col = coeff_idx - (row << txb_info->bwl);
- const int ctx = txb_cache->base_ctx_arr[base_idx][coeff_idx][0];
- const int org_cost =
- get_base_cost(abs_qc, ctx, txb_probs->coeff_base, base_idx);
+ const int ctx = txb_cache->base_ctx_arr[base_idx][coeff_idx];
+ const int org_cost = get_base_cost(
+ abs_qc, ctx, txb_costs->base_cost[base_idx][ctx], base_idx);
const int new_ctx =
- get_base_ctx_from_count_mag(row, col, new_count, new_mag, level);
- const int new_cost =
- get_base_cost(abs_qc, new_ctx, txb_probs->coeff_base, base_idx);
+ base_ctx_table[row != 0][col != 0][new_mag > level][new_count];
+ const int new_cost = get_base_cost(
+ abs_qc, new_ctx, txb_costs->base_cost[base_idx][new_ctx], base_idx);
cost_diff += -org_cost + new_cost;
}
}
@@ -729,7 +1072,7 @@ static int try_neighbor_level_down_base(int coeff_idx, int nb_coeff_idx,
static int try_neighbor_level_down_nz(int coeff_idx, int nb_coeff_idx,
const TxbCache *txb_cache,
- const TxbProbs *txb_probs,
+ const LV_MAP_COEFF_COST *txb_costs,
TxbInfo *txb_info) {
// assume eob doesn't change
const tran_low_t qc = txb_info->qcoeff[coeff_idx];
@@ -746,12 +1089,12 @@ static int try_neighbor_level_down_nz(int coeff_idx, int nb_coeff_idx,
assert(count > 0);
txb_info->qcoeff[nb_coeff_idx] = get_lower_coeff(nb_coeff);
const int new_ctx = get_nz_map_ctx_from_count(
- count - 1, txb_info->qcoeff, coeff_idx, txb_info->bwl, iscan);
+ count - 1, coeff_idx, txb_info->bwl, txb_info->tx_type);
txb_info->qcoeff[nb_coeff_idx] = nb_coeff;
- const int ctx = txb_cache->nz_ctx_arr[coeff_idx][0];
+ const int ctx = txb_cache->nz_ctx_arr[coeff_idx];
const int is_nz = abs_qc > 0;
- const int org_cost = av1_cost_bit(txb_probs->nz_map[ctx], is_nz);
- const int new_cost = av1_cost_bit(txb_probs->nz_map[new_ctx], is_nz);
+ const int org_cost = txb_costs->nz_map_cost[ctx][is_nz];
+ const int new_cost = txb_costs->nz_map_cost[new_ctx][is_nz];
const int cost_diff = new_cost - org_cost;
return cost_diff;
} else {
@@ -761,7 +1104,8 @@ static int try_neighbor_level_down_nz(int coeff_idx, int nb_coeff_idx,
static int try_self_level_down(tran_low_t *low_coeff, int coeff_idx,
const TxbCache *txb_cache,
- const TxbProbs *txb_probs, TxbInfo *txb_info) {
+ const LV_MAP_COEFF_COST *txb_costs,
+ TxbInfo *txb_info) {
const tran_low_t qc = txb_info->qcoeff[coeff_idx];
if (qc == 0) {
*low_coeff = 0;
@@ -772,44 +1116,68 @@ static int try_self_level_down(tran_low_t *low_coeff, int coeff_idx,
int cost_diff;
if (*low_coeff == 0) {
const int scan_idx = txb_info->scan_order->iscan[coeff_idx];
- const aom_prob level_prob =
- get_level_prob(abs_qc, coeff_idx, txb_cache, txb_probs);
- const aom_prob low_level_prob =
- get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_probs);
+ const int *level_cost =
+ get_level_prob(abs_qc, coeff_idx, txb_cache, txb_costs);
+ const int *low_level_cost =
+ get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_costs);
if (scan_idx < txb_info->seg_eob) {
// When level-0, we code the binary of abs_qc > level
// but when level-k k > 0 we code the binary of abs_qc == level
      // That's why we need this special treatment for level-0 map
      // TODO(angiebird): make level-0 consistent with other levels
- cost_diff = -av1_cost_bit(level_prob, 1) +
- av1_cost_bit(low_level_prob, 0) -
- av1_cost_bit(low_level_prob, 1);
+ cost_diff = -level_cost[1] + low_level_cost[0] - low_level_cost[1];
} else {
- cost_diff = -av1_cost_bit(level_prob, 1);
+ cost_diff = -level_cost[1];
}
if (scan_idx < txb_info->seg_eob) {
- const int eob_ctx =
- get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->txs_ctx);
- cost_diff -= av1_cost_bit(txb_probs->eob_flag[eob_ctx],
- scan_idx == (txb_info->eob - 1));
+ const int eob_ctx = get_eob_ctx(txb_info->qcoeff, coeff_idx,
+ txb_info->txs_ctx, txb_info->tx_type);
+ cost_diff -=
+ txb_costs->eob_cost[eob_ctx][scan_idx == (txb_info->eob - 1)];
}
const int sign_cost = get_sign_bit_cost(
- qc, coeff_idx, txb_probs->dc_sign_prob, txb_info->txb_ctx->dc_sign_ctx);
+ qc, coeff_idx, txb_costs->dc_sign_cost, txb_info->txb_ctx->dc_sign_ctx);
cost_diff -= sign_cost;
+ } else if (abs_qc <= NUM_BASE_LEVELS) {
+ const int *level_cost =
+ get_level_prob(abs_qc, coeff_idx, txb_cache, txb_costs);
+ const int *low_level_cost =
+ get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_costs);
+ cost_diff = -level_cost[1] + low_level_cost[1] - low_level_cost[0];
+ } else if (abs_qc == NUM_BASE_LEVELS + 1) {
+ const int *level_cost =
+ get_level_prob(abs_qc, coeff_idx, txb_cache, txb_costs);
+ const int *low_level_cost =
+ get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_costs);
+#if BR_NODE
+ cost_diff = -level_cost[0] + low_level_cost[1] - low_level_cost[0];
+#else
+ cost_diff = -level_cost[1] + low_level_cost[1] - low_level_cost[0];
+#endif
} else if (abs_qc < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
- const aom_prob level_prob =
- get_level_prob(abs_qc, coeff_idx, txb_cache, txb_probs);
- const aom_prob low_level_prob =
- get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_probs);
- cost_diff = -av1_cost_bit(level_prob, 1) + av1_cost_bit(low_level_prob, 1) -
- av1_cost_bit(low_level_prob, 0);
+ const int *level_cost =
+ get_level_prob(abs_qc, coeff_idx, txb_cache, txb_costs);
+ const int *low_level_cost =
+ get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_costs);
+
+#if BR_NODE
+ cost_diff = -level_cost[abs_qc - 1 - NUM_BASE_LEVELS] +
+ low_level_cost[abs(*low_coeff) - 1 - NUM_BASE_LEVELS];
+#else
+ cost_diff = -level_cost[1] + low_level_cost[1] - low_level_cost[0];
+#endif
} else if (abs_qc == 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
- const aom_prob low_level_prob =
- get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_probs);
- cost_diff = -get_golomb_cost(abs_qc) + av1_cost_bit(low_level_prob, 1) -
- av1_cost_bit(low_level_prob, 0);
+ const int *low_level_cost =
+ get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_costs);
+#if BR_NODE
+ cost_diff = -get_golomb_cost(abs_qc) - low_level_cost[COEFF_BASE_RANGE] +
+ low_level_cost[COEFF_BASE_RANGE - 1];
+#else
+ cost_diff =
+ -get_golomb_cost(abs_qc) + low_level_cost[1] - low_level_cost[0];
+#endif
} else {
assert(abs_qc > 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE);
const tran_low_t abs_low_coeff = abs(*low_coeff);
@@ -831,10 +1199,26 @@ static INLINE int check_br_neighbor(tran_low_t qc) {
return abs(qc) > BR_MAG_OFFSET;
}
+#define FAST_OPTIMIZE_TXB 1
+
+#if FAST_OPTIMIZE_TXB
+#define ALNB_REF_OFFSET_NUM 2
+static int alnb_ref_offset[ALNB_REF_OFFSET_NUM][2] = {
+ { -1, 0 }, { 0, -1 },
+};
+#define NB_REF_OFFSET_NUM 4
+static int nb_ref_offset[NB_REF_OFFSET_NUM][2] = {
+ { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 },
+};
+#endif // FAST_OPTIMIZE_TXB
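+// In fast mode, the neighbor updates below shrink to the above/left pair
+// (alnb_ref_offset) for the nz map and the 4-connected neighbors
+// (nb_ref_offset) for the base and base-range contexts, at the cost of a
+// slightly less exact cost delta per candidate coefficient.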
+
// TODO(angiebird): add static to this function once it's called
int try_level_down(int coeff_idx, const TxbCache *txb_cache,
- const TxbProbs *txb_probs, TxbInfo *txb_info,
- int (*cost_map)[COST_MAP_SIZE]) {
+ const LV_MAP_COEFF_COST *txb_costs, TxbInfo *txb_info,
+ int (*cost_map)[COST_MAP_SIZE], int fast_mode) {
+#if !FAST_OPTIMIZE_TXB
+ (void)fast_mode;
+#endif
if (cost_map) {
for (int i = 0; i < COST_MAP_SIZE; ++i) av1_zero(cost_map[i]);
}
@@ -849,7 +1233,7 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache,
const int scan_idx = iscan[coeff_idx];
if (scan_idx < eob) {
const int cost_diff = try_self_level_down(&low_coeff, coeff_idx, txb_cache,
- txb_probs, txb_info);
+ txb_costs, txb_info);
if (cost_map)
cost_map[0 + COST_MAP_OFFSET][0 + COST_MAP_OFFSET] = cost_diff;
accu_cost_diff += cost_diff;
@@ -858,19 +1242,33 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache,
const int row = coeff_idx >> txb_info->bwl;
const int col = coeff_idx - (row << txb_info->bwl);
if (check_nz_neighbor(qc)) {
- for (int i = 0; i < SIG_REF_OFFSET_NUM; ++i) {
- const int nb_row = row - sig_ref_offset[i][0];
- const int nb_col = col - sig_ref_offset[i][1];
+#if FAST_OPTIMIZE_TXB
+ int(*ref_offset)[2];
+ int ref_num;
+ if (fast_mode) {
+ ref_offset = alnb_ref_offset;
+ ref_num = ALNB_REF_OFFSET_NUM;
+ } else {
+ ref_offset = sig_ref_offset;
+ ref_num = SIG_REF_OFFSET_NUM;
+ }
+#else
+ int(*ref_offset)[2] = sig_ref_offset;
+ const int ref_num = SIG_REF_OFFSET_NUM;
+#endif
+ for (int i = 0; i < ref_num; ++i) {
+ const int nb_row = row - ref_offset[i][0];
+ const int nb_col = col - ref_offset[i][1];
const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
- if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
- nb_col < txb_info->stride))
+ if (nb_row < 0 || nb_col < 0 || nb_row >= txb_info->height ||
+ nb_col >= txb_info->stride)
continue;
const int nb_scan_idx = iscan[nb_coeff_idx];
if (nb_scan_idx < eob) {
const int cost_diff = try_neighbor_level_down_nz(
- nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info);
+ nb_coeff_idx, coeff_idx, txb_cache, txb_costs, txb_info);
if (cost_map)
cost_map[nb_row - row + COST_MAP_OFFSET]
[nb_col - col + COST_MAP_OFFSET] += cost_diff;
@@ -880,19 +1278,33 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache,
}
if (check_base_neighbor(qc)) {
- for (int i = 0; i < BASE_CONTEXT_POSITION_NUM; ++i) {
- const int nb_row = row - base_ref_offset[i][0];
- const int nb_col = col - base_ref_offset[i][1];
+#if FAST_OPTIMIZE_TXB
+ int(*ref_offset)[2];
+ int ref_num;
+ if (fast_mode) {
+ ref_offset = nb_ref_offset;
+ ref_num = NB_REF_OFFSET_NUM;
+ } else {
+ ref_offset = base_ref_offset;
+ ref_num = BASE_CONTEXT_POSITION_NUM;
+ }
+#else
+ int(*ref_offset)[2] = base_ref_offset;
+ int ref_num = BASE_CONTEXT_POSITION_NUM;
+#endif
+ for (int i = 0; i < ref_num; ++i) {
+ const int nb_row = row - ref_offset[i][0];
+ const int nb_col = col - ref_offset[i][1];
const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
- if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
- nb_col < txb_info->stride))
+ if (nb_row < 0 || nb_col < 0 || nb_row >= txb_info->height ||
+ nb_col >= txb_info->stride)
continue;
const int nb_scan_idx = iscan[nb_coeff_idx];
if (nb_scan_idx < eob) {
const int cost_diff = try_neighbor_level_down_base(
- nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info);
+ nb_coeff_idx, coeff_idx, txb_cache, txb_costs, txb_info);
if (cost_map)
cost_map[nb_row - row + COST_MAP_OFFSET]
[nb_col - col + COST_MAP_OFFSET] += cost_diff;
@@ -902,19 +1314,33 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache,
}
if (check_br_neighbor(qc)) {
- for (int i = 0; i < BR_CONTEXT_POSITION_NUM; ++i) {
- const int nb_row = row - br_ref_offset[i][0];
- const int nb_col = col - br_ref_offset[i][1];
+#if FAST_OPTIMIZE_TXB
+ int(*ref_offset)[2];
+ int ref_num;
+ if (fast_mode) {
+ ref_offset = nb_ref_offset;
+ ref_num = NB_REF_OFFSET_NUM;
+ } else {
+ ref_offset = br_ref_offset;
+ ref_num = BR_CONTEXT_POSITION_NUM;
+ }
+#else
+ int(*ref_offset)[2] = br_ref_offset;
+ const int ref_num = BR_CONTEXT_POSITION_NUM;
+#endif
+ for (int i = 0; i < ref_num; ++i) {
+ const int nb_row = row - ref_offset[i][0];
+ const int nb_col = col - ref_offset[i][1];
const int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
- if (!(nb_row >= 0 && nb_col >= 0 && nb_row < txb_info->height &&
- nb_col < txb_info->stride))
+ if (nb_row < 0 || nb_col < 0 || nb_row >= txb_info->height ||
+ nb_col >= txb_info->stride)
continue;
const int nb_scan_idx = iscan[nb_coeff_idx];
if (nb_scan_idx < eob) {
const int cost_diff = try_neighbor_level_down_br(
- nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info);
+ nb_coeff_idx, coeff_idx, txb_cache, txb_costs, txb_info);
if (cost_map)
cost_map[nb_row - row + COST_MAP_OFFSET]
[nb_col - col + COST_MAP_OFFSET] += cost_diff;
@@ -927,7 +1353,7 @@ int try_level_down(int coeff_idx, const TxbCache *txb_cache,
}
static int get_low_coeff_cost(int coeff_idx, const TxbCache *txb_cache,
- const TxbProbs *txb_probs,
+ const LV_MAP_COEFF_COST *txb_costs,
const TxbInfo *txb_info) {
const tran_low_t qc = txb_info->qcoeff[coeff_idx];
const int abs_qc = abs(qc);
@@ -935,22 +1361,21 @@ static int get_low_coeff_cost(int coeff_idx, const TxbCache *txb_cache,
int cost = 0;
const int scan_idx = txb_info->scan_order->iscan[coeff_idx];
if (scan_idx < txb_info->seg_eob) {
- const aom_prob level_prob =
- get_level_prob(0, coeff_idx, txb_cache, txb_probs);
- cost += av1_cost_bit(level_prob, qc != 0);
+ const int *level_cost = get_level_prob(0, coeff_idx, txb_cache, txb_costs);
+ cost += level_cost[qc != 0];
}
if (qc != 0) {
const int base_idx = 0;
- const int ctx = txb_cache->base_ctx_arr[base_idx][coeff_idx][0];
- cost += get_base_cost(abs_qc, ctx, txb_probs->coeff_base, base_idx);
+ const int ctx = txb_cache->base_ctx_arr[base_idx][coeff_idx];
+ cost += get_base_cost(abs_qc, ctx, txb_costs->base_cost[base_idx][ctx],
+ base_idx);
if (scan_idx < txb_info->seg_eob) {
- const int eob_ctx =
- get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->txs_ctx);
- cost += av1_cost_bit(txb_probs->eob_flag[eob_ctx],
- scan_idx == (txb_info->eob - 1));
+ const int eob_ctx = get_eob_ctx(txb_info->qcoeff, coeff_idx,
+ txb_info->txs_ctx, txb_info->tx_type);
+ cost += txb_costs->eob_cost[eob_ctx][scan_idx == (txb_info->eob - 1)];
}
- cost += get_sign_bit_cost(qc, coeff_idx, txb_probs->dc_sign_prob,
+ cost += get_sign_bit_cost(qc, coeff_idx, txb_costs->dc_sign_cost,
txb_info->txb_ctx->dc_sign_ctx);
}
return cost;
@@ -963,7 +1388,8 @@ static INLINE void set_eob(TxbInfo *txb_info, int eob) {
// TODO(angiebird): add static to this function once it's called
int try_change_eob(int *new_eob, int coeff_idx, const TxbCache *txb_cache,
- const TxbProbs *txb_probs, TxbInfo *txb_info) {
+ const LV_MAP_COEFF_COST *txb_costs, TxbInfo *txb_info,
+ int fast_mode) {
assert(txb_info->eob > 0);
const tran_low_t qc = txb_info->qcoeff[coeff_idx];
const int abs_qc = abs(qc);
@@ -976,7 +1402,7 @@ int try_change_eob(int *new_eob, int coeff_idx, const TxbCache *txb_cache,
const int scan_idx = iscan[coeff_idx];
*new_eob = 0;
int cost_diff = 0;
- cost_diff -= get_low_coeff_cost(coeff_idx, txb_cache, txb_probs, txb_info);
+ cost_diff -= get_low_coeff_cost(coeff_idx, txb_cache, txb_costs, txb_info);
// int coeff_cost =
// get_coeff_cost(qc, scan_idx, txb_info, txb_probs);
// if (-cost_diff != coeff_cost) {
@@ -990,26 +1416,27 @@ int try_change_eob(int *new_eob, int coeff_idx, const TxbCache *txb_cache,
*new_eob = si + 1;
break;
} else {
- cost_diff -= get_low_coeff_cost(ci, txb_cache, txb_probs, txb_info);
+ cost_diff -= get_low_coeff_cost(ci, txb_cache, txb_costs, txb_info);
}
}
const int org_eob = txb_info->eob;
set_eob(txb_info, *new_eob);
- cost_diff += try_level_down(coeff_idx, txb_cache, txb_probs, txb_info, NULL);
+ cost_diff += try_level_down(coeff_idx, txb_cache, txb_costs, txb_info, NULL,
+ fast_mode);
set_eob(txb_info, org_eob);
if (*new_eob > 0) {
// Note that get_eob_ctx does NOT actually account for qcoeff, so we don't
// need to lower down the qcoeff here
- const int eob_ctx =
- get_eob_ctx(txb_info->qcoeff, scan[*new_eob - 1], txb_info->txs_ctx);
- cost_diff -= av1_cost_bit(txb_probs->eob_flag[eob_ctx], 0);
- cost_diff += av1_cost_bit(txb_probs->eob_flag[eob_ctx], 1);
+ const int eob_ctx = get_eob_ctx(txb_info->qcoeff, scan[*new_eob - 1],
+ txb_info->txs_ctx, txb_info->tx_type);
+ cost_diff -= txb_costs->eob_cost[eob_ctx][0];
+ cost_diff += txb_costs->eob_cost[eob_ctx][1];
} else {
const int txb_skip_ctx = txb_info->txb_ctx->txb_skip_ctx;
- cost_diff -= av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 0);
- cost_diff += av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 1);
+ cost_diff -= txb_costs->txb_skip_cost[txb_skip_ctx][0];
+ cost_diff += txb_costs->txb_skip_cost[txb_skip_ctx][1];
}
return cost_diff;
}
@@ -1053,17 +1480,19 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) {
assert(txb_cache->nz_count_arr[nb_coeff_idx] >= 0);
}
const int count = txb_cache->nz_count_arr[nb_coeff_idx];
- txb_cache->nz_ctx_arr[nb_coeff_idx][0] = get_nz_map_ctx_from_count(
- count, txb_info->qcoeff, nb_coeff_idx, txb_info->bwl, iscan);
+ txb_cache->nz_ctx_arr[nb_coeff_idx] = get_nz_map_ctx_from_count(
+ count, nb_coeff_idx, txb_info->bwl, txb_info->tx_type);
// int ref_ctx = get_nz_map_ctx(txb_info->qcoeff, nb_coeff_idx,
- // txb_info->bwl, iscan);
- // if (ref_ctx != txb_cache->nz_ctx_arr[nb_coeff_idx][0])
+ // txb_info->bwl, tx_type);
+ // if (ref_ctx != txb_cache->nz_ctx_arr[nb_coeff_idx])
// printf("nz ctx %d ref_ctx %d\n",
- // txb_cache->nz_ctx_arr[nb_coeff_idx][0], ref_ctx);
+ // txb_cache->nz_ctx_arr[nb_coeff_idx], ref_ctx);
}
}
}
+ const BASE_CTX_TABLE *base_ctx_table =
+ txb_info->coeff_ctx_table->base_ctx_table;
for (int i = 0; i < BASE_CONTEXT_POSITION_NUM; ++i) {
const int nb_row = row - base_ref_offset[i][0];
const int nb_col = col - base_ref_offset[i][1];
@@ -1089,13 +1518,13 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) {
assert(txb_cache->base_count_arr[base_idx][nb_coeff_idx] >= 0);
}
const int count = txb_cache->base_count_arr[base_idx][nb_coeff_idx];
- txb_cache->base_ctx_arr[base_idx][nb_coeff_idx][0] =
- get_base_ctx_from_count_mag(nb_row, nb_col, count, mag, level);
+ txb_cache->base_ctx_arr[base_idx][nb_coeff_idx] =
+ base_ctx_table[nb_row != 0][nb_col != 0][mag > level][count];
// int ref_ctx = get_base_ctx(txb_info->qcoeff, nb_coeff_idx,
// txb_info->bwl, level);
- // if (ref_ctx != txb_cache->base_ctx_arr[base_idx][nb_coeff_idx][0]) {
+ // if (ref_ctx != txb_cache->base_ctx_arr[base_idx][nb_coeff_idx]) {
// printf("base ctx %d ref_ctx %d\n",
- // txb_cache->base_ctx_arr[base_idx][nb_coeff_idx][0], ref_ctx);
+ // txb_cache->base_ctx_arr[base_idx][nb_coeff_idx], ref_ctx);
// }
}
}
@@ -1123,35 +1552,35 @@ void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) {
update_mag_arr(txb_cache->br_mag_arr[nb_coeff_idx], abs_qc);
const int count = txb_cache->br_count_arr[nb_coeff_idx];
const int mag = get_mag_from_mag_arr(txb_cache->br_mag_arr[nb_coeff_idx]);
- txb_cache->br_ctx_arr[nb_coeff_idx][0] =
+ txb_cache->br_ctx_arr[nb_coeff_idx] =
get_br_ctx_from_count_mag(nb_row, nb_col, count, mag);
// int ref_ctx = get_level_ctx(txb_info->qcoeff, nb_coeff_idx,
// txb_info->bwl);
- // if (ref_ctx != txb_cache->br_ctx_arr[nb_coeff_idx][0]) {
+ // if (ref_ctx != txb_cache->br_ctx_arr[nb_coeff_idx]) {
// printf("base ctx %d ref_ctx %d\n",
- // txb_cache->br_ctx_arr[nb_coeff_idx][0], ref_ctx);
+ // txb_cache->br_ctx_arr[nb_coeff_idx], ref_ctx);
// }
}
}
}
static int get_coeff_cost(tran_low_t qc, int scan_idx, TxbInfo *txb_info,
- const TxbProbs *txb_probs) {
+ const LV_MAP_COEFF_COST *txb_costs) {
const TXB_CTX *txb_ctx = txb_info->txb_ctx;
const int is_nz = (qc != 0);
const tran_low_t abs_qc = abs(qc);
int cost = 0;
const int16_t *scan = txb_info->scan_order->scan;
- const int16_t *iscan = txb_info->scan_order->iscan;
if (scan_idx < txb_info->seg_eob) {
- int coeff_ctx = get_nz_map_ctx(txb_info->qcoeff, scan[scan_idx],
- txb_info->bwl, txb_info->height, iscan);
- cost += av1_cost_bit(txb_probs->nz_map[coeff_ctx], is_nz);
+ int coeff_ctx =
+ get_nz_map_ctx(txb_info->qcoeff, scan_idx, scan, txb_info->bwl,
+ txb_info->height, txb_info->tx_type);
+ cost += txb_costs->nz_map_cost[coeff_ctx][is_nz];
}
if (is_nz) {
- cost += get_sign_bit_cost(qc, scan_idx, txb_probs->dc_sign_prob,
+ cost += get_sign_bit_cost(qc, scan_idx, txb_costs->dc_sign_cost,
txb_ctx->dc_sign_ctx);
int ctx_ls[NUM_BASE_LEVELS] = { 0 };
@@ -1160,21 +1589,21 @@ static int get_coeff_cost(tran_low_t qc, int scan_idx, TxbInfo *txb_info,
int i;
for (i = 0; i < NUM_BASE_LEVELS; ++i) {
- cost += get_base_cost(abs_qc, ctx_ls[i], txb_probs->coeff_base, i);
+ cost += get_base_cost(abs_qc, ctx_ls[i],
+ txb_costs->base_cost[i][ctx_ls[i]], i);
}
if (abs_qc > NUM_BASE_LEVELS) {
int ctx = get_br_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl,
txb_info->height);
- cost += get_br_cost(abs_qc, ctx, txb_probs->coeff_lps);
+ cost += get_br_cost(abs_qc, ctx, txb_costs->lps_cost[ctx]);
cost += get_golomb_cost(abs_qc);
}
if (scan_idx < txb_info->seg_eob) {
- int eob_ctx =
- get_eob_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->txs_ctx);
- cost += av1_cost_bit(txb_probs->eob_flag[eob_ctx],
- scan_idx == (txb_info->eob - 1));
+ int eob_ctx = get_eob_ctx(txb_info->qcoeff, scan[scan_idx],
+ txb_info->txs_ctx, txb_info->tx_type);
+ cost += txb_costs->eob_cost[eob_ctx][scan_idx == (txb_info->eob - 1)];
}
}
return cost;
@@ -1188,7 +1617,7 @@ static int all_ref_offset[ALL_REF_OFFSET_NUM][2] = {
{ 1, 0 }, { 2, 0 }, { 0, 1 }, { 0, 2 }, { 1, 1 },
};
-static int try_level_down_ref(int coeff_idx, const TxbProbs *txb_probs,
+static int try_level_down_ref(int coeff_idx, const LV_MAP_COEFF_COST *txb_costs,
TxbInfo *txb_info,
int (*cost_map)[COST_MAP_SIZE]) {
if (cost_map) {
@@ -1205,9 +1634,9 @@ static int try_level_down_ref(int coeff_idx, const TxbProbs *txb_probs,
int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
int nb_scan_idx = txb_info->scan_order->iscan[nb_coeff_idx];
if (nb_scan_idx < txb_info->eob && nb_row >= 0 && nb_col >= 0 &&
- nb_row < txb_info->stride && nb_col < txb_info->stride) {
+ nb_row < txb_info->height && nb_col < txb_info->stride) {
tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
- int cost = get_coeff_cost(nb_coeff, nb_scan_idx, txb_info, txb_probs);
+ int cost = get_coeff_cost(nb_coeff, nb_scan_idx, txb_info, txb_costs);
if (cost_map)
cost_map[nb_row - row + COST_MAP_OFFSET]
[nb_col - col + COST_MAP_OFFSET] -= cost;
@@ -1222,9 +1651,9 @@ static int try_level_down_ref(int coeff_idx, const TxbProbs *txb_probs,
int nb_coeff_idx = nb_row * txb_info->stride + nb_col;
int nb_scan_idx = txb_info->scan_order->iscan[nb_coeff_idx];
if (nb_scan_idx < txb_info->eob && nb_row >= 0 && nb_col >= 0 &&
- nb_row < txb_info->stride && nb_col < txb_info->stride) {
+ nb_row < txb_info->height && nb_col < txb_info->stride) {
tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx];
- int cost = get_coeff_cost(nb_coeff, nb_scan_idx, txb_info, txb_probs);
+ int cost = get_coeff_cost(nb_coeff, nb_scan_idx, txb_info, txb_costs);
if (cost_map)
cost_map[nb_row - row + COST_MAP_OFFSET]
[nb_col - col + COST_MAP_OFFSET] += cost;
@@ -1236,13 +1665,14 @@ static int try_level_down_ref(int coeff_idx, const TxbProbs *txb_probs,
}
static void test_level_down(int coeff_idx, const TxbCache *txb_cache,
- const TxbProbs *txb_probs, TxbInfo *txb_info) {
+ const LV_MAP_COEFF_COST *txb_costs,
+ TxbInfo *txb_info) {
int cost_map[COST_MAP_SIZE][COST_MAP_SIZE];
int ref_cost_map[COST_MAP_SIZE][COST_MAP_SIZE];
const int cost_diff =
- try_level_down(coeff_idx, txb_cache, txb_probs, txb_info, cost_map);
+ try_level_down(coeff_idx, txb_cache, txb_costs, txb_info, cost_map, 0);
const int cost_diff_ref =
- try_level_down_ref(coeff_idx, txb_probs, txb_info, ref_cost_map);
+ try_level_down_ref(coeff_idx, txb_costs, txb_info, ref_cost_map);
if (cost_diff != cost_diff_ref) {
printf("qc %d cost_diff %d cost_diff_ref %d\n", txb_info->qcoeff[coeff_idx],
cost_diff, cost_diff_ref);
@@ -1257,25 +1687,25 @@ static void test_level_down(int coeff_idx, const TxbCache *txb_cache,
#endif
// TODO(angiebird): make this static once it's called
-int get_txb_cost(TxbInfo *txb_info, const TxbProbs *txb_probs) {
+int get_txb_cost(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs) {
int cost = 0;
int txb_skip_ctx = txb_info->txb_ctx->txb_skip_ctx;
const int16_t *scan = txb_info->scan_order->scan;
if (txb_info->eob == 0) {
- cost = av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 1);
+ cost = txb_costs->txb_skip_cost[txb_skip_ctx][1];
return cost;
}
- cost = av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 0);
+ cost = txb_costs->txb_skip_cost[txb_skip_ctx][0];
for (int c = 0; c < txb_info->eob; ++c) {
tran_low_t qc = txb_info->qcoeff[scan[c]];
- int coeff_cost = get_coeff_cost(qc, c, txb_info, txb_probs);
+ int coeff_cost = get_coeff_cost(qc, c, txb_info, txb_costs);
cost += coeff_cost;
}
return cost;
}
#if TEST_OPTIMIZE_TXB
-void test_try_change_eob(TxbInfo *txb_info, TxbProbs *txb_probs,
+void test_try_change_eob(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
TxbCache *txb_cache) {
int eob = txb_info->eob;
const int16_t *scan = txb_info->scan_order->scan;
@@ -1286,13 +1716,13 @@ void test_try_change_eob(TxbInfo *txb_info, TxbProbs *txb_probs,
if (abs(last_coeff) == 1) {
int new_eob;
int cost_diff =
- try_change_eob(&new_eob, last_ci, txb_cache, txb_probs, txb_info);
+ try_change_eob(&new_eob, last_ci, txb_cache, txb_costs, txb_info, 0);
int org_eob = txb_info->eob;
- int cost = get_txb_cost(txb_info, txb_probs);
+ int cost = get_txb_cost(txb_info, txb_costs);
txb_info->qcoeff[last_ci] = get_lower_coeff(last_coeff);
set_eob(txb_info, new_eob);
- int new_cost = get_txb_cost(txb_info, txb_probs);
+ int new_cost = get_txb_cost(txb_info, txb_costs);
set_eob(txb_info, org_eob);
txb_info->qcoeff[last_ci] = last_coeff;
@@ -1323,8 +1753,9 @@ typedef struct LevelDownStats {
} LevelDownStats;
void try_level_down_facade(LevelDownStats *stats, int scan_idx,
- const TxbCache *txb_cache, const TxbProbs *txb_probs,
- TxbInfo *txb_info) {
+ const TxbCache *txb_cache,
+ const LV_MAP_COEFF_COST *txb_costs,
+ TxbInfo *txb_info, int fast_mode) {
const int16_t *scan = txb_info->scan_order->scan;
const int coeff_idx = scan[scan_idx];
const tran_low_t qc = txb_info->qcoeff[coeff_idx];
@@ -1350,12 +1781,12 @@ void try_level_down_facade(LevelDownStats *stats, int scan_idx,
stats->new_eob = txb_info->eob;
if (scan_idx == txb_info->eob - 1 && abs(qc) == 1) {
stats->cost_diff = try_change_eob(&stats->new_eob, coeff_idx, txb_cache,
- txb_probs, txb_info);
+ txb_costs, txb_info, fast_mode);
} else {
- stats->cost_diff =
- try_level_down(coeff_idx, txb_cache, txb_probs, txb_info, NULL);
+ stats->cost_diff = try_level_down(coeff_idx, txb_cache, txb_costs, txb_info,
+ NULL, fast_mode);
#if TEST_OPTIMIZE_TXB
- test_level_down(coeff_idx, txb_cache, txb_probs, txb_info);
+ test_level_down(coeff_idx, txb_cache, txb_costs, txb_info);
#endif
}
stats->rd_diff = RDCOST(txb_info->rdmult, stats->cost_diff, stats->dist_diff);
@@ -1363,8 +1794,8 @@ void try_level_down_facade(LevelDownStats *stats, int scan_idx,
return;
}
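// Illustrative sketch, not part of this patch: try_level_down_facade() above
// folds the rate delta (cost_diff) and distortion delta (dist_diff) into one
// rate-distortion delta via RDCOST, i.e. the Lagrangian J = D + lambda * R
// with rdmult acting as a fixed-point lambda. A generic sketch of that
// combination; the real RDCOST macro in the encoder may scale differently:
static INLINE int64_t rd_cost_sketch(int64_t rdmult, int rate, int64_t dist) {
  // weight the rate by lambda (rdmult) and add the distortion directly
  return dist + ((rdmult * (int64_t)rate) >> 9);  // >> 9 is illustrative
}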
-static int optimize_txb(TxbInfo *txb_info, const TxbProbs *txb_probs,
- TxbCache *txb_cache, int dry_run) {
+static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
+ TxbCache *txb_cache, int dry_run, int fast_mode) {
int update = 0;
if (txb_info->eob == 0) return update;
int cost_diff = 0;
@@ -1377,7 +1808,7 @@ static int optimize_txb(TxbInfo *txb_info, const TxbProbs *txb_probs,
int64_t org_dist =
av1_block_error_c(txb_info->tcoeff, txb_info->dqcoeff, max_eob, &sse) *
(1 << (2 * txb_info->shift));
- int org_cost = get_txb_cost(txb_info, txb_probs);
+ int org_cost = get_txb_cost(txb_info, txb_costs);
#endif
tran_low_t *org_qcoeff = txb_info->qcoeff;
@@ -1402,7 +1833,8 @@ static int optimize_txb(TxbInfo *txb_info, const TxbProbs *txb_probs,
tran_low_t qc = txb_info->qcoeff[coeff_idx];
if (abs(qc) == 1) {
LevelDownStats stats;
- try_level_down_facade(&stats, si, txb_cache, txb_probs, txb_info);
+ try_level_down_facade(&stats, si, txb_cache, txb_costs, txb_info,
+ fast_mode);
if (stats.update) {
update = 1;
cost_diff += stats.cost_diff;
@@ -1415,10 +1847,17 @@ static int optimize_txb(TxbInfo *txb_info, const TxbProbs *txb_probs,
}
// backward optimize the level-k map
+ int eob_fix = 0;
for (int si = txb_info->eob - 1; si >= 0; --si) {
- LevelDownStats stats;
- try_level_down_facade(&stats, si, txb_cache, txb_probs, txb_info);
const int coeff_idx = scan[si];
+ if (eob_fix == 1 && txb_info->qcoeff[coeff_idx] == 1) {
+ // when eob is fixed, there is no need to optimize again when
+ // abs(qc) == 1
+ continue;
+ }
+ LevelDownStats stats;
+ try_level_down_facade(&stats, si, txb_cache, txb_costs, txb_info,
+ fast_mode);
if (stats.update) {
#if TEST_OPTIMIZE_TXB
// printf("si %d low_qc %d cost_diff %d dist_diff %ld rd_diff %ld eob %d new_eob
@@ -1432,13 +1871,14 @@ static int optimize_txb(TxbInfo *txb_info, const TxbProbs *txb_probs,
update_level_down(coeff_idx, txb_cache, txb_info);
set_eob(txb_info, stats.new_eob);
}
+ if (eob_fix == 0 && txb_info->qcoeff[coeff_idx] != 0) eob_fix = 1;
if (si > txb_info->eob) si = txb_info->eob;
}
#if TEST_OPTIMIZE_TXB
int64_t new_dist =
av1_block_error_c(txb_info->tcoeff, txb_info->dqcoeff, max_eob, &sse) *
(1 << (2 * txb_info->shift));
- int new_cost = get_txb_cost(txb_info, txb_probs);
+ int new_cost = get_txb_cost(txb_info, txb_costs);
int64_t ref_dist_diff = new_dist - org_dist;
int ref_cost_diff = new_cost - org_cost;
if (cost_diff != ref_cost_diff || dist_diff != ref_dist_diff)
@@ -1463,7 +1903,7 @@ static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
int blk_row, int blk_col, int block, TX_SIZE tx_size,
- TXB_CTX *txb_ctx) {
+ TXB_CTX *txb_ctx, int fast_mode) {
MACROBLOCKD *const xd = &x->e_mbd;
const PLANE_TYPE plane_type = get_plane_type(plane);
const TX_SIZE txs_ctx = get_txsize_context(tx_size);
@@ -1478,38 +1918,40 @@ int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block);
const int16_t *dequant = pd->dequant;
const int seg_eob = AOMMIN(eob, tx_size_2d[tx_size] - 1);
- const aom_prob *nz_map = xd->fc->nz_map[txs_ctx][plane_type];
-
const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
const int stride = 1 << bwl;
const int height = tx_size_high[tx_size];
- aom_prob(*coeff_base)[COEFF_BASE_CONTEXTS] =
- xd->fc->coeff_base[txs_ctx][plane_type];
-
- const aom_prob *coeff_lps = xd->fc->coeff_lps[txs_ctx][plane_type];
-
const int is_inter = is_inter_block(mbmi);
const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
-
- const TxbProbs txb_probs = { xd->fc->dc_sign[plane_type],
- nz_map,
- coeff_base,
- coeff_lps,
- xd->fc->eob_flag[txs_ctx][plane_type],
- xd->fc->txb_skip[txs_ctx] };
+ const LV_MAP_COEFF_COST txb_costs = x->coeff_costs[txs_ctx][plane_type];
const int shift = av1_get_tx_scale(tx_size);
const int64_t rdmult =
(x->rdmult * plane_rd_mult[is_inter][plane_type] + 2) >> 2;
- TxbInfo txb_info = { qcoeff, dqcoeff, tcoeff, dequant, shift,
- tx_size, txs_ctx, bwl, stride, height,
- eob, seg_eob, scan_order, txb_ctx, rdmult };
+ TxbInfo txb_info = { qcoeff,
+ dqcoeff,
+ tcoeff,
+ dequant,
+ shift,
+ tx_size,
+ txs_ctx,
+ tx_type,
+ bwl,
+ stride,
+ height,
+ eob,
+ seg_eob,
+ scan_order,
+ txb_ctx,
+ rdmult,
+ &cm->coeff_ctx_table };
TxbCache txb_cache;
gen_txb_cache(&txb_cache, &txb_info);
- const int update = optimize_txb(&txb_info, &txb_probs, &txb_cache, 0);
+ const int update =
+ optimize_txb(&txb_info, &txb_costs, &txb_cache, 0, fast_mode);
if (update) p->eobs[block] = txb_info.eob;
return txb_info.eob;
}
@@ -1518,6 +1960,8 @@ int av1_get_txb_entropy_context(const tran_low_t *qcoeff,
const int16_t *scan = scan_order->scan;
int cul_level = 0;
int c;
+
+ if (eob == 0) return 0;
for (c = 0; c < eob; ++c) {
cul_level += abs(qcoeff[scan[c]]);
}
@@ -1552,6 +1996,153 @@ void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col,
av1_set_contexts(xd, pd, plane, tx_size, cul_level, blk_col, blk_row);
}
+static INLINE void av1_update_nz_eob_counts(FRAME_CONTEXT *fc,
+ FRAME_COUNTS *counts, uint16_t eob,
+ const tran_low_t *tcoeff, int plane,
+ TX_SIZE tx_size, TX_TYPE tx_type,
+ const int16_t *scan) {
+ const PLANE_TYPE plane_type = get_plane_type(plane);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int height = tx_size_high[tx_size];
+ TX_SIZE txsize_ctx = get_txsize_context(tx_size);
+#if CONFIG_CTX1D
+ const int width = tx_size_wide[tx_size];
+ const int eob_offset = width + height;
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int seg_eob =
+ (tx_class == TX_CLASS_2D) ? tx_size_2d[tx_size] : eob_offset;
+#else
+ const int seg_eob = tx_size_2d[tx_size];
+#endif
+ unsigned int(*nz_map_count)[SIG_COEF_CONTEXTS][2] =
+ &counts->nz_map[txsize_ctx][plane_type];
+ for (int c = 0; c < eob; ++c) {
+ tran_low_t v = tcoeff[scan[c]];
+ int is_nz = (v != 0);
+ int coeff_ctx = get_nz_map_ctx(tcoeff, c, scan, bwl, height, tx_type);
+ int eob_ctx = get_eob_ctx(tcoeff, scan[c], txsize_ctx, tx_type);
+
+ if (c == seg_eob - 1) break;
+
+ ++(*nz_map_count)[coeff_ctx][is_nz];
+#if LV_MAP_PROB
+ update_bin(fc->nz_map_cdf[txsize_ctx][plane_type][coeff_ctx], is_nz, 2);
+#endif
+
+ if (is_nz) {
+ ++counts->eob_flag[txsize_ctx][plane_type][eob_ctx][c == (eob - 1)];
+#if LV_MAP_PROB
+ update_bin(fc->eob_flag_cdf[txsize_ctx][plane_type][eob_ctx],
+ c == (eob - 1), 2);
+#endif
+ }
+ }
+}
+
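// Illustrative sketch, not part of this patch: under LV_MAP_PROB every raw
// count bump above is mirrored by update_bin(), which nudges a two-symbol
// CDF toward the bit just observed. A generic sketch of one such adaptation
// step, assuming 15-bit probabilities and a fixed rate; the codec's real
// CDF update derives the adaptation rate from a per-context counter:
static INLINE void update_bin_sketch(uint16_t *cdf, int bit) {
  const int rate = 5;  // illustrative fixed adaptation speed
  if (bit)
    cdf[0] -= cdf[0] >> rate;  // saw a 1: shrink the cumulative P(0)
  else
    cdf[0] += ((1 << 15) - cdf[0]) >> rate;  // saw a 0: grow P(0)
}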
+#if CONFIG_CTX1D
+static INLINE void av1_update_nz_eob_counts_vert(
+ FRAME_CONTEXT *fc, FRAME_COUNTS *counts, uint16_t eob,
+ const tran_low_t *tcoeff, int plane, TX_SIZE tx_size, TX_TYPE tx_type,
+ const int16_t *scan, const int16_t *iscan) {
+ (void)eob;
+ const TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const PLANE_TYPE plane_type = get_plane_type(plane);
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int width = tx_size_wide[tx_size];
+ const int height = tx_size_high[tx_size];
+ int16_t eob_ls[MAX_HVTX_SIZE];
+ get_eob_vert(eob_ls, tcoeff, width, height);
+ unsigned int(*nz_map_count)[SIG_COEF_CONTEXTS][2] =
+ &counts->nz_map[txs_ctx][plane_type];
+ for (int c = 0; c < width; ++c) {
+ int16_t veob = eob_ls[c];
+ assert(veob <= height);
+ int el_ctx = get_empty_line_ctx(c, eob_ls);
+ ++counts->empty_line[txs_ctx][plane_type][tx_class][el_ctx][veob == 0];
+#if LV_MAP_PROB
+ update_bin(fc->empty_line_cdf[txs_ctx][plane_type][tx_class][el_ctx],
+ veob == 0, 2);
+#endif
+ if (veob) {
+ for (int r = 0; r < veob; ++r) {
+ if (r + 1 != height) {
+ int coeff_idx = r * width + c;
+ int scan_idx = iscan[coeff_idx];
+ int is_nz = tcoeff[coeff_idx] != 0;
+ int coeff_ctx =
+ get_nz_map_ctx(tcoeff, scan_idx, scan, bwl, height, tx_type);
+ ++(*nz_map_count)[coeff_ctx][is_nz];
+#if LV_MAP_PROB
+ update_bin(fc->nz_map_cdf[txs_ctx][plane_type][coeff_ctx], is_nz, 2);
+#endif
+ if (is_nz) {
+ int eob_ctx = get_hv_eob_ctx(c, r, eob_ls);
+ ++counts->hv_eob[txs_ctx][plane_type][tx_class][eob_ctx]
+ [r == veob - 1];
+#if LV_MAP_PROB
+ update_bin(fc->hv_eob_cdf[txs_ctx][plane_type][tx_class][eob_ctx],
+ r == veob - 1, 2);
+#endif
+ }
+ }
+ }
+ }
+ }
+}
+
+static INLINE void av1_update_nz_eob_counts_horiz(
+ FRAME_CONTEXT *fc, FRAME_COUNTS *counts, uint16_t eob,
+ const tran_low_t *tcoeff, int plane, TX_SIZE tx_size, TX_TYPE tx_type,
+ const int16_t *scan, const int16_t *iscan) {
+ (void)eob;
+ (void)scan;
+ const TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ const PLANE_TYPE plane_type = get_plane_type(plane);
+ const TX_CLASS tx_class = get_tx_class(tx_type);
+ const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
+ const int width = tx_size_wide[tx_size];
+ const int height = tx_size_high[tx_size];
+ int16_t eob_ls[MAX_HVTX_SIZE];
+ get_eob_horiz(eob_ls, tcoeff, width, height);
+ unsigned int(*nz_map_count)[SIG_COEF_CONTEXTS][2] =
+ &counts->nz_map[txs_ctx][plane_type];
+ for (int r = 0; r < height; ++r) {
+ int16_t heob = eob_ls[r];
+ int el_ctx = get_empty_line_ctx(r, eob_ls);
+ ++counts->empty_line[txs_ctx][plane_type][tx_class][el_ctx][heob == 0];
+#if LV_MAP_PROB
+ update_bin(fc->empty_line_cdf[txs_ctx][plane_type][tx_class][el_ctx],
+ heob == 0, 2);
+#endif
+ if (heob) {
+ for (int c = 0; c < heob; ++c) {
+ if (c + 1 != width) {
+ int coeff_idx = r * width + c;
+ int scan_idx = iscan[coeff_idx];
+ int is_nz = tcoeff[coeff_idx] != 0;
+ int coeff_ctx =
+ get_nz_map_ctx(tcoeff, scan_idx, scan, bwl, height, tx_type);
+ ++(*nz_map_count)[coeff_ctx][is_nz];
+#if LV_MAP_PROB
+ update_bin(fc->nz_map_cdf[txs_ctx][plane_type][coeff_ctx], is_nz, 2);
+#endif
+ if (is_nz) {
+ int eob_ctx = get_hv_eob_ctx(r, c, eob_ls);
+ ++counts->hv_eob[txs_ctx][plane_type][tx_class][eob_ctx]
+ [c == heob - 1];
+#if LV_MAP_PROB
+ update_bin(fc->hv_eob_cdf[txs_ctx][plane_type][tx_class][eob_ctx],
+ c == heob - 1, 2);
+#endif
+ }
+ }
+ }
+ }
+ }
+}
+#endif // CONFIG_CTX1D
+
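// Illustrative sketch, not part of this patch: the CONFIG_CTX1D paths above
// code a 1D (vertical/horizontal) transform class line by line, driven by a
// per-line end-of-block array. A minimal sketch of what get_eob_vert()
// plausibly computes, assuming eob_ls[c] holds 1 + the row of the last
// nonzero coefficient in column c (0 for an empty column), which matches the
// assert(veob <= height) and the r < veob loop above:
static INLINE void get_eob_vert_sketch(int16_t *eob_ls,
                                       const tran_low_t *tcoeff, int width,
                                       int height) {
  for (int c = 0; c < width; ++c) {
    eob_ls[c] = 0;
    for (int r = height - 1; r >= 0; --r) {
      if (tcoeff[r * width + c] != 0) {
        eob_ls[c] = (int16_t)(r + 1);  // rows 0..veob-1 carry the column
        break;
      }
    }
  }
}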
void av1_update_and_record_txb_context(int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
@@ -1573,8 +2164,7 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
const int16_t *scan = scan_order->scan;
- const int16_t *iscan = scan_order->iscan;
- const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
+ const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size);
int c, i;
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + blk_col,
@@ -1582,15 +2172,17 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2;
const int height = tx_size_high[tx_size];
int cul_level = 0;
- unsigned int(*nz_map_count)[SIG_COEF_CONTEXTS][2];
TX_SIZE txsize_ctx = get_txsize_context(tx_size);
-
- nz_map_count = &td->counts->nz_map[txsize_ctx][plane_type];
+ FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob);
++td->counts->txb_skip[txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->txb_skip_cdf[txsize_ctx][txb_ctx.txb_skip_ctx], eob == 0,
+ 2);
+#endif
x->mbmi_ext->txb_skip_ctx[plane][block] = txb_ctx.txb_skip_ctx;
x->mbmi_ext->eobs[plane][block] = eob;
@@ -1605,20 +2197,39 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
mbmi->sb_type, get_min_tx_size(tx_size), td->counts);
#endif
- for (c = 0; c < eob; ++c) {
- tran_low_t v = qcoeff[scan[c]];
- int is_nz = (v != 0);
- int coeff_ctx = get_nz_map_ctx(tcoeff, scan[c], bwl, height, iscan);
- int eob_ctx = get_eob_ctx(tcoeff, scan[c], txsize_ctx);
-
- if (c == seg_eob - 1) break;
-
- ++(*nz_map_count)[coeff_ctx][is_nz];
-
- if (is_nz) {
- ++td->counts->eob_flag[txsize_ctx][plane_type][eob_ctx][c == (eob - 1)];
+#if CONFIG_CTX1D
+ TX_CLASS tx_class = get_tx_class(tx_type);
+ if (tx_class == TX_CLASS_2D) {
+ av1_update_nz_eob_counts(ec_ctx, td->counts, eob, tcoeff, plane, tx_size,
+ tx_type, scan);
+ } else {
+ const int width = tx_size_wide[tx_size];
+ const int eob_offset = width + height;
+ const int eob_mode = eob > eob_offset;
+ const TX_SIZE txs_ctx = get_txsize_context(tx_size);
+ ++td->counts->eob_mode[txs_ctx][plane_type][tx_class][eob_mode];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->eob_mode_cdf[txs_ctx][plane_type][tx_class], eob_mode,
+ 2);
+#endif
+ if (eob_mode == 0) {
+ av1_update_nz_eob_counts(ec_ctx, td->counts, eob, tcoeff, plane, tx_size,
+ tx_type, scan);
+ } else {
+ const int16_t *iscan = scan_order->iscan;
+ assert(tx_class == TX_CLASS_VERT || tx_class == TX_CLASS_HORIZ);
+ if (tx_class == TX_CLASS_VERT)
+ av1_update_nz_eob_counts_vert(ec_ctx, td->counts, eob, tcoeff, plane,
+ tx_size, tx_type, scan, iscan);
+ else
+ av1_update_nz_eob_counts_horiz(ec_ctx, td->counts, eob, tcoeff, plane,
+ tx_size, tx_type, scan, iscan);
}
}
+#else // CONFIG_CTX1D
+ av1_update_nz_eob_counts(ec_ctx, td->counts, eob, tcoeff, plane, tx_size,
+ tx_type, scan);
+#endif // CONFIG_CTX1D
// Reverse process order to handle coefficient level and sign.
for (i = 0; i < NUM_BASE_LEVELS; ++i) {
@@ -1634,16 +2245,26 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
if (level == i + 1) {
++td->counts->coeff_base[txsize_ctx][plane_type][i][ctx][1];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][i][ctx], 1,
+ 2);
+#endif
if (c == 0) {
int dc_sign_ctx = txb_ctx.dc_sign_ctx;
++td->counts->dc_sign[plane_type][dc_sign_ctx][v < 0];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], v < 0, 2);
+#endif
x->mbmi_ext->dc_sign_ctx[plane][block] = dc_sign_ctx;
}
cul_level += level;
continue;
}
++td->counts->coeff_base[txsize_ctx][plane_type][i][ctx][0];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][i][ctx], 0, 2);
+#endif
update_eob = AOMMAX(update_eob, c);
}
}
@@ -1661,20 +2282,68 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
int dc_sign_ctx = txb_ctx.dc_sign_ctx;
++td->counts->dc_sign[plane_type][dc_sign_ctx][v < 0];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], v < 0, 2);
+#endif
x->mbmi_ext->dc_sign_ctx[plane][block] = dc_sign_ctx;
}
// level is above 1.
ctx = get_br_ctx(tcoeff, scan[c], bwl, height);
+
+#if BR_NODE
+ int base_range = level - 1 - NUM_BASE_LEVELS;
+ int br_set_idx = base_range < COEFF_BASE_RANGE
+ ? coeff_to_br_index[base_range]
+ : BASE_RANGE_SETS;
+
+ for (idx = 0; idx < BASE_RANGE_SETS; ++idx) {
+ if (idx == br_set_idx) {
+ int br_base = br_index_to_coeff[br_set_idx];
+ int br_offset = base_range - br_base;
+ ++td->counts->coeff_br[txsize_ctx][plane_type][idx][ctx][1];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->coeff_br_cdf[txsize_ctx][plane_type][idx][ctx], 1,
+ 2);
+#endif
+ int extra_bits = (1 << br_extra_bits[idx]) - 1;
+ for (int tok = 0; tok < extra_bits; ++tok) {
+ if (br_offset == tok) {
+ ++td->counts->coeff_lps[txsize_ctx][plane_type][ctx][1];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->coeff_lps_cdf[txsize_ctx][plane_type][ctx], 1,
+ 2);
+#endif
+ break;
+ }
+ ++td->counts->coeff_lps[txsize_ctx][plane_type][ctx][0];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->coeff_lps_cdf[txsize_ctx][plane_type][ctx], 0, 2);
+#endif
+ }
+ break;
+ }
+ ++td->counts->coeff_br[txsize_ctx][plane_type][idx][ctx][0];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->coeff_br_cdf[txsize_ctx][plane_type][idx][ctx], 0, 2);
+#endif
+ }
+#else // BR_NODE
for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) {
if (level == (idx + 1 + NUM_BASE_LEVELS)) {
++td->counts->coeff_lps[txsize_ctx][plane_type][ctx][1];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->coeff_lps_cdf[txsize_ctx][plane_type][ctx], 1, 2);
+#endif
break;
}
++td->counts->coeff_lps[txsize_ctx][plane_type][ctx][0];
+#if LV_MAP_PROB
+ update_bin(ec_ctx->coeff_lps_cdf[txsize_ctx][plane_type][ctx], 0, 2);
+#endif
}
if (idx < COEFF_BASE_RANGE) continue;
-
+#endif // BR_NODE
// use 0-th order Golomb code to handle the residual level.
}
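// Illustrative sketch, not part of this patch: levels that exhaust the base
// and base-range symbols above leave a residual v >= 0 that is coded with a
// 0th-order Exp-Golomb code of 2*floor(log2(v + 1)) + 1 bits. A minimal
// sketch of that bit count; the encoder's get_golomb_cost() additionally
// converts bits into its internal cost units:
static INLINE int golomb_bits_sketch(int v) {
  int length = 0;
  int x = v + 1;
  while (x) {  // length = floor(log2(v + 1)) + 1
    ++length;
    x >>= 1;
  }
  return 2 * length - 1;  // (length - 1) prefix zeros + length value bits
}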
@@ -1848,6 +2517,10 @@ void av1_write_txb_probs(AV1_COMP *cpi, aom_writer *w) {
TX_SIZE tx_size;
int ctx, plane;
+#if LV_MAP_PROB
+ return;
+#endif
+
for (plane = 0; plane < PLANE_TYPES; ++plane)
for (ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
av1_cond_prob_diff_update(w, &cpi->common.fc->dc_sign[plane][ctx],
@@ -1888,12 +2561,11 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
}
#if CONFIG_EXT_TX
- int is_inter = is_inter_block(mbmi);
- int ext_tx_set = get_ext_tx_set(get_min_tx_size(tx_size), mbmi->sb_type,
- is_inter, cm->reduced_tx_set_used);
- if (!(is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) &&
- !(!is_inter && ext_tx_used_intra[ext_tx_set][tx_type]))
- continue;
+ const int is_inter = is_inter_block(mbmi);
+ const TxSetType tx_set_type =
+ get_ext_tx_set_type(get_min_tx_size(tx_size), mbmi->sb_type, is_inter,
+ cm->reduced_tx_set_used);
+ if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
#endif // CONFIG_EXT_TX
RD_STATS this_rd_stats;
@@ -1901,7 +2573,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
- a, l);
+ a, l, 1);
av1_dist_block(cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size,
&this_rd_stats.dist, &this_rd_stats.sse,
OUTPUT_HAS_PREDICTED_PIXELS);
@@ -1921,10 +2593,6 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
av1_merge_rd_stats(rd_stats, &best_rd_stats);
- // if (x->plane[plane].eobs[block] == 0)
- // if (best_tx_type != DCT_DCT)
- // exit(0);
-
if (best_eob == 0 && is_inter_block(mbmi)) best_tx_type = DCT_DCT;
if (plane == 0) mbmi->txk_type[(blk_row << 4) + blk_col] = best_tx_type;
@@ -1936,7 +2604,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
- a, l);
+ a, l, 1);
av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
x->plane[plane].eobs[block]);
diff --git a/third_party/aom/av1/encoder/encodetxb.h b/third_party/aom/av1/encoder/encodetxb.h
index cbafe59c9..76a04bb41 100644
--- a/third_party/aom/av1/encoder/encodetxb.h
+++ b/third_party/aom/av1/encoder/encodetxb.h
@@ -31,6 +31,7 @@ typedef struct TxbInfo {
int shift;
TX_SIZE tx_size;
TX_SIZE txs_ctx;
+ TX_TYPE tx_type;
int bwl;
int stride;
int height;
@@ -39,20 +40,21 @@ typedef struct TxbInfo {
const SCAN_ORDER *scan_order;
TXB_CTX *txb_ctx;
int64_t rdmult;
+ const LV_MAP_CTX_TABLE *coeff_ctx_table;
} TxbInfo;
typedef struct TxbCache {
int nz_count_arr[MAX_TX_SQUARE];
- int nz_ctx_arr[MAX_TX_SQUARE][2];
+ int nz_ctx_arr[MAX_TX_SQUARE];
int base_count_arr[NUM_BASE_LEVELS][MAX_TX_SQUARE];
int base_mag_arr[MAX_TX_SQUARE]
[2]; // [0]: max magnitude [1]: num of max magnitude
- int base_ctx_arr[NUM_BASE_LEVELS][MAX_TX_SQUARE][2]; // [1]: not used
+ int base_ctx_arr[NUM_BASE_LEVELS][MAX_TX_SQUARE];
int br_count_arr[MAX_TX_SQUARE];
int br_mag_arr[MAX_TX_SQUARE]
[2]; // [0]: max magnitude [1]: num of max magnitude
- int br_ctx_arr[MAX_TX_SQUARE][2]; // [1]: not used
+ int br_ctx_arr[MAX_TX_SQUARE];
} TxbCache;
typedef struct TxbProbs {
@@ -62,11 +64,14 @@ typedef struct TxbProbs {
const aom_prob *coeff_lps;
const aom_prob *eob_flag;
const aom_prob *txb_skip;
+#if BR_NODE
+ const aom_prob *coeff_br;
+#endif
} TxbProbs;
void av1_alloc_txb_buf(AV1_COMP *cpi);
void av1_free_txb_buf(AV1_COMP *cpi);
-int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
+int av1_cost_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
int blk_row, int blk_col, int block, TX_SIZE tx_size,
TXB_CTX *txb_ctx);
void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
@@ -90,6 +95,9 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg);
+void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x,
+ int mi_row, int mi_col);
+
#if CONFIG_TXK_SEL
int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
int block, int blk_row, int blk_col,
@@ -99,7 +107,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
#endif
int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
int blk_row, int blk_col, int block, TX_SIZE tx_size,
- TXB_CTX *txb_ctx);
+ TXB_CTX *txb_ctx, int fast_mode);
#ifdef __cplusplus
}
#endif
diff --git a/third_party/aom/av1/encoder/ethread.c b/third_party/aom/av1/encoder/ethread.c
index 1aa1d52a2..edc9b1d61 100644
--- a/third_party/aom/av1/encoder/ethread.c
+++ b/third_party/aom/av1/encoder/ethread.c
@@ -15,13 +15,11 @@
#include "aom_dsp/aom_dsp_common.h"
static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
- int i, j, k, l, m, n;
-
- for (i = 0; i < REFERENCE_MODES; i++)
+ for (int i = 0; i < REFERENCE_MODES; i++)
td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
#if CONFIG_GLOBAL_MOTION
- for (i = 0; i < TOTAL_REFS_PER_FRAME; i++)
+ for (int i = 0; i < TOTAL_REFS_PER_FRAME; i++)
td->rd_counts.global_motion_used[i] +=
td_t->rd_counts.global_motion_used[i];
#endif // CONFIG_GLOBAL_MOTION
@@ -29,15 +27,6 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
td->rd_counts.compound_ref_used_flag |=
td_t->rd_counts.compound_ref_used_flag;
td->rd_counts.single_ref_used_flag |= td_t->rd_counts.single_ref_used_flag;
-
- for (i = 0; i < TX_SIZES; i++)
- for (j = 0; j < PLANE_TYPES; j++)
- for (k = 0; k < REF_TYPES; k++)
- for (l = 0; l < COEF_BANDS; l++)
- for (m = 0; m < COEFF_CONTEXTS; m++)
- for (n = 0; n < ENTROPY_TOKENS; n++)
- td->rd_counts.coef_counts[i][j][k][l][m][n] +=
- td_t->rd_counts.coef_counts[i][j][k][l][m][n];
}
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
@@ -92,8 +81,10 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) {
aom_memalign(32, sizeof(*thread_data->td)));
av1_zero(*thread_data->td);
- // Set up pc_tree.
+// Set up pc_tree.
+#if !CONFIG_CB4X4
thread_data->td->leaf_tree = NULL;
+#endif
thread_data->td->pc_tree = NULL;
av1_setup_pc_tree(cm, thread_data->td);
@@ -105,12 +96,14 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) {
#endif
CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf,
(uint8_t *)aom_memalign(
- 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*thread_data->td->above_pred_buf)));
+ 16,
+ buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*thread_data->td->above_pred_buf)));
CHECK_MEM_ERROR(cm, thread_data->td->left_pred_buf,
(uint8_t *)aom_memalign(
- 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*thread_data->td->left_pred_buf)));
+ 16,
+ buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*thread_data->td->left_pred_buf)));
CHECK_MEM_ERROR(
cm, thread_data->td->wsrc_buf,
(int32_t *)aom_memalign(
@@ -124,12 +117,10 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) {
CHECK_MEM_ERROR(cm, thread_data->td->counts,
aom_calloc(1, sizeof(*thread_data->td->counts)));
-#if CONFIG_PALETTE
// Allocate buffers used by palette coding mode.
CHECK_MEM_ERROR(
cm, thread_data->td->palette_buffer,
aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
-#endif // CONFIG_PALETTE
// Create threads
if (!winterface->reset(worker))
@@ -169,10 +160,8 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) {
sizeof(cpi->common.counts));
}
-#if CONFIG_PALETTE
if (i < num_workers - 1)
thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer;
-#endif // CONFIG_PALETTE
}
// Encode a frame
diff --git a/third_party/aom/av1/encoder/firstpass.c b/third_party/aom/av1/encoder/firstpass.c
index e7d78d83e..2a4200887 100644
--- a/third_party/aom/av1/encoder/firstpass.c
+++ b/third_party/aom/av1/encoder/firstpass.c
@@ -27,8 +27,11 @@
#include "av1/common/entropymv.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h" // av1_setup_dst_planes()
-#include "av1/encoder/av1_quantize.h"
+#if CONFIG_LV_MAP
+#include "av1/common/txb_common.h"
+#endif
#include "av1/encoder/aq_variance.h"
+#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/block.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodemb.h"
@@ -112,7 +115,7 @@ static void output_stats(FIRSTPASS_STATS *stats,
fprintf(fpfile,
"%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf"
"%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
- "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n",
+ "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf\n",
stats->frame, stats->weight, stats->intra_error, stats->coded_error,
stats->sr_coded_error, stats->pcnt_inter, stats->pcnt_motion,
stats->pcnt_second_ref, stats->pcnt_neutral, stats->intra_skip_pct,
@@ -456,7 +459,7 @@ static void set_first_pass_params(AV1_COMP *cpi) {
cpi->rc.frames_to_key = INT_MAX;
}
-#if CONFIG_FLEX_REFS
+#if CONFIG_EXT_REFS
static double raw_motion_error_stdev(int *raw_motion_err_list,
int raw_motion_err_counts) {
int64_t sum_raw_err = 0;
@@ -468,7 +471,7 @@ static double raw_motion_error_stdev(int *raw_motion_err_list,
for (i = 0; i < raw_motion_err_counts; i++) {
sum_raw_err += raw_motion_err_list[i];
}
- raw_err_avg = sum_raw_err / raw_motion_err_counts;
+ raw_err_avg = (double)sum_raw_err / raw_motion_err_counts;
for (i = 0; i < raw_motion_err_counts; i++) {
raw_err_stdev += (raw_motion_err_list[i] - raw_err_avg) *
(raw_motion_err_list[i] - raw_err_avg);
@@ -479,7 +482,7 @@ static double raw_motion_error_stdev(int *raw_motion_err_list,
raw_err_stdev = sqrt(raw_err_stdev / raw_motion_err_counts);
return raw_err_stdev;
}
-#endif // CONFIG_FLEX_REFS
+#endif // CONFIG_EXT_REFS
#define UL_INTRA_THRESH 50
#define INVALID_ROW -1
@@ -531,13 +534,13 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
od_adapt_ctx pvq_context;
#endif
-#if CONFIG_FLEX_REFS
+#if CONFIG_EXT_REFS
int *raw_motion_err_list;
int raw_motion_err_counts = 0;
CHECK_MEM_ERROR(
cm, raw_motion_err_list,
aom_calloc(cm->mb_rows * cm->mb_cols, sizeof(*raw_motion_err_list)));
-#endif // CONFIG_FLEX_REFS
+#endif // CONFIG_EXT_REFS
// First pass code requires valid last and new frame buffers.
assert(new_yv12 != NULL);
assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
@@ -575,8 +578,8 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
#if CONFIG_CFL
// Don't store luma on the first pass since chroma is not computed
- x->cfl_store_y = 0;
-#endif
+ xd->cfl->store_y = 0;
+#endif // CONFIG_CFL
av1_frame_init_quantizer(cpi);
#if CONFIG_PVQ
@@ -623,6 +626,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
}
av1_init_mv_probs(cm);
+#if CONFIG_LV_MAP
+ av1_init_lv_map(cm);
+#endif
#if CONFIG_ADAPT_SCAN
av1_init_scan_order(cm);
av1_deliver_eob_threshold(cm, xd);
@@ -1000,9 +1006,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
}
}
}
-#if CONFIG_FLEX_REFS
+#if CONFIG_EXT_REFS
raw_motion_err_list[raw_motion_err_counts++] = raw_motion_error;
-#endif // CONFIG_FLEX_REFS
+#endif // CONFIG_EXT_REFS
} else {
sr_coded_error += (int64_t)this_error;
}
@@ -1025,10 +1031,12 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
aom_clear_system_state();
}
-#if CONFIG_FLEX_REFS
+#if CONFIG_EXT_REFS
const double raw_err_stdev =
raw_motion_error_stdev(raw_motion_err_list, raw_motion_err_counts);
-#endif // CONFIG_FLEX_REFS
+ aom_free(raw_motion_err_list);
+#endif // CONFIG_EXT_REFS
+
#if CONFIG_PVQ
#if !CONFIG_ANS
od_ec_enc_clear(&x->daala_enc.w.ec);
@@ -1082,9 +1090,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
fps.inactive_zone_rows = (double)image_data_start_row;
fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix
-#if CONFIG_FLEX_REFS
+#if CONFIG_EXT_REFS
fps.raw_error_stdev = raw_err_stdev;
-#endif // CONFIG_FLEX_REFS
+#endif // CONFIG_EXT_REFS
if (mvcount > 0) {
fps.MVr = (double)sum_mvr / mvcount;
@@ -1666,47 +1674,618 @@ static void get_arf_buffer_indices(unsigned char *arf_buffer_indices) {
arf_buffer_indices[0] = ARF_SLOT1;
arf_buffer_indices[1] = ARF_SLOT2;
}
-#endif
+#endif // !CONFIG_EXT_REFS
-static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
- double group_error, int gf_arf_bits) {
+#if CONFIG_EXT_REFS
+#if USE_GF16_MULTI_LAYER
+// === GF Group of 16 ===
+#define GF_INTERVAL_16 16
+#define GF_FRAME_PARAMS (REF_FRAMES + 5)
+
+// GF Group of 16: multi-layer hierarchical coding structure
+// 1st Layer: Frame 0 and Frame 16 (ALTREF)
+// 2nd Layer: Frame 8 (ALTREF2)
+// 3rd Layer: Frames 4 and 12 (ALTREF2)
+// 4th Layer: Frames 2, 6, 10, and 14 (BWDREF)
+// 5th Layer: Frames 1, 3, 5, 7, 9, 11, 13, and 15
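// Illustrative sketch, not part of this patch: in this 16-frame group the
// layer of a frame follows from its display order d (1..16): the more
// trailing zero bits d has, the closer it sits to the root of the pyramid.
// A minimal sketch of that mapping, assuming the layer numbering in the
// comment above; the helper is hypothetical:
static INLINE int gf16_layer_sketch(int d) {
  assert(d >= 1 && d <= GF_INTERVAL_16);
  int tz = 0;  // trailing zeros of the display order
  while (!(d & 1)) {
    d >>= 1;
    ++tz;
  }
  return 5 - tz;  // 16 -> layer 1; 8 -> 2; 4, 12 -> 3; other even -> 4; odd -> 5
}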
+static const unsigned char gf16_multi_layer_params[][GF_FRAME_PARAMS] = {
+ // gf_group->index: coding order
+ // (Frame #) : display order
+ {
+ // gf_group->index == 0 (Frame 0)
+ OVERLAY_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ ALTREF_FRAME, // Index (current) of reference to get updated
+ GOLDEN_FRAME // cpi->refresh_golden_frame = 1
+ },
+ {
+ // gf_group->index == 1 (Frame 16)
+ ARF_UPDATE, // update_type
+ GF_INTERVAL_16 - 1, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ ALTREF_FRAME, // cpi->alt_fb_idx ===> cpi->gld_fb_idx (GOLDEN_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ GOLDEN_FRAME, // cpi->gld_fb_idx ===> cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ ALTREF_FRAME, // Index (current) of reference to get updated
+ ALTREF_FRAME // cpi->refresh_alt_ref_frame = 1
+ },
+ {
+ // gf_group->index == 2 (Frame 8)
+ INTNL_ARF_UPDATE, // update_type
+ (GF_INTERVAL_16 >> 1) - 1, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ ALTREF2_FRAME, // Index (current) of reference to get updated
+ ALTREF2_FRAME // cpi->refresh_alt2_ref_frame = 1
+ },
+ {
+ // gf_group->index == 3 (Frame 4)
+ INTNL_ARF_UPDATE, // update_type
+ (GF_INTERVAL_16 >> 2) - 1, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx ===> cpi->bwd_fb_idx
+ // (BWDREF_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->alt2_fb_idx
+ // (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ ALTREF2_FRAME, // Index (current) of reference to get updated
+ ALTREF2_FRAME // cpi->refresh_alt2_ref_frame = 1
+ },
+ {
+ // gf_group->index == 4 (Frame 2)
+ BRF_UPDATE, // update_type
+ 0, // arf_src_offset
+ 1, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx ===> cpi->bwd_fb_idx
+ // (BWDREF_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->alt2_fb_idx
+ // (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ REF_FRAMES, // Index (current) of reference to get updated
+ BWDREF_FRAME // cpi->refresh_bwd_ref_frame = 1
+ },
+ {
+ // gf_group->index == 5 (Frame 1)
+ LAST_BIPRED_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx ===> cpi->bwd_fb_idx (BWDREF_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx ===> cpi->alt_fb_idx (ALTREF_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx ===> cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ LAST3_FRAME, // Index (current) of reference to get updated
+ LAST_FRAME // cpi->refresh_last_frame = 1
+ },
+ {
+ // gf_group->index == 6 (Frame 3)
+ LF_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->lst_fb_idxes[LAST_FRAME -
+ // LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx ===> cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx ===> cpi->alt2_fb_idx (ALTREF2_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx ===> cpi->alt_fb_idx (ALTREF_FRAME)
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ LAST3_FRAME, // Index (current) of reference to get updated
+ LAST_FRAME // cpi->refresh_last_frame = 1
+ },
+ {
+ // gf_group->index == 7 (Frame 4 - OVERLAY)
+ INTNL_OVERLAY_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ BWDREF_FRAME, // Index (current) of reference to get updated
+ ALTREF2_FRAME // cpi->refresh_alt2_ref_frame = 1
+ },
+ {
+ // gf_group->index == 8 (Frame 6)
+ BRF_UPDATE, // update_type
+ 0, // arf_src_offset
+ 1, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->lst_fb_idxes[LAST_FRAME -
+ // LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx ===> cpi->bwd_fb_idx (BWDREF_FRAME)
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ ALTREF2_FRAME, // Index (current) of reference to get updated
+ BWDREF_FRAME // cpi->refresh_bwd_frame = 1
+ },
+ {
+ // gf_group->index == 9 (Frame 5)
+ LAST_BIPRED_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx ===> cpi->bwd_fb_idx (BWDREF_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ LAST3_FRAME, // Index (current) of reference to get updated
+ LAST_FRAME // cpi->refresh_last_frame = 1
+ },
+ {
+ // gf_group->index == 10 (Frame 7)
+ LF_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->lst_fb_idxes[LAST_FRAME -
+ // LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx ===> cpi->bwd_fb_idx (BWDREF_FRAME)
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ LAST3_FRAME, // Index (current) of reference to get updated
+ LAST_FRAME // cpi->refresh_last_frame = 1
+ },
+ {
+ // gf_group->index == 11 (Frame 8 - OVERLAY)
+ INTNL_OVERLAY_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ BWDREF_FRAME, // Index (current) of reference to get updated
+ ALTREF2_FRAME // cpi->refresh_alt2_ref_frame = 1
+ },
+ {
+ // gf_group->index == 12 (Frame 12)
+ INTNL_ARF_UPDATE, // update_type
+ (GF_INTERVAL_16 >> 2) - 1, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->lst_fb_idxes[LAST_FRAME -
+ // LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ ALTREF2_FRAME, // Index (current) of reference to get updated
+ ALTREF2_FRAME // cpi->refresh_alt2_ref_frame = 1
+ },
+ {
+ // gf_group->index == 13 (Frame 10)
+ BRF_UPDATE, // update_type
+ 0, // arf_src_offset
+ 1, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx ===> cpi->bwd_fb_idx (BWDREF_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ ALTREF2_FRAME, // Index (current) of reference to get updated
+ BWDREF_FRAME // cpi->refresh_bwd_ref_frame = 1
+ },
+ {
+ // gf_group->index == 14 (Frame 9)
+ LAST_BIPRED_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx ===> cpi->bwd_fb_idx (BWDREF_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ LAST3_FRAME, // Index (current) of reference to get updated
+ LAST_FRAME // cpi->refresh_last_frame = 1
+ },
+ {
+ // gf_group->index == 15 (Frame 11)
+ LF_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->lst_fb_idxes[LAST_FRAME -
+ // LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx ===> cpi->bwd_fb_idx (BWDREF_FRAME)
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ LAST3_FRAME, // Index (current) of reference to get updated
+ LAST_FRAME // cpi->refresh_last_frame = 1
+ },
+ {
+ // gf_group->index == 16 (Frame 12 - OVERLAY)
+ INTNL_OVERLAY_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ BWDREF_FRAME, // Index (current) of reference to get updated
+ ALTREF2_FRAME // cpi->refresh_alt2_ref_frame = 1
+ },
+ {
+ // gf_group->index == 17 (Frame 14)
+ BRF_UPDATE, // update_type
+ 0, // arf_src_offset
+ 1, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->lst_fb_idxes[LAST_FRAME -
+ // LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ BWDREF_FRAME, // Index (current) of reference to get updated
+      BWDREF_FRAME // cpi->refresh_bwd_ref_frame = 1
+ },
+ {
+ // gf_group->index == 18 (Frame 13)
+ LAST_BIPRED_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ LAST3_FRAME, // Index (current) of reference to get updated
+ LAST_FRAME // cpi->refresh_last_frame = 1
+ },
+ {
+ // gf_group->index == 19 (Frame 15)
+ LF_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ BWDREF_FRAME, // cpi->bwd_fb_idx ===> cpi->lst_fb_idxes[LAST_FRAME -
+ // LAST_FRAME]
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ LAST3_FRAME, // Index (current) of reference to get updated
+ LAST_FRAME // cpi->refresh_last_frame = 1
+ },
+ {
+ // gf_group->index == 20 (Frame 16 - OVERLAY: Belonging to the next GF
+ // group)
+ OVERLAY_UPDATE, // update_type
+ 0, // arf_src_offset
+ 0, // brf_src_offset
+ // Reference frame indexes (previous ===> current)
+ LAST3_FRAME, // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]
+ LAST_FRAME, // cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME]
+ LAST2_FRAME, // cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] ===>
+ // cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME]
+ GOLDEN_FRAME, // cpi->gld_fb_idx (GOLDEN_FRAME)
+ BWDREF_FRAME, // cpi->bwd_fb_idx (BWDREF_FRAME)
+ ALTREF2_FRAME, // cpi->alt2_fb_idx (ALTREF2_FRAME)
+ ALTREF_FRAME, // cpi->alt_fb_idx (ALTREF_FRAME)
+ REF_FRAMES, // cpi->ext_fb_idx (extra ref frame)
+ // Refreshment (index, flag)
+ ALTREF_FRAME, // Index (current) of reference to get updated
+ GOLDEN_FRAME // cpi->refresh_golden_frame = 1
+ }
+};
+
+// === GF Group of 16 ===
+static void define_gf_group_structure_16(AV1_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
- FIRSTPASS_STATS frame_stats;
+ const int key_frame = cpi->common.frame_type == KEY_FRAME;
+
+ assert(rc->baseline_gf_interval == GF_INTERVAL_16);
+
+ // Total number of frames to consider for GF group of 16:
+ // = GF group interval + number of OVERLAY's
+ // = rc->baseline_gf_interval + MAX_EXT_ARFS + 1 + 1
+  // NOTE: The OVERLAY frame for the next GF group also needs to be considered
+  //       here, to prepare the reference frame index mapping.
+
+ const int gf_update_frames = rc->baseline_gf_interval + MAX_EXT_ARFS + 2;
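+  // E.g., assuming GF_INTERVAL_16 == 16 and, under USE_GF16_MULTI_LAYER,
+  // MAX_EXT_ARFS == (REF_FRAMES - BWDREF_FRAME) == 3, this gives
+  // gf_update_frames == 16 + 3 + 2 == 21, matching the rows for
+  // gf_group->index 0 through 20 in gf16_multi_layer_params[][].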
+
+ for (int frame_index = 0; frame_index < gf_update_frames; ++frame_index) {
+ int param_idx = 0;
+
+ // Treat KEY_FRAME differently
+ if (frame_index == 0 && key_frame) {
+ gf_group->update_type[frame_index] = KF_UPDATE;
+
+ gf_group->rf_level[frame_index] = KF_STD;
+ gf_group->arf_src_offset[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ for (int ref_idx = 0; ref_idx < REF_FRAMES; ++ref_idx)
+ gf_group->ref_fb_idx_map[frame_index][ref_idx] = ref_idx;
+ gf_group->refresh_idx[frame_index] =
+ cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME];
+ gf_group->refresh_flag[frame_index] =
+ cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME];
+
+ continue;
+ }
+
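+    // Each row of gf16_multi_layer_params[][] is consumed in this order:
+    // update_type, arf_src_offset, brf_src_offset, the REF_FRAMES entries of
+    // ref_fb_idx_map, then refresh_idx and refresh_flag -- REF_FRAMES + 5
+    // entries in total, walked by param_idx below.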
+ // == update_type ==
+ gf_group->update_type[frame_index] =
+ gf16_multi_layer_params[frame_index][param_idx++];
+
+ // == rf_level ==
+ // Derive rf_level from update_type
+ switch (gf_group->update_type[frame_index]) {
+ case LF_UPDATE: gf_group->rf_level[frame_index] = INTER_NORMAL; break;
+ case ARF_UPDATE: gf_group->rf_level[frame_index] = GF_ARF_LOW; break;
+ case OVERLAY_UPDATE:
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ break;
+ case BRF_UPDATE: gf_group->rf_level[frame_index] = GF_ARF_LOW; break;
+ case LAST_BIPRED_UPDATE:
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ break;
+ case BIPRED_UPDATE: gf_group->rf_level[frame_index] = INTER_NORMAL; break;
+ case INTNL_ARF_UPDATE:
+ gf_group->rf_level[frame_index] = GF_ARF_LOW;
+ break;
+ case INTNL_OVERLAY_UPDATE:
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ break;
+ default: gf_group->rf_level[frame_index] = INTER_NORMAL; break;
+ }
+
+ // == arf_src_offset ==
+ gf_group->arf_src_offset[frame_index] =
+ gf16_multi_layer_params[frame_index][param_idx++];
+
+ // == brf_src_offset ==
+ gf_group->brf_src_offset[frame_index] =
+ gf16_multi_layer_params[frame_index][param_idx++];
+
+ // == bidir_pred_enabled ==
+    // Derive bidir_pred_enabled from brf_src_offset
+ gf_group->bidir_pred_enabled[frame_index] =
+ gf_group->brf_src_offset[frame_index] ? 1 : 0;
+
+ // == ref_fb_idx_map ==
+ for (int ref_idx = 0; ref_idx < REF_FRAMES; ++ref_idx)
+ gf_group->ref_fb_idx_map[frame_index][ref_idx] =
+ gf16_multi_layer_params[frame_index][param_idx++];
+
+ // == refresh_idx ==
+ gf_group->refresh_idx[frame_index] =
+ gf16_multi_layer_params[frame_index][param_idx++];
+
+ // == refresh_flag ==
+ gf_group->refresh_flag[frame_index] =
+ gf16_multi_layer_params[frame_index][param_idx];
+ }
+
+ // Mark the ARF_UPDATE / INTNL_ARF_UPDATE and OVERLAY_UPDATE /
+ // INTNL_OVERLAY_UPDATE for rate allocation
+ // NOTE: Indexes are designed in the display order backward:
+ // ALT[3] .. ALT[2] .. ALT[1] .. ALT[0],
+ // but their coding order is as follows:
+ // ALT0-ALT2-ALT3 .. OVERLAY3 .. OVERLAY2-ALT1 .. OVERLAY1 .. OVERLAY0
+
+ const int num_arfs_in_gf = cpi->num_extra_arfs + 1;
+ const int sub_arf_interval = rc->baseline_gf_interval / num_arfs_in_gf;
+
+ // == arf_pos_for_ovrly ==: Position for OVERLAY
+ for (int arf_idx = 0; arf_idx < num_arfs_in_gf; arf_idx++) {
+ const int prior_num_arfs =
+ (arf_idx <= 1) ? num_arfs_in_gf : (num_arfs_in_gf - 1);
+ cpi->arf_pos_for_ovrly[arf_idx] =
+ sub_arf_interval * (num_arfs_in_gf - arf_idx) + prior_num_arfs;
+ }
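+  // As a worked example, with num_extra_arfs == 3 (so num_arfs_in_gf == 4 and
+  // sub_arf_interval == 4), this yields arf_pos_for_ovrly[] = { 20, 16, 11, 7 },
+  // matching the (internal) OVERLAY rows at gf_group->index 20, 16 and 11 in
+  // gf16_multi_layer_params[][].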
+
+ // == arf_pos_in_gf ==: Position for ALTREF
+ cpi->arf_pos_in_gf[0] = 1;
+ cpi->arf_pos_in_gf[1] = cpi->arf_pos_for_ovrly[2] + 1;
+ cpi->arf_pos_in_gf[2] = 2;
+ cpi->arf_pos_in_gf[3] = 3;
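+  // Continuing the example above, arf_pos_in_gf[] = { 1, 12, 2, 3 }: ALT0,
+  // ALT2 and ALT3 are coded back-to-back at indexes 1-3, while ALT1 follows
+  // the internal overlay at index 11, i.e. it is coded at index 12.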
+
+ // == arf_update_idx ==
+ // == arf_ref_idx ==
+  // NOTE: Due to the hierarchical nature of GF16, these two parameters only
+  //       reflect the index of the nearest future overlay.
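+  // With arf_pos_for_ovrly[] = { 20, 16, 11, 7 } as in the example above,
+  // gf_group indexes 0..7 get arf_idx 3, indexes 8..11 get arf_idx 2,
+  // indexes 12..16 get arf_idx 1, and indexes 17..20 get arf_idx 0.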
+ int start_frame_index = 0;
+ for (int arf_idx = (num_arfs_in_gf - 1); arf_idx >= 0; --arf_idx) {
+ const int end_frame_index = cpi->arf_pos_for_ovrly[arf_idx];
+ for (int frame_index = start_frame_index; frame_index <= end_frame_index;
+ ++frame_index) {
+ gf_group->arf_update_idx[frame_index] = arf_idx;
+ gf_group->arf_ref_idx[frame_index] = arf_idx;
+ }
+ start_frame_index = end_frame_index + 1;
+ }
+}
+#endif // USE_GF16_MULTI_LAYER
+#endif // CONFIG_EXT_REFS
+
+static void define_gf_group_structure(AV1_COMP *cpi) {
+ RATE_CONTROL *const rc = &cpi->rc;
+
+#if CONFIG_EXT_REFS
+#if USE_GF16_MULTI_LAYER
+ if (rc->baseline_gf_interval == 16) {
+ define_gf_group_structure_16(cpi);
+ return;
+ }
+#endif // USE_GF16_MULTI_LAYER
+#endif // CONFIG_EXT_REFS
+
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
int i;
int frame_index = 0;
- int target_frame_size;
- int key_frame;
- const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
- int64_t total_group_bits = gf_group_bits;
- double modified_err = 0.0;
- double err_fraction;
- int mid_boost_bits = 0;
+ const int key_frame = cpi->common.frame_type == KEY_FRAME;
+
#if CONFIG_EXT_REFS
  // The use of bi-predictive frames is only enabled when the following 3
// conditions are met:
- // (1) Alt-ref is enabled;
+ // (1) ALTREF is enabled;
// (2) The bi-predictive group interval is at least 2; and
// (3) The bi-predictive group interval is strictly smaller than the
// golden group interval.
const int is_bipred_enabled =
-#if CONFIG_FLEX_REFS
- cpi->bwd_ref_allowed &&
-#endif
- rc->source_alt_ref_pending && rc->bipred_group_interval &&
+ cpi->bwd_ref_allowed && rc->source_alt_ref_pending &&
+ rc->bipred_group_interval &&
rc->bipred_group_interval <=
(rc->baseline_gf_interval - rc->source_alt_ref_pending);
int bipred_group_end = 0;
int bipred_frame_index = 0;
- int arf_pos[MAX_EXT_ARFS + 1];
const unsigned char ext_arf_interval =
(unsigned char)(rc->baseline_gf_interval / (cpi->num_extra_arfs + 1) - 1);
int which_arf = cpi->num_extra_arfs;
int subgroup_interval[MAX_EXT_ARFS + 1];
- int ext_arf_boost[MAX_EXT_ARFS];
int is_sg_bipred_enabled = is_bipred_enabled;
int accumulative_subgroup_interval = 0;
#else
@@ -1714,27 +2293,20 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
#endif // CONFIG_EXT_REFS
-#if CONFIG_EXT_REFS
- av1_zero_array(ext_arf_boost, MAX_EXT_ARFS);
-#endif // CONFIG_EXT_REFS
-
- key_frame = cpi->common.frame_type == KEY_FRAME;
-
#if !CONFIG_EXT_REFS
get_arf_buffer_indices(arf_buffer_indices);
#endif // !CONFIG_EXT_REFS
// For key frames the frame target rate is already set and it
// is also the golden frame.
+ // === [frame_index == 0] ===
if (!key_frame) {
if (rc->source_alt_ref_active) {
gf_group->update_type[frame_index] = OVERLAY_UPDATE;
gf_group->rf_level[frame_index] = INTER_NORMAL;
- gf_group->bit_allocation[frame_index] = 0;
} else {
gf_group->update_type[frame_index] = GF_UPDATE;
gf_group->rf_level[frame_index] = GF_ARF_STD;
- gf_group->bit_allocation[frame_index] = gf_arf_bits;
}
#if CONFIG_EXT_REFS
gf_group->arf_update_idx[frame_index] = 0;
@@ -1743,8 +2315,6 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
#endif // CONFIG_EXT_REFS
- // Step over the golden frame / overlay frame
- if (EOF == input_stats(twopass, &frame_stats)) return;
}
#if CONFIG_EXT_REFS
@@ -1752,22 +2322,16 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
gf_group->brf_src_offset[frame_index] = 0;
#endif // CONFIG_EXT_REFS
- // Deduct the boost bits for arf (or gf if it is not a key frame)
- // from the group total.
- if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;
-
frame_index++;
#if CONFIG_EXT_REFS
bipred_frame_index++;
#endif // CONFIG_EXT_REFS
- // Store the bits to spend on the ARF if there is one.
+ // === [frame_index == 1] ===
if (rc->source_alt_ref_pending) {
gf_group->update_type[frame_index] = ARF_UPDATE;
gf_group->rf_level[frame_index] = GF_ARF_STD;
- gf_group->bit_allocation[frame_index] = gf_arf_bits;
-
gf_group->arf_src_offset[frame_index] =
(unsigned char)(rc->baseline_gf_interval - 1);
@@ -1792,34 +2356,38 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
// We index ALTREF's as: KEY ----- ALT2 ----- ALT1 ----- ALT0
// but code them in the following order:
// KEY-ALT0-ALT2 ----- OVERLAY2-ALT1 ----- OVERLAY1 ----- OVERLAY0
- arf_pos[0] =
- frame_index + cpi->num_extra_arfs + gf_group->arf_src_offset[1] + 1;
+ //
+ // arf_pos_for_ovrly[]: Position for OVERLAY
+ // arf_pos_in_gf[]: Position for ALTREF
+ cpi->arf_pos_for_ovrly[0] = frame_index + cpi->num_extra_arfs +
+ gf_group->arf_src_offset[frame_index] + 1;
for (i = 0; i < cpi->num_extra_arfs; ++i) {
- arf_pos[i + 1] =
+ cpi->arf_pos_for_ovrly[i + 1] =
frame_index + (cpi->num_extra_arfs - i) * (ext_arf_interval + 2);
- subgroup_interval[i] = arf_pos[i] - arf_pos[i + 1] - (i == 0 ? 1 : 2);
+ subgroup_interval[i] = cpi->arf_pos_for_ovrly[i] -
+ cpi->arf_pos_for_ovrly[i + 1] - (i == 0 ? 1 : 2);
}
- subgroup_interval[cpi->num_extra_arfs] = arf_pos[cpi->num_extra_arfs] -
- frame_index -
- (cpi->num_extra_arfs == 0 ? 1 : 2);
+ subgroup_interval[cpi->num_extra_arfs] =
+ cpi->arf_pos_for_ovrly[cpi->num_extra_arfs] - frame_index -
+ (cpi->num_extra_arfs == 0 ? 1 : 2);
#endif // CONFIG_EXT_REFS
++frame_index;
#if CONFIG_EXT_REFS
// Insert an extra ARF
+ // === [frame_index == 2] ===
if (cpi->num_extra_arfs) {
- gf_group->update_type[frame_index] = ARF_UPDATE;
- // Note (weitinglin): GF_ARF_LOW is also used as an identifier
- // for internal ALT_REF's:
+ gf_group->update_type[frame_index] = INTNL_ARF_UPDATE;
gf_group->rf_level[frame_index] = GF_ARF_LOW;
gf_group->arf_src_offset[frame_index] = ext_arf_interval;
+
gf_group->arf_update_idx[frame_index] = which_arf;
gf_group->arf_ref_idx[frame_index] = 0;
++frame_index;
}
accumulative_subgroup_interval += subgroup_interval[cpi->num_extra_arfs];
-#else
+#else // !CONFIG_EXT_REFS
if (cpi->multi_arf_enabled) {
// Set aside a slot for a level 1 arf.
gf_group->update_type[frame_index] = ARF_UPDATE;
@@ -1838,30 +2406,14 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
#endif // !CONFIG_EXT_REFS
- // Allocate bits to the other frames in the group.
for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) {
#if !CONFIG_EXT_REFS
int arf_idx = 0;
-#endif // !CONFIG_EXT_REFS
-
- if (EOF == input_stats(twopass, &frame_stats)) break;
-
- modified_err = calculate_modified_err(cpi, twopass, oxcf, &frame_stats);
-
- if (group_error > 0)
- err_fraction = modified_err / DOUBLE_DIVIDE_CHECK(group_error);
- else
- err_fraction = 0.0;
-
- target_frame_size = (int)((double)total_group_bits * err_fraction);
if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) {
- mid_boost_bits += (target_frame_size >> 4);
- target_frame_size -= (target_frame_size >> 4);
-#if !CONFIG_EXT_REFS
if (frame_index <= mid_frame_idx) arf_idx = 1;
-#endif // !CONFIG_EXT_REFS
}
+#endif // !CONFIG_EXT_REFS
#if CONFIG_EXT_REFS
gf_group->arf_update_idx[frame_index] = which_arf;
@@ -1871,12 +2423,12 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
#endif // CONFIG_EXT_REFS
- target_frame_size =
- clamp(target_frame_size, 0, AOMMIN(max_bits, (int)total_group_bits));
-
#if CONFIG_EXT_REFS
- // If we are going to have ARFs, check if we can have BWDREF in this
- // subgroup.
+ // If we are going to have ARFs, check whether we can have BWDREF in this
+  // subgroup, and further, whether we can have an ARF subgroup that contains
+  // the BWDREF subgroup while itself staying within the GF group:
+ //
+ // GF group --> ARF subgroup --> BWDREF subgroup
if (rc->source_alt_ref_pending) {
is_sg_bipred_enabled =
is_bipred_enabled &&
@@ -1890,24 +2442,26 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
if (is_sg_bipred_enabled && !bipred_group_end) {
const int cur_brf_src_offset = rc->bipred_group_interval - 1;
- // --- BRF_UPDATE ---
if (bipred_frame_index == 1) {
+ // --- BRF_UPDATE ---
gf_group->update_type[frame_index] = BRF_UPDATE;
- gf_group->bidir_pred_enabled[frame_index] = 1;
+ gf_group->rf_level[frame_index] = GF_ARF_LOW;
gf_group->brf_src_offset[frame_index] = cur_brf_src_offset;
- // --- LAST_BIPRED_UPDATE ---
} else if (bipred_frame_index == rc->bipred_group_interval) {
+ // --- LAST_BIPRED_UPDATE ---
gf_group->update_type[frame_index] = LAST_BIPRED_UPDATE;
- gf_group->bidir_pred_enabled[frame_index] = 1;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
gf_group->brf_src_offset[frame_index] = 0;
+
// Reset the bi-predictive frame index.
bipred_frame_index = 0;
- // --- BIPRED_UPDATE ---
} else {
+ // --- BIPRED_UPDATE ---
gf_group->update_type[frame_index] = BIPRED_UPDATE;
- gf_group->bidir_pred_enabled[frame_index] = 1;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
gf_group->brf_src_offset[frame_index] = 0;
}
+ gf_group->bidir_pred_enabled[frame_index] = 1;
bipred_frame_index++;
// Check whether the next bi-predictive frame group would entirely be
@@ -1920,51 +2474,30 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
} else {
#endif // CONFIG_EXT_REFS
gf_group->update_type[frame_index] = LF_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
#if CONFIG_EXT_REFS
gf_group->bidir_pred_enabled[frame_index] = 0;
gf_group->brf_src_offset[frame_index] = 0;
}
#endif // CONFIG_EXT_REFS
-#if CONFIG_EXT_REFS
- if (gf_group->update_type[frame_index] == BRF_UPDATE) {
- // Boost up the allocated bits on BWDREF_FRAME
- gf_group->rf_level[frame_index] = GF_ARF_LOW;
- gf_group->bit_allocation[frame_index] =
- target_frame_size + (target_frame_size >> 2);
- } else if (gf_group->update_type[frame_index] == LAST_BIPRED_UPDATE) {
- // Press down the allocated bits on LAST_BIPRED_UPDATE frames
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- gf_group->bit_allocation[frame_index] =
- target_frame_size - (target_frame_size >> 1);
- } else if (gf_group->update_type[frame_index] == BIPRED_UPDATE) {
- // TODO(zoeliu): To investigate whether the allocated bits on
- // BIPRED_UPDATE frames need to be further adjusted.
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- gf_group->bit_allocation[frame_index] = target_frame_size;
- } else {
-#endif // CONFIG_EXT_REFS
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- gf_group->bit_allocation[frame_index] = target_frame_size;
-#if CONFIG_EXT_REFS
- }
-#endif // CONFIG_EXT_REFS
-
++frame_index;
#if CONFIG_EXT_REFS
- // Check if we need to update the ARF
+ // Check if we need to update the ARF.
if (is_sg_bipred_enabled && cpi->num_extra_arfs && which_arf > 0 &&
- frame_index > arf_pos[which_arf]) {
+ frame_index > cpi->arf_pos_for_ovrly[which_arf]) {
--which_arf;
accumulative_subgroup_interval += subgroup_interval[which_arf] + 1;
- // Meet the new subgroup. Reset the bipred_group_end flag;
+
+      // Meet the new subgroup; reset the bipred_group_end flag.
bipred_group_end = 0;
// Insert another extra ARF after the overlay frame
if (which_arf) {
- gf_group->update_type[frame_index] = ARF_UPDATE;
+ gf_group->update_type[frame_index] = INTNL_ARF_UPDATE;
gf_group->rf_level[frame_index] = GF_ARF_LOW;
gf_group->arf_src_offset[frame_index] = ext_arf_interval;
+
gf_group->arf_update_idx[frame_index] = which_arf;
gf_group->arf_ref_idx[frame_index] = 0;
++frame_index;
@@ -1973,10 +2506,9 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
#endif // CONFIG_EXT_REFS
}
-// Note:
-// We need to configure the frame at the end of the sequence + 1 that will be
-// the start frame for the next group. Otherwise prior to the call to
-// av1_rc_get_second_pass_params() the data will be undefined.
+// NOTE: We need to configure the frame at the end of the sequence + 1 that will
+// be the start frame for the next group. Otherwise prior to the call to
+// av1_rc_get_second_pass_params() the data will be undefined.
#if CONFIG_EXT_REFS
gf_group->arf_update_idx[frame_index] = 0;
gf_group->arf_ref_idx[frame_index] = 0;
@@ -1990,23 +2522,22 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
gf_group->rf_level[frame_index] = INTER_NORMAL;
#if CONFIG_EXT_REFS
+ cpi->arf_pos_in_gf[0] = 1;
if (cpi->num_extra_arfs) {
+ // Overwrite the update_type for extra-ARF's corresponding internal
+ // OVERLAY's: Change from LF_UPDATE to INTNL_OVERLAY_UPDATE.
for (i = cpi->num_extra_arfs; i > 0; --i) {
- int arf_pos_in_gf = (i == cpi->num_extra_arfs ? 2 : arf_pos[i + 1] + 1);
- gf_group->bit_allocation[arf_pos_in_gf] =
- gf_group->bit_allocation[arf_pos[i]];
- gf_group->update_type[arf_pos[i]] = INTNL_OVERLAY_UPDATE;
- gf_group->bit_allocation[arf_pos[i]] = 0;
- gf_group->rf_level[arf_pos[i]] = INTER_NORMAL;
+ cpi->arf_pos_in_gf[i] =
+ (i == cpi->num_extra_arfs ? 2 : cpi->arf_pos_for_ovrly[i + 1] + 1);
+
+ gf_group->update_type[cpi->arf_pos_for_ovrly[i]] = INTNL_OVERLAY_UPDATE;
+ gf_group->rf_level[cpi->arf_pos_for_ovrly[i]] = INTER_NORMAL;
}
}
#else
// Final setup for second arf and its overlay.
if (cpi->multi_arf_enabled) {
- gf_group->bit_allocation[2] =
- gf_group->bit_allocation[mid_frame_idx] + mid_boost_bits;
gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE;
- gf_group->bit_allocation[mid_frame_idx] = 0;
}
#endif // CONFIG_EXT_REFS
} else {
@@ -2018,6 +2549,168 @@ static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
gf_group->bidir_pred_enabled[frame_index] = 0;
gf_group->brf_src_offset[frame_index] = 0;
#endif // CONFIG_EXT_REFS
+}
+
+static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits,
+ double group_error, int gf_arf_bits) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ const AV1EncoderConfig *const oxcf = &cpi->oxcf;
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
+ FIRSTPASS_STATS frame_stats;
+ int i;
+ int frame_index = 0;
+ int target_frame_size;
+ int key_frame;
+ const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
+ int64_t total_group_bits = gf_group_bits;
+ double modified_err = 0.0;
+ double err_fraction;
+ int mid_boost_bits = 0;
+#if CONFIG_EXT_REFS
+ int ext_arf_boost[MAX_EXT_ARFS];
+#else
+ int mid_frame_idx;
+#endif // CONFIG_EXT_REFS
+
+ define_gf_group_structure(cpi);
+
+#if CONFIG_EXT_REFS
+ av1_zero_array(ext_arf_boost, MAX_EXT_ARFS);
+#endif // CONFIG_EXT_REFS
+
+ key_frame = cpi->common.frame_type == KEY_FRAME;
+
+ // For key frames the frame target rate is already set and it
+ // is also the golden frame.
+ // === [frame_index == 0] ===
+ if (!key_frame) {
+ if (rc->source_alt_ref_active)
+ gf_group->bit_allocation[frame_index] = 0;
+ else
+ gf_group->bit_allocation[frame_index] = gf_arf_bits;
+
+ // Step over the golden frame / overlay frame
+ if (EOF == input_stats(twopass, &frame_stats)) return;
+ }
+
+ // Deduct the boost bits for arf (or gf if it is not a key frame)
+ // from the group total.
+ if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;
+
+ frame_index++;
+
+ // Store the bits to spend on the ARF if there is one.
+ // === [frame_index == 1] ===
+ if (rc->source_alt_ref_pending) {
+ gf_group->bit_allocation[frame_index] = gf_arf_bits;
+
+ ++frame_index;
+
+#if CONFIG_EXT_REFS
+    // Skip all the extra-ARF's that come right after the ARF, at the
+    // starting segment of the current GF group.
+ if (cpi->num_extra_arfs) {
+ while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
+ ++frame_index;
+ }
+#else // !CONFIG_EXT_REFS
+ // Set aside a slot for a level 1 arf.
+ if (cpi->multi_arf_enabled) ++frame_index;
+#endif // CONFIG_EXT_REFS
+ }
+
+#if !CONFIG_EXT_REFS
+ // Define middle frame
+ mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
+#endif // !CONFIG_EXT_REFS
+
+ // Allocate bits to the other frames in the group.
+ for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) {
+ if (EOF == input_stats(twopass, &frame_stats)) break;
+
+ modified_err = calculate_modified_err(cpi, twopass, oxcf, &frame_stats);
+
+ if (group_error > 0)
+ err_fraction = modified_err / DOUBLE_DIVIDE_CHECK(group_error);
+ else
+ err_fraction = 0.0;
+
+ target_frame_size = (int)((double)total_group_bits * err_fraction);
+
+ if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) {
+ mid_boost_bits += (target_frame_size >> 4);
+ target_frame_size -= (target_frame_size >> 4);
+ }
+
+ target_frame_size =
+ clamp(target_frame_size, 0, AOMMIN(max_bits, (int)total_group_bits));
+
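+    // The per-type adjustments below rebalance the share of bits per frame
+    // type: e.g., with target_frame_size == 1000, a BRF_UPDATE frame gets
+    // 1000 + (1000 >> 2) == 1250 bits, a LAST_BIPRED_UPDATE frame gets
+    // 1000 - (1000 >> 1) == 500 bits, and BIPRED/LF frames keep the full 1000.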
+#if CONFIG_EXT_REFS
+ if (gf_group->update_type[frame_index] == BRF_UPDATE) {
+ // Boost up the allocated bits on BWDREF_FRAME
+ gf_group->bit_allocation[frame_index] =
+ target_frame_size + (target_frame_size >> 2);
+ } else if (gf_group->update_type[frame_index] == LAST_BIPRED_UPDATE) {
+ // Press down the allocated bits on LAST_BIPRED_UPDATE frames
+ gf_group->bit_allocation[frame_index] =
+ target_frame_size - (target_frame_size >> 1);
+ } else if (gf_group->update_type[frame_index] == BIPRED_UPDATE) {
+ // TODO(zoeliu): To investigate whether the allocated bits on
+ // BIPRED_UPDATE frames need to be further adjusted.
+ gf_group->bit_allocation[frame_index] = target_frame_size;
+ } else {
+ assert(gf_group->update_type[frame_index] == LF_UPDATE ||
+ gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE);
+#endif // CONFIG_EXT_REFS
+ gf_group->bit_allocation[frame_index] = target_frame_size;
+#if CONFIG_EXT_REFS
+ }
+#endif // CONFIG_EXT_REFS
+
+ ++frame_index;
+
+#if CONFIG_EXT_REFS
+ // Skip all the extra-ARF's.
+ if (cpi->num_extra_arfs) {
+ while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE)
+ ++frame_index;
+ }
+#endif // CONFIG_EXT_REFS
+ }
+
+ // NOTE: We need to configure the frame at the end of the sequence + 1 that
+ // will be the start frame for the next group. Otherwise prior to the
+ // call to av1_rc_get_second_pass_params() the data will be undefined.
+ if (rc->source_alt_ref_pending) {
+#if CONFIG_EXT_REFS
+ if (cpi->num_extra_arfs) {
+ // NOTE: For bit allocation, move the allocated bits associated with
+ // INTNL_OVERLAY_UPDATE to the corresponding INTNL_ARF_UPDATE.
+ // i > 0 for extra-ARF's and i == 0 for ARF:
+ // arf_pos_for_ovrly[i]: Position for INTNL_OVERLAY_UPDATE
+ // arf_pos_in_gf[i]: Position for INTNL_ARF_UPDATE
+ for (i = cpi->num_extra_arfs; i > 0; --i) {
+ assert(gf_group->update_type[cpi->arf_pos_for_ovrly[i]] ==
+ INTNL_OVERLAY_UPDATE);
+
+ // Encoder's choice:
+ // Set show_existing_frame == 1 for all extra-ARF's, and hence
+      // allocate zero bits to all internal OVERLAY frames.
+ gf_group->bit_allocation[cpi->arf_pos_in_gf[i]] =
+ gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]];
+ gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]] = 0;
+ }
+ }
+#else
+ // Final setup for second arf and its overlay.
+ if (cpi->multi_arf_enabled) {
+ gf_group->bit_allocation[2] =
+ gf_group->bit_allocation[mid_frame_idx] + mid_boost_bits;
+ gf_group->bit_allocation[mid_frame_idx] = 0;
+ }
+#endif // CONFIG_EXT_REFS
+ }
// Note whether multi-arf was enabled this group for next time.
cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled;
@@ -2068,10 +2761,10 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
const int is_key_frame = frame_is_intra_only(cm);
const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
-#if CONFIG_FLEX_REFS
+#if CONFIG_EXT_REFS
cpi->extra_arf_allowed = 1;
cpi->bwd_ref_allowed = 1;
-#endif
+#endif // CONFIG_EXT_REFS
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
@@ -2133,11 +2826,15 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
}
-#if CONFIG_FLEX_REFS
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
double avg_sr_coded_error = 0;
double avg_raw_err_stdev = 0;
int non_zero_stdev_count = 0;
-#endif // CONFIG_FLEX_REFS
+#endif // CONFIG_EXT_REFS || CONFIG_BGSPRITE
+#if CONFIG_BGSPRITE
+ double avg_pcnt_second_ref = 0;
+ int non_zero_pcnt_second_ref_count = 0;
+#endif
i = 0;
while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
@@ -2162,14 +2859,20 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
accumulate_frame_motion_stats(
&next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator);
-#if CONFIG_FLEX_REFS
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
    // Sum up the metric values of the current GF group.
avg_sr_coded_error += next_frame.sr_coded_error;
- if (next_frame.raw_error_stdev) {
+ if (fabs(next_frame.raw_error_stdev) > 0.000001) {
non_zero_stdev_count++;
avg_raw_err_stdev += next_frame.raw_error_stdev;
}
-#endif // CONFIG_FLEX_REFS
+#endif // CONFIG_EXT_REFS || CONFIG_BGSPRITE
+#if CONFIG_BGSPRITE
+ if (this_frame->pcnt_second_ref) {
+ avg_pcnt_second_ref += this_frame->pcnt_second_ref;
+ }
+ non_zero_pcnt_second_ref_count++;
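+    // NOTE: This counter is incremented for every frame here, not only when
+    //       pcnt_second_ref is non-zero.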
+#endif // CONFIG_BGSPRITE
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
@@ -2209,8 +2912,18 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(abs_mv_in_out_accumulator > 3.0) ||
(mv_in_out_accumulator < -2.0) ||
((boost_score - old_boost_score) < BOOST_BREAKOUT)))) {
- boost_score = old_boost_score;
- break;
+#if CONFIG_EXT_REFS
+ // If GF group interval is < 12, we force it to be 8. Otherwise,
+ // if it is >= 12, we keep it as is.
+ // NOTE: 'i' is 1 more than the GF group interval candidate that is being
+ // checked.
+ if (i == (8 + 1) || i >= (12 + 1)) {
+#endif // CONFIG_EXT_REFS
+ boost_score = old_boost_score;
+ break;
+#if CONFIG_EXT_REFS
+ }
+#endif // CONFIG_EXT_REFS
}
*this_frame = next_frame;
@@ -2221,6 +2934,13 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Was the group length constrained by the requirement for a new KF?
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
+ const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
+ : cpi->common.MBs;
+ assert(num_mbs > 0);
+ if (i) avg_sr_coded_error /= i;
+#endif // CONFIG_EXT_REFS || CONFIG_BGSPRITE
+
// Should we use the alternate reference frame.
if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
(i >= rc->min_gf_interval)) {
@@ -2235,6 +2955,17 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(zero_motion_accumulator < 0.995))
? 1
: 0;
+#if CONFIG_BGSPRITE
+ if (non_zero_pcnt_second_ref_count) {
+ avg_pcnt_second_ref /= non_zero_pcnt_second_ref_count;
+ }
+
+ cpi->bgsprite_allowed = 1;
+ if (abs_mv_in_out_accumulator > 0.30 || decay_accumulator < 0.90 ||
+ avg_sr_coded_error / num_mbs < 20 || avg_pcnt_second_ref < 0.30) {
+ cpi->bgsprite_allowed = 0;
+ }
+#endif // CONFIG_BGSPRITE
} else {
rc->gfu_boost = AOMMAX((int)boost_score, MIN_ARF_GF_BOOST);
rc->source_alt_ref_pending = 0;
@@ -2243,19 +2974,13 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Set the interval until the next gf.
rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
#if CONFIG_EXT_REFS
-#if CONFIG_FLEX_REFS
- const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
- : cpi->common.MBs;
- if (i) avg_sr_coded_error /= i;
if (non_zero_stdev_count) avg_raw_err_stdev /= non_zero_stdev_count;
- // Disable extra alter refs and backward ref for "still" gf group
- // zero_motion_accumulator indicates the minimum percentage of (0, 0) motion
- // in gf group
- // avg_sr_coded_error indicates the average of the sse per pixel of each frame
- // in gf group
- // avg_raw_err_stdev indicates the average of the standard deviation of (0, 0)
- // motion error per block of each frame in gf group
+ // Disable extra altrefs and backward refs for "still" gf group:
+ // zero_motion_accumulator: minimum percentage of (0,0) motion;
+ // avg_sr_coded_error: average of the SSE per pixel of each frame;
+ // avg_raw_err_stdev: average of the standard deviation of (0,0)
+ // motion error per block of each frame.
assert(num_mbs > 0);
const int disable_bwd_extarf =
(zero_motion_accumulator > MIN_ZERO_MOTION &&
@@ -2264,13 +2989,13 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
if (disable_bwd_extarf) cpi->extra_arf_allowed = cpi->bwd_ref_allowed = 0;
- if (!cpi->extra_arf_allowed)
+ if (!cpi->extra_arf_allowed) {
cpi->num_extra_arfs = 0;
- else
-#endif // CONFIG_FLEX_REFS
+ } else {
// Compute how many extra alt_refs we can have
cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval,
rc->source_alt_ref_pending);
+ }
  // Currently at most two extra ARFs are allowed
assert(cpi->num_extra_arfs <= MAX_EXT_ARFS);
#endif // CONFIG_EXT_REFS
@@ -2652,7 +3377,8 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
boost_score += (decay_accumulator * frame_boost);
}
}
- av_decay_accumulator /= (double)loop_decay_counter;
+ if (loop_decay_counter > 0)
+ av_decay_accumulator /= (double)loop_decay_counter;
reset_fpf_position(twopass, start_position);
@@ -2698,11 +3424,158 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->modified_error_left -= kf_group_err;
}
+#if USE_GF16_MULTI_LAYER
+// === GF Group of 16 ===
+void av1_ref_frame_map_idx_updates(AV1_COMP *cpi, int gf_frame_index) {
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
+
+ int ref_fb_idx_prev[REF_FRAMES];
+ int ref_fb_idx_curr[REF_FRAMES];
+
+ ref_fb_idx_prev[LAST_FRAME - LAST_FRAME] =
+ cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME];
+ ref_fb_idx_prev[LAST2_FRAME - LAST_FRAME] =
+ cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME];
+ ref_fb_idx_prev[LAST3_FRAME - LAST_FRAME] =
+ cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME];
+ ref_fb_idx_prev[GOLDEN_FRAME - LAST_FRAME] = cpi->gld_fb_idx;
+ ref_fb_idx_prev[BWDREF_FRAME - LAST_FRAME] = cpi->bwd_fb_idx;
+ ref_fb_idx_prev[ALTREF2_FRAME - LAST_FRAME] = cpi->alt2_fb_idx;
+ ref_fb_idx_prev[ALTREF_FRAME - LAST_FRAME] = cpi->alt_fb_idx;
+ ref_fb_idx_prev[REF_FRAMES - LAST_FRAME] = cpi->ext_fb_idx;
+
+ // Update map index for each reference frame
+ for (int ref_idx = 0; ref_idx < REF_FRAMES; ++ref_idx) {
+ int ref_frame = gf_group->ref_fb_idx_map[gf_frame_index][ref_idx];
+ ref_fb_idx_curr[ref_idx] = ref_fb_idx_prev[ref_frame - LAST_FRAME];
+ }
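+  // E.g., the gf16_multi_layer_params row for gf_group->index == 13 stores
+  // ALTREF2_FRAME in the BWDREF slot and BWDREF_FRAME in the ALTREF2 slot,
+  // so the loop above simply swaps bwd_fb_idx and alt2_fb_idx for that frame.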
+
+ cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME] =
+ ref_fb_idx_curr[LAST_FRAME - LAST_FRAME];
+ cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME] =
+ ref_fb_idx_curr[LAST2_FRAME - LAST_FRAME];
+ cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME] =
+ ref_fb_idx_curr[LAST3_FRAME - LAST_FRAME];
+ cpi->gld_fb_idx = ref_fb_idx_curr[GOLDEN_FRAME - LAST_FRAME];
+ cpi->bwd_fb_idx = ref_fb_idx_curr[BWDREF_FRAME - LAST_FRAME];
+ cpi->alt2_fb_idx = ref_fb_idx_curr[ALTREF2_FRAME - LAST_FRAME];
+ cpi->alt_fb_idx = ref_fb_idx_curr[ALTREF_FRAME - LAST_FRAME];
+ cpi->ext_fb_idx = ref_fb_idx_curr[REF_FRAMES - LAST_FRAME];
+}
+
+// Define the reference buffers that will be updated post encode.
+static void configure_buffer_updates_16(AV1_COMP *cpi) {
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
+
+ if (gf_group->update_type[gf_group->index] == KF_UPDATE) {
+ cpi->refresh_fb_idx = 0;
+
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_bwd_ref_frame = 1;
+ cpi->refresh_alt2_ref_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
+
+ return;
+ }
+
+ // Update reference frame map indexes
+ av1_ref_frame_map_idx_updates(cpi, gf_group->index);
+
+ // Update refresh index
+ switch (gf_group->refresh_idx[gf_group->index]) {
+ case LAST_FRAME:
+ cpi->refresh_fb_idx = cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME];
+ break;
+
+ case LAST2_FRAME:
+ cpi->refresh_fb_idx = cpi->lst_fb_idxes[LAST2_FRAME - LAST_FRAME];
+ break;
+
+ case LAST3_FRAME:
+ cpi->refresh_fb_idx = cpi->lst_fb_idxes[LAST3_FRAME - LAST_FRAME];
+ break;
+
+ case GOLDEN_FRAME: cpi->refresh_fb_idx = cpi->gld_fb_idx; break;
+
+ case BWDREF_FRAME: cpi->refresh_fb_idx = cpi->bwd_fb_idx; break;
+
+ case ALTREF2_FRAME: cpi->refresh_fb_idx = cpi->alt2_fb_idx; break;
+
+ case ALTREF_FRAME: cpi->refresh_fb_idx = cpi->alt_fb_idx; break;
+
+ case REF_FRAMES: cpi->refresh_fb_idx = cpi->ext_fb_idx; break;
+
+ default: assert(0); break;
+ }
+
+ // Update refresh flags
+ switch (gf_group->refresh_flag[gf_group->index]) {
+ case LAST_FRAME:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+
+ case GOLDEN_FRAME:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+
+ case BWDREF_FRAME:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 1;
+ cpi->refresh_alt2_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+
+ case ALTREF2_FRAME:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+
+ case ALTREF_FRAME:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 1;
+ break;
+
+ default: assert(0); break;
+ }
+
+ switch (gf_group->update_type[gf_group->index]) {
+ case BRF_UPDATE: cpi->rc.is_bwd_ref_frame = 1; break;
+
+ case LAST_BIPRED_UPDATE: cpi->rc.is_last_bipred_frame = 1; break;
+
+ case BIPRED_UPDATE: cpi->rc.is_bipred_frame = 1; break;
+
+ case INTNL_OVERLAY_UPDATE: cpi->rc.is_src_frame_ext_arf = 1;
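+    // Intentional fallthrough: an internal overlay is also treated as a
+    // source-ALTREF overlay, so is_src_frame_alt_ref is set as well.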
+ case OVERLAY_UPDATE: cpi->rc.is_src_frame_alt_ref = 1; break;
+
+ default: break;
+ }
+}
+#endif // USE_GF16_MULTI_LAYER
+
// Define the reference buffers that will be updated post encode.
static void configure_buffer_updates(AV1_COMP *cpi) {
TWO_PASS *const twopass = &cpi->twopass;
- // Wei-Ting: Should we define another function to take care of
+ // NOTE(weitinglin): Should we define another function to take care of
+  // cpi->rc.is_$Source_Type flags, so that this function only does what the
+  // comment above describes?
cpi->rc.is_src_frame_alt_ref = 0;
@@ -2711,45 +3584,42 @@ static void configure_buffer_updates(AV1_COMP *cpi) {
cpi->rc.is_last_bipred_frame = 0;
cpi->rc.is_bipred_frame = 0;
cpi->rc.is_src_frame_ext_arf = 0;
+
+#if USE_GF16_MULTI_LAYER
+ RATE_CONTROL *const rc = &cpi->rc;
+ if (rc->baseline_gf_interval == 16) {
+ configure_buffer_updates_16(cpi);
+ return;
+ }
+#endif // USE_GF16_MULTI_LAYER
#endif // CONFIG_EXT_REFS
switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
- case KF_UPDATE:
+ case KF_UPDATE: cpi->refresh_last_frame = 1; cpi->refresh_golden_frame = 1;
#if CONFIG_EXT_REFS
cpi->refresh_bwd_ref_frame = 1;
+ cpi->refresh_alt2_ref_frame = 1;
#endif // CONFIG_EXT_REFS
- cpi->refresh_last_frame = 1;
- cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
break;
- case LF_UPDATE:
+ case LF_UPDATE: cpi->refresh_last_frame = 1; cpi->refresh_golden_frame = 0;
#if CONFIG_EXT_REFS
- // If we have extra ALT_REFs, we can use the farthest ALT (ALT0) as
- // the BWD_REF.
- if (cpi->num_extra_arfs) {
- int tmp = cpi->bwd_fb_idx;
-
- cpi->bwd_fb_idx = cpi->alt_fb_idx;
- cpi->alt_fb_idx = cpi->arf_map[0];
- cpi->arf_map[0] = tmp;
-
- cpi->rc.is_bwd_ref_frame = 1;
- } else {
- cpi->rc.is_bwd_ref_frame = 0;
- }
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
#endif // CONFIG_EXT_REFS
- cpi->refresh_last_frame = 1;
- cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 0;
break;
case GF_UPDATE:
+ // TODO(zoeliu): To further investigate whether 'refresh_last_frame' is
+ // needed.
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 1;
#if CONFIG_EXT_REFS
cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
#endif // CONFIG_EXT_REFS
- cpi->refresh_last_frame = 1;
- cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 0;
break;
@@ -2758,17 +3628,19 @@ static void configure_buffer_updates(AV1_COMP *cpi) {
cpi->refresh_golden_frame = 1;
#if CONFIG_EXT_REFS
cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
#endif // CONFIG_EXT_REFS
cpi->refresh_alt_ref_frame = 0;
+
cpi->rc.is_src_frame_alt_ref = 1;
break;
- case ARF_UPDATE:
+ case ARF_UPDATE: cpi->refresh_last_frame = 0; cpi->refresh_golden_frame = 0;
#if CONFIG_EXT_REFS
- cpi->refresh_bwd_ref_frame = 1;
+ // NOTE: BWDREF does not get updated along with ALTREF_FRAME.
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
#endif // CONFIG_EXT_REFS
- cpi->refresh_last_frame = 0;
- cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 1;
break;
@@ -2777,26 +3649,19 @@ static void configure_buffer_updates(AV1_COMP *cpi) {
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 0;
cpi->refresh_bwd_ref_frame = 1;
+ cpi->refresh_alt2_ref_frame = 0;
cpi->refresh_alt_ref_frame = 0;
+
cpi->rc.is_bwd_ref_frame = 1;
- if (cpi->num_extra_arfs) {
- // Allow BRF use the farthest ALT_REF (ALT0) as BWD_REF by swapping
- // the virtual indices.
- // NOTE: The indices will be swapped back after this frame is encoded
- // (in av1_update_reference_frames()).
- int tmp = cpi->bwd_fb_idx;
-
- cpi->bwd_fb_idx = cpi->alt_fb_idx;
- cpi->alt_fb_idx = cpi->arf_map[0];
- cpi->arf_map[0] = tmp;
- }
break;
case LAST_BIPRED_UPDATE:
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 0;
cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
cpi->refresh_alt_ref_frame = 0;
+
cpi->rc.is_last_bipred_frame = 1;
break;
@@ -2804,7 +3669,9 @@ static void configure_buffer_updates(AV1_COMP *cpi) {
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 0;
cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
cpi->refresh_alt_ref_frame = 0;
+
cpi->rc.is_bipred_frame = 1;
break;
@@ -2812,10 +3679,20 @@ static void configure_buffer_updates(AV1_COMP *cpi) {
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 0;
cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 0;
cpi->refresh_alt_ref_frame = 0;
+
cpi->rc.is_src_frame_alt_ref = 1;
cpi->rc.is_src_frame_ext_arf = 1;
break;
+
+ case INTNL_ARF_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt2_ref_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
#endif // CONFIG_EXT_REFS
default: assert(0); break;
@@ -2857,7 +3734,11 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
// If this is an arf frame then we dont want to read the stats file or
// advance the input pointer as we already have what we need.
- if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
+ if (gf_group->update_type[gf_group->index] == ARF_UPDATE
+#if CONFIG_EXT_REFS
+ || gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE
+#endif // CONFIG_EXT_REFS
+ ) {
configure_buffer_updates(cpi);
target_rate = gf_group->bit_allocation[gf_group->index];
target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
@@ -2935,7 +3816,7 @@ void av1_rc_get_second_pass_params(AV1_COMP *cpi) {
FILE *fpfile;
fpfile = fopen("arf.stt", "a");
++arf_count;
- fprintf(fpfile, "%10d %10ld %10d %10d %10ld\n", cm->current_video_frame,
+ fprintf(fpfile, "%10d %10d %10d %10d %10d\n", cm->current_video_frame,
rc->frames_till_gf_update_due, rc->kf_boost, arf_count,
rc->gfu_boost);
diff --git a/third_party/aom/av1/encoder/firstpass.h b/third_party/aom/av1/encoder/firstpass.h
index 266766d99..9ac542bf3 100644
--- a/third_party/aom/av1/encoder/firstpass.h
+++ b/third_party/aom/av1/encoder/firstpass.h
@@ -12,6 +12,8 @@
#ifndef AV1_ENCODER_FIRSTPASS_H_
#define AV1_ENCODER_FIRSTPASS_H_
+#include "av1/common/enums.h"
+#include "av1/common/onyxc_int.h"
#include "av1/encoder/lookahead.h"
#include "av1/encoder/ratectrl.h"
@@ -45,19 +47,24 @@ typedef struct {
// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain
// number of bi-predictive frames.
#define BFG_INTERVAL 2
-// The maximum number of extra ALT_REF's
-// NOTE: This number cannot be greater than 2 or the reference frame buffer will
-// overflow.
-#define MAX_EXT_ARFS 2
+// The maximum number of extra ALTREF's, not counting ALTREF_FRAME itself
+// NOTE: REF_FRAMES indicates the maximum number of frames that may be buffered
+// to serve as references. Currently REF_FRAMES == 8.
+#define USE_GF16_MULTI_LAYER 0
+
+#if USE_GF16_MULTI_LAYER
+#define MAX_EXT_ARFS (REF_FRAMES - BWDREF_FRAME)
+#else // !USE_GF16_MULTI_LAYER
+#define MAX_EXT_ARFS (REF_FRAMES - BWDREF_FRAME - 1)
+#endif // USE_GF16_MULTI_LAYER
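+// E.g., with REF_FRAMES == 8 and BWDREF_FRAME == 5 (an assumption about the
+// enum values), MAX_EXT_ARFS == 3 in the GF16 case and 2 otherwise.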
+
#define MIN_EXT_ARF_INTERVAL 4
-#endif // CONFIG_EXT_REFS
-#if CONFIG_FLEX_REFS
#define MIN_ZERO_MOTION 0.95
#define MAX_SR_CODED_ERROR 40
#define MAX_RAW_ERR_VAR 2000
#define MIN_MV_IN_OUT 0.4
-#endif // CONFIG_FLEX_REFS
+#endif // CONFIG_EXT_REFS
#define VLOW_MOTION_THRESHOLD 950
@@ -84,10 +91,10 @@ typedef struct {
double new_mv_count;
double duration;
double count;
-#if CONFIG_FLEX_REFS
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
// standard deviation for (0, 0) motion prediction error
double raw_error_stdev;
-#endif // CONFIG_FLEX_REFS
+#endif // CONFIG_EXT_REFS || CONFIG_BGSPRITE
} FIRSTPASS_STATS;
typedef enum {
@@ -101,8 +108,9 @@ typedef enum {
LAST_BIPRED_UPDATE = 6, // Last Bi-predictive Frame
BIPRED_UPDATE = 7, // Bi-predictive Frame, but not the last one
INTNL_OVERLAY_UPDATE = 8, // Internal Overlay Frame
- FRAME_UPDATE_TYPES = 9
-#else
+ INTNL_ARF_UPDATE = 9, // Internal Altref Frame (candidate for ALTREF2)
+ FRAME_UPDATE_TYPES = 10
+#else // !CONFIG_EXT_REFS
FRAME_UPDATE_TYPES = 5
#endif // CONFIG_EXT_REFS
} FRAME_UPDATE_TYPE;
@@ -124,6 +132,9 @@ typedef struct {
#if CONFIG_EXT_REFS
unsigned char brf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
unsigned char bidir_pred_enabled[(MAX_LAG_BUFFERS * 2) + 1];
+ unsigned char ref_fb_idx_map[(MAX_LAG_BUFFERS * 2) + 1][REF_FRAMES];
+ unsigned char refresh_idx[(MAX_LAG_BUFFERS * 2) + 1];
+ unsigned char refresh_flag[(MAX_LAG_BUFFERS * 2) + 1];
#endif // CONFIG_EXT_REFS
int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
} GF_GROUP;
@@ -183,12 +194,15 @@ void av1_end_first_pass(struct AV1_COMP *cpi);
void av1_init_second_pass(struct AV1_COMP *cpi);
void av1_rc_get_second_pass_params(struct AV1_COMP *cpi);
-void av1_twopass_postencode_update(struct AV1_COMP *cpi);
// Post encode update of the rate control parameters for 2-pass
void av1_twopass_postencode_update(struct AV1_COMP *cpi);
#if CONFIG_EXT_REFS
+#if USE_GF16_MULTI_LAYER
+void av1_ref_frame_map_idx_updates(struct AV1_COMP *cpi, int gf_frame_index);
+#endif // USE_GF16_MULTI_LAYER
+
static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) {
if (arf_pending && MAX_EXT_ARFS > 0)
return interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1)
diff --git a/third_party/aom/av1/encoder/global_motion.c b/third_party/aom/av1/encoder/global_motion.c
index 661a1feb4..4d44e9a6f 100644
--- a/third_party/aom/av1/encoder/global_motion.c
+++ b/third_party/aom/av1/encoder/global_motion.c
@@ -244,14 +244,18 @@ static unsigned char *downconvert_frame(YV12_BUFFER_CONFIG *frm,
int bit_depth) {
int i, j;
uint16_t *orig_buf = CONVERT_TO_SHORTPTR(frm->y_buffer);
- uint8_t *buf = malloc(frm->y_height * frm->y_stride * sizeof(*buf));
-
- for (i = 0; i < frm->y_height; ++i)
- for (j = 0; j < frm->y_width; ++j)
- buf[i * frm->y_stride + j] =
- orig_buf[i * frm->y_stride + j] >> (bit_depth - 8);
-
- return buf;
+ uint8_t *buf_8bit = frm->y_buffer_8bit;
+ assert(buf_8bit);
+ if (!frm->buf_8bit_valid) {
+ for (i = 0; i < frm->y_height; ++i) {
+ for (j = 0; j < frm->y_width; ++j) {
+ buf_8bit[i * frm->y_stride + j] =
+ orig_buf[i * frm->y_stride + j] >> (bit_depth - 8);
+ }
+ }
+ frm->buf_8bit_valid = 1;
+ }
+ return buf_8bit;
}
#endif
@@ -274,16 +278,10 @@ int compute_global_motion_feature_based(
if (frm->flags & YV12_FLAG_HIGHBITDEPTH) {
// The frame buffer is 16-bit, so we need to convert to 8 bits for the
// following code. We cache the result until the frame is released.
- if (frm->y_buffer_8bit)
- frm_buffer = frm->y_buffer_8bit;
- else
- frm_buffer = frm->y_buffer_8bit = downconvert_frame(frm, bit_depth);
+ frm_buffer = downconvert_frame(frm, bit_depth);
}
if (ref->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (ref->y_buffer_8bit)
- ref_buffer = ref->y_buffer_8bit;
- else
- ref_buffer = ref->y_buffer_8bit = downconvert_frame(ref, bit_depth);
+ ref_buffer = downconvert_frame(ref, bit_depth);
}
#endif
diff --git a/third_party/aom/av1/encoder/hash.c b/third_party/aom/av1/encoder/hash.c
new file mode 100644
index 000000000..89c5bd8a3
--- /dev/null
+++ b/third_party/aom/av1/encoder/hash.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/encoder/hash.h"
+
+static void crc_calculator_process_data(CRC_CALCULATOR *p_crc_calculator,
+ uint8_t *pData, uint32_t dataLength) {
+ for (uint32_t i = 0; i < dataLength; i++) {
+ const uint8_t index =
+ (p_crc_calculator->remainder >> (p_crc_calculator->bits - 8)) ^
+ pData[i];
+ p_crc_calculator->remainder <<= 8;
+ p_crc_calculator->remainder ^= p_crc_calculator->table[index];
+ }
+}
+
+void crc_calculator_reset(CRC_CALCULATOR *p_crc_calculator) {
+ p_crc_calculator->remainder = 0;
+}
+
+static uint32_t crc_calculator_get_crc(CRC_CALCULATOR *p_crc_calculator) {
+ return p_crc_calculator->remainder & p_crc_calculator->final_result_mask;
+}
+
+static void crc_calculator_init_table(CRC_CALCULATOR *p_crc_calculator) {
+ const uint32_t high_bit = 1 << (p_crc_calculator->bits - 1);
+ const uint32_t byte_high_bit = 1 << (8 - 1);
+
+ for (uint32_t value = 0; value < 256; value++) {
+ uint32_t remainder = 0;
+ for (uint8_t mask = byte_high_bit; mask != 0; mask >>= 1) {
+ if (value & mask) {
+ remainder ^= high_bit;
+ }
+
+ if (remainder & high_bit) {
+ remainder <<= 1;
+ remainder ^= p_crc_calculator->trunc_poly;
+ } else {
+ remainder <<= 1;
+ }
+ }
+ p_crc_calculator->table[value] = remainder;
+ }
+}
+
+void av1_crc_calculator_init(CRC_CALCULATOR *p_crc_calculator, uint32_t bits,
+ uint32_t truncPoly) {
+ p_crc_calculator->remainder = 0;
+ p_crc_calculator->bits = bits;
+ p_crc_calculator->trunc_poly = truncPoly;
+ p_crc_calculator->final_result_mask = (1 << bits) - 1;
+ crc_calculator_init_table(p_crc_calculator);
+}
+
+uint32_t av1_get_crc_value(CRC_CALCULATOR *p_crc_calculator, uint8_t *p,
+ int length) {
+ crc_calculator_reset(p_crc_calculator);
+ crc_calculator_process_data(p_crc_calculator, p, length);
+ return crc_calculator_get_crc(p_crc_calculator);
+}
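+
+// Usage sketch (illustrative only): callers such as hash_motion.c initialize
+// a calculator once, e.g.
+//   av1_crc_calculator_init(&crc_calculator1, 24, 0x5D6DCB);
+// and then hash a block of bytes with
+//   uint32_t crc = av1_get_crc_value(&crc_calculator1, p, length);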
diff --git a/third_party/aom/av1/encoder/hash.h b/third_party/aom/av1/encoder/hash.h
new file mode 100644
index 000000000..a0fd54fb6
--- /dev/null
+++ b/third_party/aom/av1/encoder/hash.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_ENCODER_HASH_H_
+#define AV1_ENCODER_HASH_H_
+
+#include "./aom_config.h"
+#include "aom/aom_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _crc_calculator {
+ uint32_t remainder;
+ uint32_t trunc_poly;
+ uint32_t bits;
+ uint32_t table[256];
+ uint32_t final_result_mask;
+} CRC_CALCULATOR;
+
+// Initialize the crc calculator. It must be executed at least once before
+// calling av1_get_crc_value().
+void av1_crc_calculator_init(CRC_CALCULATOR *p_crc_calculator, uint32_t bits,
+ uint32_t truncPoly);
+
+uint32_t av1_get_crc_value(CRC_CALCULATOR *p_crc_calculator, uint8_t *p,
+ int length);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AV1_ENCODER_HASH_H_
diff --git a/third_party/aom/av1/encoder/hash_motion.c b/third_party/aom/av1/encoder/hash_motion.c
new file mode 100644
index 000000000..2378597ad
--- /dev/null
+++ b/third_party/aom/av1/encoder/hash_motion.c
@@ -0,0 +1,380 @@
+#include <assert.h>
+#include "av1/encoder/hash.h"
+#include "av1/encoder/hash_motion.h"
+#include "./av1_rtcd.h"
+
+static const int crc_bits = 16;
+static const int block_size_bits = 3;
+static CRC_CALCULATOR crc_calculator1;
+static CRC_CALCULATOR crc_calculator2;
+static int g_crc_initialized = 0;
+
+static void hash_table_clear_all(hash_table *p_hash_table) {
+ if (p_hash_table->p_lookup_table == NULL) {
+ return;
+ }
+ int max_addr = 1 << (crc_bits + block_size_bits);
+ for (int i = 0; i < max_addr; i++) {
+ if (p_hash_table->p_lookup_table[i] != NULL) {
+ vector_destroy(p_hash_table->p_lookup_table[i]);
+ aom_free(p_hash_table->p_lookup_table[i]);
+ p_hash_table->p_lookup_table[i] = NULL;
+ }
+ }
+}
+
+// TODO(youzhou@microsoft.com): is screen content with a bit depth higher than
+// 8 supported? If yes, fix this function.
+static void get_pixels_in_1D_char_array_by_block_2x2(uint8_t *y_src, int stride,
+ uint8_t *p_pixels_in1D) {
+ uint8_t *p_pel = y_src;
+ int index = 0;
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 2; j++) {
+ p_pixels_in1D[index++] = p_pel[j];
+ }
+ p_pel += stride;
+ }
+}
+
+static int is_block_2x2_row_same_value(uint8_t *p) {
+ if (p[0] != p[1] || p[2] != p[3]) {
+ return 0;
+ }
+
+ return 1;
+}
+
+static int is_block_2x2_col_same_value(uint8_t *p) {
+ if ((p[0] != p[2]) || (p[1] != p[3])) {
+ return 0;
+ }
+
+ return 1;
+}
+
+// The hash value hash_value1 consists of two parts: the first 3 bits relate
+// to the block size, and the remaining 16 bits are the CRC value. This
+// function is used to get the first 3 bits.
+static int hash_block_size_to_index(int block_size) {
+ switch (block_size) {
+ case 4: return 0;
+ case 8: return 1;
+ case 16: return 2;
+ case 32: return 3;
+ case 64: return 4;
+ default: return -1;
+ }
+}
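+
+// For example, a 16x16 block maps to index 2; assuming hash_value1 is
+// assembled as (index << crc_bits) | crc, as the comment above implies, its
+// entries land in a lookup table of 1 << (crc_bits + block_size_bits) slots
+// with the top 3 bits equal to 010.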
+
+void av1_hash_table_init(hash_table *p_hash_table) {
+ if (g_crc_initialized == 0) {
+ av1_crc_calculator_init(&crc_calculator1, 24, 0x5D6DCB);
+ av1_crc_calculator_init(&crc_calculator2, 24, 0x864CFB);
+ g_crc_initialized = 1;
+ }
+ p_hash_table->p_lookup_table = NULL;
+}
+
+void av1_hash_table_destroy(hash_table *p_hash_table) {
+ hash_table_clear_all(p_hash_table);
+ aom_free(p_hash_table->p_lookup_table);
+ p_hash_table->p_lookup_table = NULL;
+}
+
+void av1_hash_table_create(hash_table *p_hash_table) {
+ if (p_hash_table->p_lookup_table != NULL) {
+ hash_table_clear_all(p_hash_table);
+ return;
+ }
+ const int max_addr = 1 << (crc_bits + block_size_bits);
+ p_hash_table->p_lookup_table =
+ (Vector **)aom_malloc(sizeof(p_hash_table->p_lookup_table[0]) * max_addr);
+ memset(p_hash_table->p_lookup_table, 0,
+ sizeof(p_hash_table->p_lookup_table[0]) * max_addr);
+}
+
+static void hash_table_add_to_table(hash_table *p_hash_table,
+ uint32_t hash_value,
+ block_hash *curr_block_hash) {
+ if (p_hash_table->p_lookup_table[hash_value] == NULL) {
+ p_hash_table->p_lookup_table[hash_value] =
+ aom_malloc(sizeof(p_hash_table->p_lookup_table[0][0]));
+ vector_setup(p_hash_table->p_lookup_table[hash_value], 10,
+ sizeof(curr_block_hash[0]));
+ vector_push_back(p_hash_table->p_lookup_table[hash_value], curr_block_hash);
+ } else {
+ vector_push_back(p_hash_table->p_lookup_table[hash_value], curr_block_hash);
+ }
+}
+
+int32_t av1_hash_table_count(hash_table *p_hash_table, uint32_t hash_value) {
+ if (p_hash_table->p_lookup_table[hash_value] == NULL) {
+ return 0;
+ } else {
+ return (int32_t)(p_hash_table->p_lookup_table[hash_value]->size);
+ }
+}
+
+Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
+ uint32_t hash_value) {
+ assert(av1_hash_table_count(p_hash_table, hash_value) > 0);
+ return vector_begin(p_hash_table->p_lookup_table[hash_value]);
+}
+
+int32_t av1_has_exact_match(hash_table *p_hash_table, uint32_t hash_value1,
+ uint32_t hash_value2) {
+ if (p_hash_table->p_lookup_table[hash_value1] == NULL) {
+ return 0;
+ }
+ Iterator iterator = vector_begin(p_hash_table->p_lookup_table[hash_value1]);
+ Iterator last = vector_end(p_hash_table->p_lookup_table[hash_value1]);
+ for (; !iterator_equals(&iterator, &last); iterator_increment(&iterator)) {
+ if ((*(block_hash *)iterator_get(&iterator)).hash_value2 == hash_value2) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
+ uint32_t *pic_block_hash[2],
+ int8_t *pic_block_same_info[3]) {
+ const int width = 2;
+ const int height = 2;
+ const int x_end = picture->y_crop_width - width + 1;
+ const int y_end = picture->y_crop_height - height + 1;
+
+ const int length = width * 2;
+ uint8_t p[4];
+
+ int pos = 0;
+ for (int y_pos = 0; y_pos < y_end; y_pos++) {
+ for (int x_pos = 0; x_pos < x_end; x_pos++) {
+ get_pixels_in_1D_char_array_by_block_2x2(
+ picture->y_buffer + y_pos * picture->y_stride + x_pos,
+ picture->y_stride, p);
+ pic_block_same_info[0][pos] = is_block_2x2_row_same_value(p);
+ pic_block_same_info[1][pos] = is_block_2x2_col_same_value(p);
+
+ pic_block_hash[0][pos] =
+ av1_get_crc_value(&crc_calculator1, p, length * sizeof(p[0]));
+ pic_block_hash[1][pos] =
+ av1_get_crc_value(&crc_calculator2, p, length * sizeof(p[0]));
+
+ pos++;
+ }
+ pos += width - 1;
+ }
+}
+
+void av1_generate_block_hash_value(const YV12_BUFFER_CONFIG *picture,
+ int block_size,
+ uint32_t *src_pic_block_hash[2],
+ uint32_t *dst_pic_block_hash[2],
+ int8_t *src_pic_block_same_info[3],
+ int8_t *dst_pic_block_same_info[3]) {
+ const int pic_width = picture->y_crop_width;
+ const int x_end = picture->y_crop_width - block_size + 1;
+ const int y_end = picture->y_crop_height - block_size + 1;
+
+ const int src_size = block_size >> 1;
+ const int quad_size = block_size >> 2;
+
+ uint32_t p[4];
+ const int length = sizeof(p);
+
+ int pos = 0;
+ for (int y_pos = 0; y_pos < y_end; y_pos++) {
+ for (int x_pos = 0; x_pos < x_end; x_pos++) {
+ p[0] = src_pic_block_hash[0][pos];
+ p[1] = src_pic_block_hash[0][pos + src_size];
+ p[2] = src_pic_block_hash[0][pos + src_size * pic_width];
+ p[3] = src_pic_block_hash[0][pos + src_size * pic_width + src_size];
+ dst_pic_block_hash[0][pos] =
+ av1_get_crc_value(&crc_calculator1, (uint8_t *)p, length);
+
+ p[0] = src_pic_block_hash[1][pos];
+ p[1] = src_pic_block_hash[1][pos + src_size];
+ p[2] = src_pic_block_hash[1][pos + src_size * pic_width];
+ p[3] = src_pic_block_hash[1][pos + src_size * pic_width + src_size];
+ dst_pic_block_hash[1][pos] =
+ av1_get_crc_value(&crc_calculator2, (uint8_t *)p, length);
+
+ dst_pic_block_same_info[0][pos] =
+ src_pic_block_same_info[0][pos] &&
+ src_pic_block_same_info[0][pos + quad_size] &&
+ src_pic_block_same_info[0][pos + src_size] &&
+ src_pic_block_same_info[0][pos + src_size * pic_width] &&
+ src_pic_block_same_info[0][pos + src_size * pic_width + quad_size] &&
+ src_pic_block_same_info[0][pos + src_size * pic_width + src_size];
+
+ dst_pic_block_same_info[1][pos] =
+ src_pic_block_same_info[1][pos] &&
+ src_pic_block_same_info[1][pos + src_size] &&
+ src_pic_block_same_info[1][pos + quad_size * pic_width] &&
+ src_pic_block_same_info[1][pos + quad_size * pic_width + src_size] &&
+ src_pic_block_same_info[1][pos + src_size * pic_width] &&
+ src_pic_block_same_info[1][pos + src_size * pic_width + src_size];
+ pos++;
+ }
+ pos += block_size - 1;
+ }
+
+ if (block_size >= 4) {
+ const int size_minus1 = block_size - 1;
+ pos = 0;
+ for (int y_pos = 0; y_pos < y_end; y_pos++) {
+ for (int x_pos = 0; x_pos < x_end; x_pos++) {
+ dst_pic_block_same_info[2][pos] =
+ (!dst_pic_block_same_info[0][pos] &&
+ !dst_pic_block_same_info[1][pos]) ||
+ (((x_pos & size_minus1) == 0) && ((y_pos & size_minus1) == 0));
+ pos++;
+ }
+ pos += block_size - 1;
+ }
+ }
+}
+
+void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
+ uint32_t *pic_hash[2],
+ int8_t *pic_is_same,
+ int pic_width, int pic_height,
+ int block_size) {
+ const int x_end = pic_width - block_size + 1;
+ const int y_end = pic_height - block_size + 1;
+
+ const int8_t *src_is_added = pic_is_same;
+ const uint32_t *src_hash[2] = { pic_hash[0], pic_hash[1] };
+
+ int add_value = hash_block_size_to_index(block_size);
+ assert(add_value >= 0);
+ add_value <<= crc_bits;
+ const int crc_mask = (1 << crc_bits) - 1;
+
+ for (int x_pos = 0; x_pos < x_end; x_pos++) {
+ for (int y_pos = 0; y_pos < y_end; y_pos++) {
+ const int pos = y_pos * pic_width + x_pos;
+ // valid data
+ if (src_is_added[pos]) {
+ block_hash curr_block_hash;
+ curr_block_hash.x = x_pos;
+ curr_block_hash.y = y_pos;
+
+ const uint32_t hash_value1 = (src_hash[0][pos] & crc_mask) + add_value;
+ curr_block_hash.hash_value2 = src_hash[1][pos];
+
+ hash_table_add_to_table(p_hash_table, hash_value1, &curr_block_hash);
+ }
+ }
+ }
+}
+
+int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
+ int block_size, int x_start, int y_start) {
+ const int stride = picture->y_stride;
+ const uint8_t *p = picture->y_buffer + y_start * stride + x_start;
+
+ for (int i = 0; i < block_size; i++) {
+ for (int j = 1; j < block_size; j++) {
+ if (p[j] != p[0]) {
+ return 0;
+ }
+ }
+ p += stride;
+ }
+
+ return 1;
+}
+
+int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture,
+ int block_size, int x_start, int y_start) {
+ const int stride = picture->y_stride;
+ const uint8_t *p = picture->y_buffer + y_start * stride + x_start;
+
+ for (int i = 0; i < block_size; i++) {
+ for (int j = 1; j < block_size; j++) {
+ if (p[j * stride + i] != p[i]) {
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+// global buffer for hash value calculation of a block
+// used only in av1_get_block_hash_value()
+static uint32_t hash_value_buffer[2][2][1024]; // [first hash/second hash]
+ // [two buffers used ping-pong]
+ // [num of 2x2 blocks in 64x64]
+
+void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size,
+ uint32_t *hash_value1, uint32_t *hash_value2) {
+ uint8_t pixel_to_hash[4];
+ uint32_t to_hash[4];
+ const int add_value = hash_block_size_to_index(block_size) << crc_bits;
+ assert(add_value >= 0);
+ const int crc_mask = (1 << crc_bits) - 1;
+
+ // 2x2 subblock hash values in current CU
+ int sub_block_in_width = (block_size >> 1);
+ for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
+ for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
+ int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
+ get_pixels_in_1D_char_array_by_block_2x2(y_src + y_pos * stride + x_pos,
+ stride, pixel_to_hash);
+
+ hash_value_buffer[0][0][pos] = av1_get_crc_value(
+ &crc_calculator1, pixel_to_hash, sizeof(pixel_to_hash));
+ hash_value_buffer[1][0][pos] = av1_get_crc_value(
+ &crc_calculator2, pixel_to_hash, sizeof(pixel_to_hash));
+ }
+ }
+
+ int src_sub_block_in_width = sub_block_in_width;
+ sub_block_in_width >>= 1;
+
+ int src_idx = 1;
+ int dst_idx = 0;
+
+ // 4x4 subblock hash values to current block hash values
+ for (int sub_width = 4; sub_width <= block_size; sub_width *= 2) {
+ src_idx = 1 - src_idx;
+ dst_idx = 1 - dst_idx;
+
+ int dst_pos = 0;
+ for (int y_pos = 0; y_pos < sub_block_in_width; y_pos++) {
+ for (int x_pos = 0; x_pos < sub_block_in_width; x_pos++) {
+ int srcPos = (y_pos << 1) * src_sub_block_in_width + (x_pos << 1);
+
+ to_hash[0] = hash_value_buffer[0][src_idx][srcPos];
+ to_hash[1] = hash_value_buffer[0][src_idx][srcPos + 1];
+ to_hash[2] =
+ hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width];
+ to_hash[3] =
+ hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width + 1];
+
+ hash_value_buffer[0][dst_idx][dst_pos] = av1_get_crc_value(
+ &crc_calculator1, (uint8_t *)to_hash, sizeof(to_hash));
+
+ to_hash[0] = hash_value_buffer[1][src_idx][srcPos];
+ to_hash[1] = hash_value_buffer[1][src_idx][srcPos + 1];
+ to_hash[2] =
+ hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width];
+ to_hash[3] =
+ hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width + 1];
+ hash_value_buffer[1][dst_idx][dst_pos] = av1_get_crc_value(
+ &crc_calculator2, (uint8_t *)to_hash, sizeof(to_hash));
+ dst_pos++;
+ }
+ }
+
+ src_sub_block_in_width = sub_block_in_width;
+ sub_block_in_width >>= 1;
+ }
+
+ *hash_value1 = (hash_value_buffer[0][dst_idx][0] & crc_mask) + add_value;
+ *hash_value2 = hash_value_buffer[1][dst_idx][0];
+}
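
A note on the two hash values used throughout this file: hash_value1 supplies the table index (3 block-size bits on top of the 16 low CRC bits, matching crc_bits + block_size_bits above), while hash_value2 is stored inside the bucket and compared to resolve collisions. Restated as a hypothetical helper, for illustration only:

/* Illustrative restatement of how av1_add_to_hash_map_by_row_with_precal_data
 * and av1_get_block_hash_value assemble the table index. */
static uint32_t sketch_table_index(uint32_t crc24, int block_size) {
  const uint32_t crc_mask = (1u << 16) - 1;                  /* crc_bits = 16 */
  const int size_idx = hash_block_size_to_index(block_size); /* 0..4 */
  return ((uint32_t)size_idx << 16) + (crc24 & crc_mask);
}
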
diff --git a/third_party/aom/av1/encoder/hash_motion.h b/third_party/aom/av1/encoder/hash_motion.h
new file mode 100644
index 000000000..26e1ac46e
--- /dev/null
+++ b/third_party/aom/av1/encoder/hash_motion.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_ENCODER_HASH_MOTION_H_
+#define AV1_ENCODER_HASH_MOTION_H_
+
+#include "./aom_config.h"
+#include "aom/aom_integer.h"
+#include "aom_scale/yv12config.h"
+#include "third_party/vector/vector.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Stores a block's hash info.
+// x and y are the block position relative to the top-left of the picture;
+// hash_value2 stores the second hash value.
+typedef struct _block_hash {
+ int16_t x;
+ int16_t y;
+ uint32_t hash_value2;
+} block_hash;
+
+typedef struct _hash_table { Vector **p_lookup_table; } hash_table;
+
+void av1_hash_table_init(hash_table *p_hash_table);
+void av1_hash_table_destroy(hash_table *p_hash_table);
+void av1_hash_table_create(hash_table *p_hash_table);
+int32_t av1_hash_table_count(hash_table *p_hash_table, uint32_t hash_value);
+Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
+ uint32_t hash_value);
+int32_t av1_has_exact_match(hash_table *p_hash_table, uint32_t hash_value1,
+ uint32_t hash_value2);
+void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
+ uint32_t *pic_block_hash[2],
+ int8_t *pic_block_same_info[3]);
+void av1_generate_block_hash_value(const YV12_BUFFER_CONFIG *picture,
+ int block_size,
+ uint32_t *src_pic_block_hash[2],
+ uint32_t *dst_pic_block_hash[2],
+ int8_t *src_pic_block_same_info[3],
+ int8_t *dst_pic_block_same_info[3]);
+void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
+ uint32_t *pic_hash[2],
+ int8_t *pic_is_same,
+ int pic_width, int pic_height,
+ int block_size);
+
+// Check whether the block starting at (x_start, y_start) with size
+// block_size x block_size has the same color in every row.
+int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
+ int block_size, int x_start, int y_start);
+// Check whether the block starting at (x_start, y_start) with size
+// block_size x block_size has the same color in every column.
+int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture,
+ int block_size, int x_start, int y_start);
+void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size,
+ uint32_t *hash_value1, uint32_t *hash_value2);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AV1_ENCODER_HASH_MOTION_H_
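
Putting the API together, the intended flow is: create the table once per frame, hash every 2x2 position, fold the hashes up to the target block size, add the rows whose pic_block_same_info[2] flag marks them usable, then probe buckets at search time. A hypothetical driver, illustrative only, with buffer allocation left to the caller:

#include "av1/encoder/hash_motion.h"

/* Hypothetical sketch: builds 4x4 entries for one frame and probes one
 * source block. hash2x2/hash4x4 hold one uint32_t per pixel position and
 * same2x2/same4x4 one int8_t per position (caller-allocated). */
static void sketch_hash_me(const YV12_BUFFER_CONFIG *pic, const uint8_t *src,
                           int src_stride, uint32_t *hash2x2[2],
                           uint32_t *hash4x4[2], int8_t *same2x2[3],
                           int8_t *same4x4[3]) {
  hash_table ht;
  av1_hash_table_init(&ht);
  av1_hash_table_create(&ht);
  av1_generate_block_2x2_hash_value(pic, hash2x2, same2x2); /* base layer */
  av1_generate_block_hash_value(pic, 4, hash2x2, hash4x4, same2x2, same4x4);
  av1_add_to_hash_map_by_row_with_precal_data(
      &ht, hash4x4, same4x4[2], pic->y_crop_width, pic->y_crop_height, 4);

  uint32_t h1, h2;
  av1_get_block_hash_value((uint8_t *)src, src_stride, 4, &h1, &h2);
  if (av1_has_exact_match(&ht, h1, h2)) {
    Iterator it = av1_hash_get_first_iterator(&ht, h1);
    /* Walk the bucket; entries whose hash_value2 equals h2 give candidate
     * block positions, hence candidate MVs, for this source block. */
    (void)it;
  }
  av1_hash_table_destroy(&ht);
}
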
diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
index 85f4b7d9b..6ddeb2b77 100644
--- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
+++ b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c
@@ -51,7 +51,7 @@ static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
return;
}
-#if CONFIG_LGT
+#if CONFIG_LGT || CONFIG_DAALA_DCT4
// only C version has LGTs
av1_fht4x4_c(src_diff, coeff, diff_stride, txfm_param);
#else
@@ -107,7 +107,7 @@ static void fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
-#if CONFIG_LGT
+#if CONFIG_LGT || CONFIG_DAALA_DCT8
av1_fht8x8_c(src_diff, coeff, diff_stride, txfm_param);
#else
av1_fht8x8(src_diff, coeff, diff_stride, txfm_param);
@@ -116,7 +116,11 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_DAALA_DCT16
+ av1_fht16x16_c(src_diff, coeff, diff_stride, txfm_param);
+#else
av1_fht16x16(src_diff, coeff, diff_stride, txfm_param);
+#endif // CONFIG_DAALA_DCT16
}
static void fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
@@ -136,11 +140,31 @@ static void fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
#if CONFIG_EXT_TX
if (txfm_param->tx_type == IDTX)
- av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, txfm_param->tx_type);
+ av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, 64, txfm_param->tx_type);
else
#endif
av1_fht64x64(src_diff, coeff, diff_stride, txfm_param);
}
+
+static void fwd_txfm_32x64(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_EXT_TX
+ if (txfm_param->tx_type == IDTX)
+ av1_fwd_idtx_c(src_diff, coeff, diff_stride, 32, 64, txfm_param->tx_type);
+ else
+#endif
+ av1_fht32x64(src_diff, coeff, diff_stride, txfm_param);
+}
+
+static void fwd_txfm_64x32(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_EXT_TX
+ if (txfm_param->tx_type == IDTX)
+ av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, 32, txfm_param->tx_type);
+ else
+#endif
+ av1_fht64x32(src_diff, coeff, diff_stride, txfm_param);
+}
#endif // CONFIG_TX64X64
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
@@ -211,7 +235,7 @@ static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
- const int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
const int bd = txfm_param->bd;
if (txfm_param->lossless) {
assert(tx_type == DCT_DCT);
@@ -296,7 +320,7 @@ static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
- const int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
const int bd = txfm_param->bd;
switch (tx_type) {
case DCT_DCT:
@@ -334,7 +358,7 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
- const int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
const int bd = txfm_param->bd;
switch (tx_type) {
case DCT_DCT:
@@ -372,7 +396,7 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
- const int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
const int bd = txfm_param->bd;
switch (tx_type) {
case DCT_DCT:
@@ -408,10 +432,89 @@ static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
}
#if CONFIG_TX64X64
+static void highbd_fwd_txfm_32x64(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ const TX_TYPE tx_type = txfm_param->tx_type;
+ const int bd = txfm_param->bd;
+ switch (tx_type) {
+ case DCT_DCT:
+ av1_fwd_txfm2d_32x64_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+ break;
+#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ // TODO(sarahparker)
+ // I've deleted the 64x64 implementations that existed in lieu
+ // of adst, flipadst and identity for simplicity but will bring back
+ // in a later change. This shouldn't impact performance since
+ // DCT_DCT is the only extended type currently allowed for 64x64,
+ // as dictated by get_ext_tx_set_type in blockd.h.
+ av1_fwd_txfm2d_32x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
+ break;
+ case IDTX:
+ av1_fwd_idtx_c(src_diff, dst_coeff, diff_stride, 32, 64, tx_type);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;
+ }
+}
+
+static void highbd_fwd_txfm_64x32(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TxfmParam *txfm_param) {
+ int32_t *dst_coeff = (int32_t *)coeff;
+ const TX_TYPE tx_type = txfm_param->tx_type;
+ const int bd = txfm_param->bd;
+ switch (tx_type) {
+ case DCT_DCT:
+ av1_fwd_txfm2d_64x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+ break;
+#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ // TODO(sarahparker)
+ // I've deleted the 64x64 implementations that existed in lieu
+ // of adst, flipadst and identity for simplicity but will bring back
+ // in a later change. This shouldn't impact performance since
+ // DCT_DCT is the only extended type currently allowed for 64x64,
+ // as dictated by get_ext_tx_set_type in blockd.h.
+ av1_fwd_txfm2d_64x32_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
+ break;
+ case IDTX:
+ av1_fwd_idtx_c(src_diff, dst_coeff, diff_stride, 64, 32, tx_type);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;
+ }
+}
static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
- const int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
const int bd = txfm_param->bd;
switch (tx_type) {
case DCT_DCT:
@@ -441,7 +544,7 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
av1_fwd_txfm2d_64x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
break;
case IDTX:
- av1_fwd_idtx_c(src_diff, dst_coeff, diff_stride, 64, tx_type);
+ av1_fwd_idtx_c(src_diff, dst_coeff, diff_stride, 64, 64, tx_type);
break;
#endif // CONFIG_EXT_TX
default: assert(0); break;
@@ -452,11 +555,25 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
TxfmParam *txfm_param) {
const TX_SIZE tx_size = txfm_param->tx_size;
+#if CONFIG_LGT_FROM_PRED
+ if (txfm_param->use_lgt) {
+ // if use_lgt is 1, it will override tx_type
+ assert(is_lgt_allowed(txfm_param->mode, tx_size));
+ flgt2d_from_pred_c(src_diff, coeff, diff_stride, txfm_param);
+ return;
+ }
+#endif // CONFIG_LGT_FROM_PRED
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param);
break;
+ case TX_32X64:
+ fwd_txfm_32x64(src_diff, coeff, diff_stride, txfm_param);
+ break;
+ case TX_64X32:
+ fwd_txfm_64x32(src_diff, coeff, diff_stride, txfm_param);
+ break;
#endif // CONFIG_TX64X64
case TX_32X32:
fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param);
@@ -509,6 +626,12 @@ void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
case TX_64X64:
highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param);
break;
+ case TX_32X64:
+ highbd_fwd_txfm_32x64(src_diff, coeff, diff_stride, txfm_param);
+ break;
+ case TX_64X32:
+ highbd_fwd_txfm_64x32(src_diff, coeff, diff_stride, txfm_param);
+ break;
#endif // CONFIG_TX64X64
case TX_32X32:
highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param);
diff --git a/third_party/aom/av1/encoder/k_means_template.h b/third_party/aom/av1/encoder/k_means_template.h
new file mode 100644
index 000000000..3a433d9b5
--- /dev/null
+++ b/third_party/aom/av1/encoder/k_means_template.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "av1/encoder/palette.h"
+#include "av1/encoder/random.h"
+
+#ifndef AV1_K_MEANS_DIM
+#error "This template requires AV1_K_MEANS_DIM to be defined"
+#endif
+
+#define RENAME_(x, y) AV1_K_MEANS_RENAME(x, y)
+#define RENAME(x) RENAME_(x, AV1_K_MEANS_DIM)
+
+static float RENAME(calc_dist)(const float *p1, const float *p2) {
+ float dist = 0;
+ int i;
+ for (i = 0; i < AV1_K_MEANS_DIM; ++i) {
+ const float diff = p1[i] - p2[i];
+ dist += diff * diff;
+ }
+ return dist;
+}
+
+void RENAME(av1_calc_indices)(const float *data, const float *centroids,
+ uint8_t *indices, int n, int k) {
+ int i, j;
+ for (i = 0; i < n; ++i) {
+ float min_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM, centroids);
+ indices[i] = 0;
+ for (j = 1; j < k; ++j) {
+ const float this_dist = RENAME(calc_dist)(
+ data + i * AV1_K_MEANS_DIM, centroids + j * AV1_K_MEANS_DIM);
+ if (this_dist < min_dist) {
+ min_dist = this_dist;
+ indices[i] = j;
+ }
+ }
+ }
+}
+
+static void RENAME(calc_centroids)(const float *data, float *centroids,
+ const uint8_t *indices, int n, int k) {
+ int i, j, index;
+ int count[PALETTE_MAX_SIZE];
+ unsigned int rand_state = (unsigned int)data[0];
+
+ assert(n <= 32768);
+
+ memset(count, 0, sizeof(count[0]) * k);
+ memset(centroids, 0, sizeof(centroids[0]) * k * AV1_K_MEANS_DIM);
+
+ for (i = 0; i < n; ++i) {
+ index = indices[i];
+ assert(index < k);
+ ++count[index];
+ for (j = 0; j < AV1_K_MEANS_DIM; ++j) {
+ centroids[index * AV1_K_MEANS_DIM + j] += data[i * AV1_K_MEANS_DIM + j];
+ }
+ }
+
+ for (i = 0; i < k; ++i) {
+ if (count[i] == 0) {
+ memcpy(centroids + i * AV1_K_MEANS_DIM,
+ data + (lcg_rand16(&rand_state) % n) * AV1_K_MEANS_DIM,
+ sizeof(centroids[0]) * AV1_K_MEANS_DIM);
+ } else {
+ const float norm = 1.0f / count[i];
+ for (j = 0; j < AV1_K_MEANS_DIM; ++j)
+ centroids[i * AV1_K_MEANS_DIM + j] *= norm;
+ }
+ }
+
+ // Round to nearest integers.
+ for (i = 0; i < k * AV1_K_MEANS_DIM; ++i) {
+ centroids[i] = roundf(centroids[i]);
+ }
+}
+
+static float RENAME(calc_total_dist)(const float *data, const float *centroids,
+ const uint8_t *indices, int n, int k) {
+ float dist = 0;
+ int i;
+ (void)k;
+
+ for (i = 0; i < n; ++i)
+ dist += RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM,
+ centroids + indices[i] * AV1_K_MEANS_DIM);
+
+ return dist;
+}
+
+void RENAME(av1_k_means)(const float *data, float *centroids, uint8_t *indices,
+ int n, int k, int max_itr) {
+ int i;
+ float this_dist;
+ float pre_centroids[2 * PALETTE_MAX_SIZE];
+ uint8_t pre_indices[MAX_SB_SQUARE];
+
+ RENAME(av1_calc_indices)(data, centroids, indices, n, k);
+ this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k);
+
+ for (i = 0; i < max_itr; ++i) {
+ const float pre_dist = this_dist;
+ memcpy(pre_centroids, centroids,
+ sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM);
+ memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
+
+ RENAME(calc_centroids)(data, centroids, indices, n, k);
+ RENAME(av1_calc_indices)(data, centroids, indices, n, k);
+ this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k);
+
+ if (this_dist > pre_dist) {
+ memcpy(centroids, pre_centroids,
+ sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM);
+ memcpy(indices, pre_indices, sizeof(pre_indices[0]) * n);
+ break;
+ }
+ if (!memcmp(centroids, pre_centroids,
+ sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM))
+ break;
+ }
+}
+
+#undef RENAME_
+#undef RENAME
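
The template is instantiated by defining AV1_K_MEANS_DIM before including this header; RENAME() token-pastes the dimension into every symbol, so av1_k_means becomes av1_k_means_dim1 or av1_k_means_dim2. palette.c, later in this change, uses exactly this pattern:

/* One specialized copy of the k-means routines per include: */
#define AV1_K_MEANS_DIM 1
#include "av1/encoder/k_means_template.h" /* emits av1_k_means_dim1, ... */
#undef AV1_K_MEANS_DIM
#define AV1_K_MEANS_DIM 2
#include "av1/encoder/k_means_template.h" /* emits av1_k_means_dim2, ... */
#undef AV1_K_MEANS_DIM
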
diff --git a/third_party/aom/av1/encoder/mbgraph.c b/third_party/aom/av1/encoder/mbgraph.c
index 3f5daebcc..7d2510af9 100644
--- a/third_party/aom/av1/encoder/mbgraph.c
+++ b/third_party/aom/av1/encoder/mbgraph.c
@@ -47,32 +47,32 @@ static unsigned int do_16x16_motion_iteration(AV1_COMP *cpi, const MV *ref_mv,
av1_hex_search(x, &ref_full, step_param, x->errorperbit, 0,
cond_cost_list(cpi, cost_list), &v_fn_ptr, 0, ref_mv);
- // Try sub-pixel MC
- // if (bestsme > error_thresh && bestsme < INT_MAX)
+// Try sub-pixel MC
+// if (bestsme > error_thresh && bestsme < INT_MAX)
+#if CONFIG_AMVR
+ if (cpi->common.cur_frame_mv_precision_level == 1) {
+ x->best_mv.as_mv.row *= 8;
+ x->best_mv.as_mv.col *= 8;
+ } else {
+#else
{
+#endif
int distortion;
unsigned int sse;
- cpi->find_fractional_mv_step(
- x, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
- &v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
- cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL,
-#if CONFIG_EXT_INTER
- NULL, 0, 0,
-#endif
- 0, 0, 0);
+ cpi->find_fractional_mv_step(x, ref_mv, cpi->common.allow_high_precision_mv,
+ x->errorperbit, &v_fn_ptr, 0,
+ mv_sf->subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list), NULL, NULL,
+ &distortion, &sse, NULL, NULL, 0, 0, 0, 0, 0);
}
-#if CONFIG_EXT_INTER
if (has_second_ref(&xd->mi[0]->mbmi))
xd->mi[0]->mbmi.mode = NEW_NEWMV;
else
-#endif // CONFIG_EXT_INTER
xd->mi[0]->mbmi.mode = NEWMV;
xd->mi[0]->mbmi.mv[0] = x->best_mv;
-#if CONFIG_EXT_INTER
xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
-#endif // CONFIG_EXT_INTER
av1_build_inter_predictors_sby(&cpi->common, xd, mb_row, mb_col, NULL,
BLOCK_16X16);
@@ -136,6 +136,7 @@ static int do_16x16_zerozero_search(AV1_COMP *cpi, int_mv *dst_mv) {
return err;
}
static int find_best_16x16_intra(AV1_COMP *cpi, PREDICTION_MODE *pbest_mode) {
+ const AV1_COMMON *cm = &cpi->common;
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
PREDICTION_MODE best_mode = -1, mode;
@@ -147,9 +148,10 @@ static int find_best_16x16_intra(AV1_COMP *cpi, PREDICTION_MODE *pbest_mode) {
unsigned int err;
xd->mi[0]->mbmi.mode = mode;
- av1_predict_intra_block(xd, 16, 16, BLOCK_16X16, mode, x->plane[0].src.buf,
- x->plane[0].src.stride, xd->plane[0].dst.buf,
- xd->plane[0].dst.stride, 0, 0, 0);
+ av1_predict_intra_block(cm, xd, 16, 16, BLOCK_16X16, mode,
+ x->plane[0].src.buf, x->plane[0].src.stride,
+ xd->plane[0].dst.buf, xd->plane[0].dst.stride, 0, 0,
+ 0);
err = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride);
diff --git a/third_party/aom/av1/encoder/mcomp.c b/third_party/aom/av1/encoder/mcomp.c
index 4efadff1b..6c8503da0 100644
--- a/third_party/aom/av1/encoder/mcomp.c
+++ b/third_party/aom/av1/encoder/mcomp.c
@@ -176,7 +176,6 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
}
/* checks if (r, c) has better score than previous best */
-#if CONFIG_EXT_INTER
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
MV this_mv = { r, c }; \
@@ -202,34 +201,10 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
} else { \
v = INT_MAX; \
}
-#else
-#define CHECK_BETTER(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- MV this_mv = { r, c }; \
- v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
- if (second_pred == NULL) \
- thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
- src_address, src_stride, &sse); \
- else \
- thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
- src_address, src_stride, &sse, second_pred); \
- v += thismse; \
- if (v < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
- }
-#endif // CONFIG_EXT_INTER
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
/* checks if (r, c) has better score than previous best */
-#if CONFIG_EXT_INTER
#define CHECK_BETTER1(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
MV this_mv = { r, c }; \
@@ -249,26 +224,6 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
} else { \
v = INT_MAX; \
}
-#else
-#define CHECK_BETTER1(v, r, c) \
- if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
- MV this_mv = { r, c }; \
- thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, \
- pre(y, y_stride, r, c), y_stride, sp(c), \
- sp(r), second_pred, w, h, &sse); \
- v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
- v += thismse; \
- if (v < besterr) { \
- besterr = v; \
- br = r; \
- bc = c; \
- *distortion = thismse; \
- *sse1 = sse; \
- } \
- } else { \
- v = INT_MAX; \
- }
-#endif // CONFIG_EXT_INTER
#define FIRST_LEVEL_CHECKS \
{ \
@@ -372,35 +327,28 @@ static unsigned int setup_center_error(
const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
int error_per_bit, const aom_variance_fn_ptr_t *vfp,
const uint8_t *const src, const int src_stride, const uint8_t *const y,
- int y_stride, const uint8_t *second_pred,
-#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride, int invert_mask,
-#endif
- int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
- int *distortion) {
+ int y_stride, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost,
+ int *mvcost[2], unsigned int *sse1, int *distortion) {
unsigned int besterr;
#if CONFIG_HIGHBITDEPTH
if (second_pred != NULL) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
-#if CONFIG_EXT_INTER
if (mask)
aom_highbd_comp_mask_pred(comp_pred16, second_pred, w, h, y + offset,
y_stride, mask, mask_stride, invert_mask);
else
-#endif
aom_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
y_stride);
besterr =
vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
} else {
DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
-#if CONFIG_EXT_INTER
if (mask)
aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
mask, mask_stride, invert_mask);
else
-#endif
aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
}
@@ -413,12 +361,10 @@ static unsigned int setup_center_error(
(void)xd;
if (second_pred != NULL) {
DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
-#if CONFIG_EXT_INTER
if (mask)
aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
mask, mask_stride, invert_mask);
else
-#endif
aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
} else {
@@ -458,19 +404,13 @@ int av1_find_best_sub_pixel_tree_pruned_evenmore(
MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred,
-#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride, int invert_mask,
-#endif
- int w, int h, int use_upsampled_ref) {
+ unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
SETUP_SUBPEL_SEARCH;
- besterr =
- setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
- src_stride, y, y_stride, second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, invert_mask,
-#endif
- w, h, offset, mvjcost, mvcost, sse1, distortion);
+ besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
+ src_address, src_stride, y, y_stride,
+ second_pred, mask, mask_stride, invert_mask, w,
+ h, offset, mvjcost, mvcost, sse1, distortion);
(void)halfiters;
(void)quarteriters;
(void)eighthiters;
@@ -531,21 +471,15 @@ int av1_find_best_sub_pixel_tree_pruned_more(
MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred,
-#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride, int invert_mask,
-#endif
- int w, int h, int use_upsampled_ref) {
+ unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
SETUP_SUBPEL_SEARCH;
(void)use_upsampled_ref;
- besterr =
- setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
- src_stride, y, y_stride, second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, invert_mask,
-#endif
- w, h, offset, mvjcost, mvcost, sse1, distortion);
+ besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
+ src_address, src_stride, y, y_stride,
+ second_pred, mask, mask_stride, invert_mask, w,
+ h, offset, mvjcost, mvcost, sse1, distortion);
if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
@@ -600,21 +534,15 @@ int av1_find_best_sub_pixel_tree_pruned(
MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred,
-#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride, int invert_mask,
-#endif
- int w, int h, int use_upsampled_ref) {
+ unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
SETUP_SUBPEL_SEARCH;
(void)use_upsampled_ref;
- besterr =
- setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
- src_stride, y, y_stride, second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, invert_mask,
-#endif
- w, h, offset, mvjcost, mvcost, sse1, distortion);
+ besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
+ src_address, src_stride, y, y_stride,
+ second_pred, mask, mask_stride, invert_mask, w,
+ h, offset, mvjcost, mvcost, sse1, distortion);
if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
cost_list[4] != INT_MAX) {
@@ -696,26 +624,24 @@ static const MV search_step_table[12] = {
};
/* clang-format on */
-static int upsampled_pref_error(
- const MACROBLOCKD *xd, const aom_variance_fn_ptr_t *vfp,
- const uint8_t *const src, const int src_stride, const uint8_t *const y,
- int y_stride, int subpel_x_q3, int subpel_y_q3, const uint8_t *second_pred,
-#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride, int invert_mask,
-#endif
- int w, int h, unsigned int *sse) {
+static int upsampled_pref_error(const MACROBLOCKD *xd,
+ const aom_variance_fn_ptr_t *vfp,
+ const uint8_t *const src, const int src_stride,
+ const uint8_t *const y, int y_stride,
+ int subpel_x_q3, int subpel_y_q3,
+ const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h,
+ unsigned int *sse) {
unsigned int besterr;
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
if (second_pred != NULL) {
-#if CONFIG_EXT_INTER
if (mask)
aom_highbd_comp_mask_upsampled_pred(
pred16, second_pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride,
mask, mask_stride, invert_mask, xd->bd);
else
-#endif
aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h,
subpel_x_q3, subpel_y_q3, y,
y_stride, xd->bd);
@@ -732,13 +658,11 @@ static int upsampled_pref_error(
(void)xd;
#endif // CONFIG_HIGHBITDEPTH
if (second_pred != NULL) {
-#if CONFIG_EXT_INTER
if (mask)
aom_comp_mask_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
subpel_y_q3, y, y_stride, mask,
mask_stride, invert_mask);
else
-#endif
aom_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
subpel_y_q3, y, y_stride);
} else {
@@ -756,18 +680,12 @@ static unsigned int upsampled_setup_center_error(
const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
int error_per_bit, const aom_variance_fn_ptr_t *vfp,
const uint8_t *const src, const int src_stride, const uint8_t *const y,
- int y_stride, const uint8_t *second_pred,
-#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride, int invert_mask,
-#endif
- int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
- int *distortion) {
+ int y_stride, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost,
+ int *mvcost[2], unsigned int *sse1, int *distortion) {
unsigned int besterr = upsampled_pref_error(
- xd, vfp, src, src_stride, y + offset, y_stride, 0, 0, second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, invert_mask,
-#endif
- w, h, sse1);
+ xd, vfp, src, src_stride, y + offset, y_stride, 0, 0, second_pred, mask,
+ mask_stride, invert_mask, w, h, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
return besterr;
@@ -777,11 +695,8 @@ int av1_find_best_sub_pixel_tree(
MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred,
-#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride, int invert_mask,
-#endif
- int w, int h, int use_upsampled_ref) {
+ unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
const uint8_t *const src_address = x->plane[0].src.buf;
const int src_stride = x->plane[0].src.stride;
const MACROBLOCKD *xd = &x->e_mbd;
@@ -818,19 +733,13 @@ int av1_find_best_sub_pixel_tree(
if (use_upsampled_ref)
besterr = upsampled_setup_center_error(
xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
- y_stride, second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, invert_mask,
-#endif
- w, h, offset, mvjcost, mvcost, sse1, distortion);
+ y_stride, second_pred, mask, mask_stride, invert_mask, w, h, offset,
+ mvjcost, mvcost, sse1, distortion);
else
- besterr =
- setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
- src_stride, y, y_stride, second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, invert_mask,
-#endif
- w, h, offset, mvjcost, mvcost, sse1, distortion);
+ besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
+ src_address, src_stride, y, y_stride,
+ second_pred, mask, mask_stride, invert_mask, w,
+ h, offset, mvjcost, mvcost, sse1, distortion);
(void)cost_list; // to silence compiler warning
@@ -845,22 +754,17 @@ int av1_find_best_sub_pixel_tree(
if (use_upsampled_ref) {
thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
pre(y, y_stride, tr, tc), y_stride,
- sp(tc), sp(tr), second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, invert_mask,
-#endif
- w, h, &sse);
+ sp(tc), sp(tr), second_pred, mask,
+ mask_stride, invert_mask, w, h, &sse);
} else {
const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
-#if CONFIG_EXT_INTER
else if (mask)
thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, second_pred, mask,
mask_stride, invert_mask, &sse);
-#endif
else
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
@@ -892,23 +796,18 @@ int av1_find_best_sub_pixel_tree(
if (use_upsampled_ref) {
thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
pre(y, y_stride, tr, tc), y_stride,
- sp(tc), sp(tr), second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, invert_mask,
-#endif
- w, h, &sse);
+ sp(tc), sp(tr), second_pred, mask,
+ mask_stride, invert_mask, w, h, &sse);
} else {
const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
if (second_pred == NULL)
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
src_stride, &sse);
-#if CONFIG_EXT_INTER
else if (mask)
thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, second_pred, mask,
mask_stride, invert_mask, &sse);
-#endif
else
thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
@@ -1225,6 +1124,7 @@ static int pattern_search(
int thissad;
int k = -1;
const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
+ assert(search_param < MAX_MVSEARCH_STEPS);
int best_init_s = search_param_to_steps[search_param];
// adjust ref_mv to make sure it is within MV range
clamp_mv(start_mv, x->mv_limits.col_min, x->mv_limits.col_max,
@@ -1493,7 +1393,6 @@ int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
: 0);
}
-#if CONFIG_EXT_INTER
int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
const MV *center_mv, const uint8_t *second_pred,
const uint8_t *mask, int mask_stride,
@@ -1512,7 +1411,6 @@ int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
x->errorperbit)
: 0);
}
-#endif
int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
int sad_per_bit, int do_init_search, int *cost_list,
@@ -2481,11 +2379,9 @@ int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit,
// mode, or when searching for one component of an ext-inter compound mode.
int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
const aom_variance_fn_ptr_t *fn_ptr,
-#if CONFIG_EXT_INTER
const uint8_t *mask, int mask_stride,
- int invert_mask,
-#endif
- const MV *center_mv, const uint8_t *second_pred) {
+ int invert_mask, const MV *center_mv,
+ const uint8_t *second_pred) {
const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
{ -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2498,14 +2394,12 @@ int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
-#if CONFIG_EXT_INTER
if (mask)
best_sad = fn_ptr->msdf(what->buf, what->stride,
get_buf_from_mv(in_what, best_mv), in_what->stride,
second_pred, mask, mask_stride, invert_mask) +
mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
else
-#endif
best_sad =
fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
in_what->stride, second_pred) +
@@ -2520,13 +2414,11 @@ int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
if (is_mv_in(&x->mv_limits, &mv)) {
unsigned int sad;
-#if CONFIG_EXT_INTER
if (mask)
sad = fn_ptr->msdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride,
second_pred, mask, mask_stride, invert_mask);
else
-#endif
sad = fn_ptr->sdaf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride,
second_pred);
@@ -2562,10 +2454,45 @@ static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) {
(*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
}
+#if CONFIG_HASH_ME
+#define MAX_HASH_MV_TABLE_SIZE 5
+static void add_to_sort_table(block_hash block_hashes[MAX_HASH_MV_TABLE_SIZE],
+ int costs[MAX_HASH_MV_TABLE_SIZE], int *existing,
+ int max_size, block_hash curr_block,
+ int curr_cost) {
+ if (*existing < max_size) {
+ block_hashes[*existing] = curr_block;
+ costs[*existing] = curr_cost;
+ (*existing)++;
+ } else {
+ int max_cost = 0;
+ int max_cost_idx = 0;
+ for (int i = 0; i < max_size; i++) {
+ if (costs[i] > max_cost) {
+ max_cost = costs[i];
+ max_cost_idx = i;
+ }
+ }
+
+ if (curr_cost < max_cost) {
+ block_hashes[max_cost_idx] = curr_block;
+ costs[max_cost_idx] = curr_cost;
+ }
+ }
+}
+#endif
+
+#if CONFIG_HASH_ME
+int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+ MV *mvp_full, int step_param, int error_per_bit,
+ int *cost_list, const MV *ref_mv, int var_max, int rd,
+ int x_pos, int y_pos, int intra) {
+#else
int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
MV *mvp_full, int step_param, int error_per_bit,
int *cost_list, const MV *ref_mv, int var_max,
int rd) {
+#endif
const SPEED_FEATURES *const sf = &cpi->sf;
const SEARCH_METHODS method = sf->mv.search_method;
const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
@@ -2637,6 +2564,93 @@ int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
if (method != NSTEP && rd && var < var_max)
var = av1_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1);
+#if CONFIG_HASH_ME
+ do {
+ if (!cpi->common.allow_screen_content_tools) {
+ break;
+ }
+    // regular single-reference ME has already run; try hash-based ME as well
+ // get block size and original buffer of current block
+ const int block_height = block_size_high[bsize];
+ const int block_width = block_size_wide[bsize];
+ if (block_height == block_width && x_pos >= 0 && y_pos >= 0) {
+ if (block_width == 4 || block_width == 8 || block_width == 16 ||
+ block_width == 32 || block_width == 64) {
+ uint8_t *what = x->plane[0].src.buf;
+ const int what_stride = x->plane[0].src.stride;
+ block_hash block_hashes[MAX_HASH_MV_TABLE_SIZE];
+ int costs[MAX_HASH_MV_TABLE_SIZE];
+ int existing = 0;
+ int i;
+ uint32_t hash_value1, hash_value2;
+ MV best_hash_mv;
+ int best_hash_cost = INT_MAX;
+
+      // pick the hash table: current frame for intra, reference frame
+      // otherwise
+ hash_table *ref_frame_hash =
+ intra ? &cpi->common.cur_frame->hash_table
+ : get_ref_frame_hash_map(cpi,
+ x->e_mbd.mi[0]->mbmi.ref_frame[0]);
+
+ av1_get_block_hash_value(what, what_stride, block_width, &hash_value1,
+ &hash_value2);
+
+ const int count = av1_hash_table_count(ref_frame_hash, hash_value1);
+      // For intra, at least one match (the block itself) can always be found.
+ if (count <= (intra ? 1 : 0)) {
+ break;
+ }
+
+ Iterator iterator =
+ av1_hash_get_first_iterator(ref_frame_hash, hash_value1);
+ for (i = 0; i < count; i++, iterator_increment(&iterator)) {
+ block_hash ref_block_hash = *(block_hash *)(iterator_get(&iterator));
+ if (hash_value2 == ref_block_hash.hash_value2) {
+          // For intra, make sure the prediction comes from a valid (already
+          // coded) area and not from the current block itself.
+          // TODO(roger): check if this constraint is necessary
+ if (intra &&
+ ref_block_hash.y + block_height >
+ ((y_pos >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2) &&
+ ref_block_hash.x + block_width >
+ ((x_pos >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2)) {
+ continue;
+ }
+ int refCost =
+ abs(ref_block_hash.x - x_pos) + abs(ref_block_hash.y - y_pos);
+ add_to_sort_table(block_hashes, costs, &existing,
+ MAX_HASH_MV_TABLE_SIZE, ref_block_hash, refCost);
+ }
+ }
+
+ if (existing == 0) {
+ break;
+ }
+
+ for (i = 0; i < existing; i++) {
+ MV hash_mv;
+ hash_mv.col = block_hashes[i].x - x_pos;
+ hash_mv.row = block_hashes[i].y - y_pos;
+ if (!is_mv_in(&x->mv_limits, &hash_mv)) {
+ continue;
+ }
+ int currHashCost = av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1);
+ if (currHashCost < best_hash_cost) {
+ best_hash_cost = currHashCost;
+ best_hash_mv = hash_mv;
+ }
+ }
+
+ if (best_hash_cost < var) {
+ x->second_best_mv = x->best_mv;
+ x->best_mv.as_mv = best_hash_mv;
+ var = best_hash_cost;
+ }
+ }
+ }
+ } while (0);
+#endif
+
return var;
}
@@ -3150,25 +3164,24 @@ int av1_return_max_sub_pixel_mv(
MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred,
-#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride, int invert_mask,
-#endif
- int w, int h, int use_upsampled_ref) {
+ unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
COMMON_MV_TEST;
-#if CONFIG_EXT_INTER
(void)mask;
(void)mask_stride;
(void)invert_mask;
-#endif
(void)minr;
(void)minc;
bestmv->row = maxr;
bestmv->col = maxc;
besterr = 0;
- // In the sub-pel motion search, if hp is not used, then the last bit of mv
- // has to be 0.
+// In the sub-pel motion search, if hp is not used, then the last bit of mv
+// has to be 0.
+#if CONFIG_AMVR
+ lower_mv_precision(bestmv, allow_hp, 0);
+#else
lower_mv_precision(bestmv, allow_hp);
+#endif
return besterr;
}
// Return the minimum MV.
@@ -3176,24 +3189,23 @@ int av1_return_min_sub_pixel_mv(
MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
- unsigned int *sse1, const uint8_t *second_pred,
-#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride, int invert_mask,
-#endif
- int w, int h, int use_upsampled_ref) {
+ unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
COMMON_MV_TEST;
(void)maxr;
(void)maxc;
-#if CONFIG_EXT_INTER
(void)mask;
(void)mask_stride;
(void)invert_mask;
-#endif
bestmv->row = minr;
bestmv->col = minc;
besterr = 0;
- // In the sub-pel motion search, if hp is not used, then the last bit of mv
- // has to be 0.
+// In the sub-pel motion search, if hp is not used, then the last bit of mv
+// has to be 0.
+#if CONFIG_AMVR
+ lower_mv_precision(bestmv, allow_hp, 0);
+#else
lower_mv_precision(bestmv, allow_hp);
+#endif
return besterr;
}
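
The CONFIG_HASH_ME additions in this file bolt hash matching onto the regular full-pixel search: up to MAX_HASH_MV_TABLE_SIZE candidates from the matching bucket are kept (pre-ranked by L1 distance to the block position via add_to_sort_table), each in-range candidate MV is re-scored with av1_get_mvpred_var, and x->best_mv is replaced only when the best hash candidate is strictly cheaper than the search result. A compressed restatement of that final stage, with hypothetical names:

#include <limits.h>

typedef struct { int row, col; } sketch_mv;

/* Given a pre-filtered candidate list (the bounded best-N table built by
 * add_to_sort_table), re-score each candidate with the real cost function
 * (eval stands in for av1_get_mvpred_var) and accept the winner only if it
 * beats the cost of the regular search. Illustrative sketch only. */
static int sketch_pick_hash_mv(const sketch_mv *cands, int n, int search_cost,
                               int (*eval)(const sketch_mv *),
                               sketch_mv *best) {
  int best_cost = INT_MAX;
  for (int i = 0; i < n; i++) {
    const int c = eval(&cands[i]);
    if (c < best_cost) {
      best_cost = c;
      *best = cands[i];
    }
  }
  return best_cost < search_cost; /* 1 = hash candidate wins */
}
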
diff --git a/third_party/aom/av1/encoder/mcomp.h b/third_party/aom/av1/encoder/mcomp.h
index 733e415ce..2c53075cc 100644
--- a/third_party/aom/av1/encoder/mcomp.h
+++ b/third_party/aom/av1/encoder/mcomp.h
@@ -58,13 +58,11 @@ int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
const MV *center_mv, const uint8_t *second_pred,
const aom_variance_fn_ptr_t *vfp, int use_mvcost);
-#if CONFIG_EXT_INTER
int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
const MV *center_mv, const uint8_t *second_pred,
const uint8_t *mask, int mask_stride,
int invert_mask, const aom_variance_fn_ptr_t *vfp,
int use_mvcost);
-#endif
struct AV1_COMP;
struct SPEED_FEATURES;
@@ -99,10 +97,8 @@ typedef int(fractional_mv_step_fp)(
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1, const uint8_t *second_pred,
-#if CONFIG_EXT_INTER
- const uint8_t *mask, int mask_stride, int invert_mask,
-#endif
- int w, int h, int use_upsampled_ref);
+ const uint8_t *mask, int mask_stride, int invert_mask, int w, int h,
+ int use_upsampled_ref);
extern fractional_mv_step_fp av1_find_best_sub_pixel_tree;
extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned;
@@ -123,18 +119,23 @@ typedef int (*av1_diamond_search_fn_t)(
int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
const aom_variance_fn_ptr_t *fn_ptr,
-#if CONFIG_EXT_INTER
const uint8_t *mask, int mask_stride,
- int invert_mask,
-#endif
- const MV *center_mv, const uint8_t *second_pred);
+ int invert_mask, const MV *center_mv,
+ const uint8_t *second_pred);
struct AV1_COMP;
+#if CONFIG_HASH_ME
+int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+ int error_per_bit, int *cost_list, const MV *ref_mv,
+ int var_max, int rd, int x_pos, int y_pos, int intra);
+#else
int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, MV *mvp_full, int step_param,
int error_per_bit, int *cost_list, const MV *ref_mv,
int var_max, int rd);
+#endif
#if CONFIG_MOTION_VAR
int av1_obmc_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
diff --git a/third_party/aom/av1/encoder/palette.c b/third_party/aom/av1/encoder/palette.c
index bac06cd17..f34b82544 100644
--- a/third_party/aom/av1/encoder/palette.c
+++ b/third_party/aom/av1/encoder/palette.c
@@ -14,116 +14,14 @@
#include "av1/encoder/cost.h"
#include "av1/encoder/palette.h"
-
-static float calc_dist(const float *p1, const float *p2, int dim) {
- float dist = 0;
- int i;
- for (i = 0; i < dim; ++i) {
- const float diff = p1[i] - p2[i];
- dist += diff * diff;
- }
- return dist;
-}
-
-void av1_calc_indices(const float *data, const float *centroids,
- uint8_t *indices, int n, int k, int dim) {
- int i, j;
- for (i = 0; i < n; ++i) {
- float min_dist = calc_dist(data + i * dim, centroids, dim);
- indices[i] = 0;
- for (j = 1; j < k; ++j) {
- const float this_dist =
- calc_dist(data + i * dim, centroids + j * dim, dim);
- if (this_dist < min_dist) {
- min_dist = this_dist;
- indices[i] = j;
- }
- }
- }
-}
-
-// Generate a random number in the range [0, 32768).
-static unsigned int lcg_rand16(unsigned int *state) {
- *state = (unsigned int)(*state * 1103515245ULL + 12345);
- return *state / 65536 % 32768;
-}
-
-static void calc_centroids(const float *data, float *centroids,
- const uint8_t *indices, int n, int k, int dim) {
- int i, j, index;
- int count[PALETTE_MAX_SIZE];
- unsigned int rand_state = (unsigned int)data[0];
-
- assert(n <= 32768);
-
- memset(count, 0, sizeof(count[0]) * k);
- memset(centroids, 0, sizeof(centroids[0]) * k * dim);
-
- for (i = 0; i < n; ++i) {
- index = indices[i];
- assert(index < k);
- ++count[index];
- for (j = 0; j < dim; ++j) {
- centroids[index * dim + j] += data[i * dim + j];
- }
- }
-
- for (i = 0; i < k; ++i) {
- if (count[i] == 0) {
- memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim,
- sizeof(centroids[0]) * dim);
- } else {
- const float norm = 1.0f / count[i];
- for (j = 0; j < dim; ++j) centroids[i * dim + j] *= norm;
- }
- }
-
- // Round to nearest integers.
- for (i = 0; i < k * dim; ++i) {
- centroids[i] = roundf(centroids[i]);
- }
-}
-
-static float calc_total_dist(const float *data, const float *centroids,
- const uint8_t *indices, int n, int k, int dim) {
- float dist = 0;
- int i;
- (void)k;
-
- for (i = 0; i < n; ++i)
- dist += calc_dist(data + i * dim, centroids + indices[i] * dim, dim);
-
- return dist;
-}
-
-void av1_k_means(const float *data, float *centroids, uint8_t *indices, int n,
- int k, int dim, int max_itr) {
- int i;
- float this_dist;
- float pre_centroids[2 * PALETTE_MAX_SIZE];
- uint8_t pre_indices[MAX_SB_SQUARE];
-
- av1_calc_indices(data, centroids, indices, n, k, dim);
- this_dist = calc_total_dist(data, centroids, indices, n, k, dim);
-
- for (i = 0; i < max_itr; ++i) {
- const float pre_dist = this_dist;
- memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim);
- memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
-
- calc_centroids(data, centroids, indices, n, k, dim);
- av1_calc_indices(data, centroids, indices, n, k, dim);
- this_dist = calc_total_dist(data, centroids, indices, n, k, dim);
-
- if (this_dist > pre_dist) {
- memcpy(centroids, pre_centroids, sizeof(pre_centroids[0]) * k * dim);
- memcpy(indices, pre_indices, sizeof(pre_indices[0]) * n);
- break;
- }
- if (!memcmp(centroids, pre_centroids, sizeof(pre_centroids[0]) * k * dim))
- break;
- }
-}
+#include "av1/encoder/random.h"
+
+#define AV1_K_MEANS_DIM 1
+#include "av1/encoder/k_means_template.h"
+#undef AV1_K_MEANS_DIM
+#define AV1_K_MEANS_DIM 2
+#include "av1/encoder/k_means_template.h"
+#undef AV1_K_MEANS_DIM
static int float_comparer(const void *a, const void *b) {
const float fa = *(const float *)a;
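
The dimension-templated include above compiles the same k-means body once per AV1_K_MEANS_DIM value, with AV1_K_MEANS_RENAME pasting the dimension into each symbol name so the compiler can specialize the innermost loop. A minimal standalone sketch of the same pattern (macro-generated here for brevity; the calc_dist_dimN names are illustrative, not the actual contents of k_means_template.h):

#include <stdio.h>

#define RENAME(f, d) f##_dim##d

/* Shared body, compiled once per dimension. In aom this lives in
   k_means_template.h and is included once per AV1_K_MEANS_DIM. */
#define DEFINE_DIST(DIM)                                      \
  static float RENAME(calc_dist, DIM)(const float *p1,        \
                                      const float *p2) {      \
    float dist = 0;                                           \
    for (int i = 0; i < (DIM); ++i) {                         \
      const float diff = p1[i] - p2[i];                       \
      dist += diff * diff;                                    \
    }                                                         \
    return dist;                                              \
  }

DEFINE_DIST(1) /* defines calc_dist_dim1() */
DEFINE_DIST(2) /* defines calc_dist_dim2() */

int main(void) {
  const float a[2] = { 0.0f, 0.0f }, b[2] = { 3.0f, 4.0f };
  printf("%f %f\n", RENAME(calc_dist, 1)(a, b),  /* 9.0  */
         RENAME(calc_dist, 2)(a, b));            /* 25.0 */
  return 0;
}

Because the dimension is a compile-time constant in each instantiation, the per-element loop can be fully unrolled, which is the point of replacing the runtime dim parameter.
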
diff --git a/third_party/aom/av1/encoder/palette.h b/third_party/aom/av1/encoder/palette.h
index 8afe5a782..efd89f66f 100644
--- a/third_party/aom/av1/encoder/palette.h
+++ b/third_party/aom/av1/encoder/palette.h
@@ -18,17 +18,49 @@
extern "C" {
#endif
+#define AV1_K_MEANS_RENAME(func, dim) func##_dim##dim
+
+void AV1_K_MEANS_RENAME(av1_calc_indices, 1)(const float *data,
+ const float *centroids,
+ uint8_t *indices, int n, int k);
+void AV1_K_MEANS_RENAME(av1_calc_indices, 2)(const float *data,
+ const float *centroids,
+ uint8_t *indices, int n, int k);
+void AV1_K_MEANS_RENAME(av1_k_means, 1)(const float *data, float *centroids,
+ uint8_t *indices, int n, int k,
+ int max_itr);
+void AV1_K_MEANS_RENAME(av1_k_means, 2)(const float *data, float *centroids,
+ uint8_t *indices, int n, int k,
+ int max_itr);
+
// Given 'n' 'data' points and 'k' 'centroids' each of dimension 'dim',
// calculate the centroid 'indices' for the data points.
-void av1_calc_indices(const float *data, const float *centroids,
- uint8_t *indices, int n, int k, int dim);
+static INLINE void av1_calc_indices(const float *data, const float *centroids,
+ uint8_t *indices, int n, int k, int dim) {
+ if (dim == 1) {
+ AV1_K_MEANS_RENAME(av1_calc_indices, 1)(data, centroids, indices, n, k);
+ } else if (dim == 2) {
+ AV1_K_MEANS_RENAME(av1_calc_indices, 2)(data, centroids, indices, n, k);
+ } else {
+ assert(0 && "Untemplated k means dimension");
+ }
+}
// Given 'n' 'data' points and an initial guess of 'k' 'centroids' each of
// dimension 'dim', runs up to 'max_itr' iterations of k-means algorithm to get
// updated 'centroids' and the centroid 'indices' for elements in 'data'.
// Note: the output centroids are rounded off to nearest integers.
-void av1_k_means(const float *data, float *centroids, uint8_t *indices, int n,
- int k, int dim, int max_itr);
+static INLINE void av1_k_means(const float *data, float *centroids,
+ uint8_t *indices, int n, int k, int dim,
+ int max_itr) {
+ if (dim == 1) {
+ AV1_K_MEANS_RENAME(av1_k_means, 1)(data, centroids, indices, n, k, max_itr);
+ } else if (dim == 2) {
+ AV1_K_MEANS_RENAME(av1_k_means, 2)(data, centroids, indices, n, k, max_itr);
+ } else {
+ assert(0 && "Untemplated k means dimension");
+ }
+}
// Given a list of centroids, returns the unique number of centroids 'k', and
// puts these unique centroids in first 'k' indices of 'centroids' array.
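
Callers keep the old dim-parameterized entry points; the static INLINE wrappers above simply dispatch to the specialized symbols. A hedged usage sketch (sample values invented for illustration; assumes the aom include path):

#include <stdint.h>
#include "av1/encoder/palette.h" /* av1_k_means() dispatch wrapper */

/* Sketch: cluster 8 one-dimensional luma samples into k = 2 palette
   centroids. The data values here are invented. */
static void example_palette_kmeans(void) {
  float data[8] = { 10.f, 12.f, 11.f, 90.f, 92.f, 88.f, 91.f, 10.f };
  float centroids[2] = { 20.f, 80.f }; /* initial guesses */
  uint8_t indices[8];

  /* dim == 1 routes to av1_k_means_dim1 via AV1_K_MEANS_RENAME;
     dim == 2 would route to av1_k_means_dim2. */
  av1_k_means(data, centroids, indices, /*n=*/8, /*k=*/2, /*dim=*/1,
              /*max_itr=*/50);
}
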
diff --git a/third_party/aom/av1/encoder/pickcdef.c b/third_party/aom/av1/encoder/pickcdef.c
index e4ec38826..accc97e57 100644
--- a/third_party/aom/av1/encoder/pickcdef.c
+++ b/third_party/aom/av1/encoder/pickcdef.c
@@ -19,11 +19,11 @@
#include "av1/common/reconinter.h"
#include "av1/encoder/encoder.h"
-#define REDUCED_STRENGTHS 8
-#define REDUCED_TOTAL_STRENGTHS (REDUCED_STRENGTHS * CLPF_STRENGTHS)
-#define TOTAL_STRENGTHS (DERING_STRENGTHS * CLPF_STRENGTHS)
+#define REDUCED_PRI_STRENGTHS 8
+#define REDUCED_TOTAL_STRENGTHS (REDUCED_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
+#define TOTAL_STRENGTHS (CDEF_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
-static int priconv[REDUCED_STRENGTHS] = { 0, 1, 2, 3, 4, 7, 12, 25 };
+static int priconv[REDUCED_PRI_STRENGTHS] = { 0, 1, 2, 3, 4, 7, 12, 25 };
/* Search for the best strength to add as an option, knowing we
already selected nb_strengths options. */
@@ -68,11 +68,16 @@ static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
int fast) {
uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
+#if !CONFIG_CDEF_SINGLEPASS
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
+#endif
int i, j;
uint64_t best_tot_mse = (uint64_t)1 << 63;
int best_id0 = 0;
int best_id1 = 0;
+#if CONFIG_CDEF_SINGLEPASS
+ const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
+#endif
memset(tot_mse, 0, sizeof(tot_mse));
for (i = 0; i < sb_count; i++) {
int gi;
@@ -232,13 +237,13 @@ static INLINE uint64_t mse_4x4_16bit(uint16_t *dst, int dstride, uint16_t *src,
}
/* Compute MSE only on the blocks we filtered. */
-uint64_t compute_dering_dist(uint16_t *dst, int dstride, uint16_t *src,
- dering_list *dlist, int dering_count,
- BLOCK_SIZE bsize, int coeff_shift, int pli) {
+uint64_t compute_cdef_dist(uint16_t *dst, int dstride, uint16_t *src,
+ cdef_list *dlist, int cdef_count, BLOCK_SIZE bsize,
+ int coeff_shift, int pli) {
uint64_t sum = 0;
int bi, bx, by;
if (bsize == BLOCK_8X8) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
if (pli == 0) {
@@ -250,7 +255,7 @@ uint64_t compute_dering_dist(uint16_t *dst, int dstride, uint16_t *src,
}
}
} else if (bsize == BLOCK_4X8) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
sum += mse_4x4_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
@@ -259,7 +264,7 @@ uint64_t compute_dering_dist(uint16_t *dst, int dstride, uint16_t *src,
&src[(bi << (3 + 2)) + 4 * 4], 4);
}
} else if (bsize == BLOCK_8X4) {
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
@@ -269,7 +274,7 @@ uint64_t compute_dering_dist(uint16_t *dst, int dstride, uint16_t *src,
}
} else {
assert(bsize == BLOCK_4X4);
- for (bi = 0; bi < dering_count; bi++) {
+ for (bi = 0; bi < cdef_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
@@ -282,12 +287,12 @@ uint64_t compute_dering_dist(uint16_t *dst, int dstride, uint16_t *src,
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd, int fast) {
int r, c;
- int sbr, sbc;
+ int fbr, fbc;
uint16_t *src[3];
uint16_t *ref_coeff[3];
- dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
- int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
- int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
+ cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
+ int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
+ int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
int stride[3];
int bsize[3];
int mi_wide_l2[3];
@@ -295,18 +300,22 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int xdec[3];
int ydec[3];
int pli;
- int dering_count;
+ int cdef_count;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
uint64_t best_tot_mse = (uint64_t)1 << 63;
uint64_t tot_mse;
int sb_count;
- int nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- int nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
- int *selected_strength = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
+ int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
+ int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
uint64_t(*mse[2])[TOTAL_STRENGTHS];
- int clpf_damping = 3 + (cm->base_qindex >> 6);
- int dering_damping = 6;
+#if CONFIG_CDEF_SINGLEPASS
+ int pri_damping = 3 + (cm->base_qindex >> 6);
+#else
+ int pri_damping = 6;
+#endif
+ int sec_damping = 3 + (cm->base_qindex >> 6);
int i;
int nb_strengths;
int nb_strength_bits;
@@ -314,19 +323,18 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
double lambda;
int nplanes = 3;
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
- DECLARE_ALIGNED(32, uint16_t, inbuf[OD_DERING_INBUF_SIZE]);
+ DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
uint16_t *in;
- DECLARE_ALIGNED(32, uint16_t, tmp_dst[MAX_SB_SQUARE]);
- int chroma_dering =
- xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
- xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
+ DECLARE_ALIGNED(32, uint16_t, tmp_dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE]);
+ int chroma_cdef = xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
+ xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
quantizer =
av1_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (cm->bit_depth - 8);
lambda = .12 * quantizer * quantizer / 256.;
av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0);
- mse[0] = aom_malloc(sizeof(**mse) * nvsb * nhsb);
- mse[1] = aom_malloc(sizeof(**mse) * nvsb * nhsb);
+ mse[0] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
+ mse[1] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
for (pli = 0; pli < nplanes; pli++) {
uint8_t *ref_buffer;
int ref_stride;
@@ -380,65 +388,76 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
}
}
}
- in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER;
+ in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
sb_count = 0;
- for (sbr = 0; sbr < nvsb; ++sbr) {
- for (sbc = 0; sbc < nhsb; ++sbc) {
+ for (fbr = 0; fbr < nvfb; ++fbr) {
+ for (fbc = 0; fbc < nhfb; ++fbc) {
int nvb, nhb;
int gi;
int dirinit = 0;
- nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc);
- nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr);
- cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
- MI_SIZE_64X64 * sbc]
+ nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
+ nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
+ cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
+ MI_SIZE_64X64 * fbc]
->mbmi.cdef_strength = -1;
- if (sb_all_skip(cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64)) continue;
- dering_count = sb_compute_dering_list(cm, sbr * MI_SIZE_64X64,
- sbc * MI_SIZE_64X64, dlist, 1);
+ if (sb_all_skip(cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) continue;
+ cdef_count = sb_compute_cdef_list(cm, fbr * MI_SIZE_64X64,
+ fbc * MI_SIZE_64X64, dlist, 1);
for (pli = 0; pli < nplanes; pli++) {
- for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
- inbuf[i] = OD_DERING_VERY_LARGE;
+ for (i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE;
for (gi = 0; gi < total_strengths; gi++) {
int threshold;
uint64_t curr_mse;
- int clpf_strength;
- threshold = gi / CLPF_STRENGTHS;
+ int sec_strength;
+ threshold = gi / CDEF_SEC_STRENGTHS;
if (fast) threshold = priconv[threshold];
- if (pli > 0 && !chroma_dering) threshold = 0;
+ if (pli > 0 && !chroma_cdef) threshold = 0;
/* We avoid filtering the pixels for which some of the pixels to
   average are outside the frame. We could change the filter instead,
   but it would add special cases for any future vectorization. */
- int yoff = OD_FILT_VBORDER * (sbr != 0);
- int xoff = OD_FILT_HBORDER * (sbc != 0);
+ int yoff = CDEF_VBORDER * (fbr != 0);
+ int xoff = CDEF_HBORDER * (fbc != 0);
int ysize = (nvb << mi_high_l2[pli]) +
- OD_FILT_VBORDER * (sbr != nvsb - 1) + yoff;
+ CDEF_VBORDER * (fbr != nvfb - 1) + yoff;
int xsize = (nhb << mi_wide_l2[pli]) +
- OD_FILT_HBORDER * (sbc != nhsb - 1) + xoff;
- clpf_strength = gi % CLPF_STRENGTHS;
- if (clpf_strength == 0)
- copy_sb16_16(&in[(-yoff * OD_FILT_BSTRIDE - xoff)], OD_FILT_BSTRIDE,
+ CDEF_HBORDER * (fbc != nhfb - 1) + xoff;
+ sec_strength = gi % CDEF_SEC_STRENGTHS;
+#if CONFIG_CDEF_SINGLEPASS
+ copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
+ src[pli],
+ (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
+ stride[pli], ysize, xsize);
+ cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, xdec[pli], ydec[pli],
+ dir, &dirinit, var, pli, dlist, cdef_count, threshold,
+ sec_strength + (sec_strength == 3), pri_damping,
+ sec_damping, coeff_shift);
+#else
+ if (sec_strength == 0)
+ copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
src[pli],
- (sbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
- (sbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
+ (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
stride[pli], ysize, xsize);
- od_dering(clpf_strength ? NULL : (uint8_t *)in, OD_FILT_BSTRIDE,
- tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var, pli,
- dlist, dering_count, threshold,
- clpf_strength + (clpf_strength == 3), clpf_damping,
- dering_damping, coeff_shift, clpf_strength != 0, 1);
- curr_mse = compute_dering_dist(
+ cdef_filter_fb(sec_strength ? NULL : (uint8_t *)in, CDEF_BSTRIDE,
+ tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var,
+ pli, dlist, cdef_count, threshold,
+ sec_strength + (sec_strength == 3), sec_damping,
+ pri_damping, coeff_shift, sec_strength != 0, 1);
+#endif
+ curr_mse = compute_cdef_dist(
ref_coeff[pli] +
- (sbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
- (sbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
- stride[pli], tmp_dst, dlist, dering_count, bsize[pli],
- coeff_shift, pli);
+ (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
+ (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
+ stride[pli], tmp_dst, dlist, cdef_count, bsize[pli], coeff_shift,
+ pli);
if (pli < 2)
mse[pli][sb_count][gi] = curr_mse;
else
mse[1][sb_count][gi] += curr_mse;
sb_index[sb_count] =
- MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc;
+ MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
}
}
sb_count++;
@@ -494,15 +513,17 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
if (fast) {
for (int j = 0; j < nb_strengths; j++) {
cm->cdef_strengths[j] =
- priconv[cm->cdef_strengths[j] / CLPF_STRENGTHS] * CLPF_STRENGTHS +
- (cm->cdef_strengths[j] % CLPF_STRENGTHS);
+ priconv[cm->cdef_strengths[j] / CDEF_SEC_STRENGTHS] *
+ CDEF_SEC_STRENGTHS +
+ (cm->cdef_strengths[j] % CDEF_SEC_STRENGTHS);
cm->cdef_uv_strengths[j] =
- priconv[cm->cdef_uv_strengths[j] / CLPF_STRENGTHS] * CLPF_STRENGTHS +
- (cm->cdef_uv_strengths[j] % CLPF_STRENGTHS);
+ priconv[cm->cdef_uv_strengths[j] / CDEF_SEC_STRENGTHS] *
+ CDEF_SEC_STRENGTHS +
+ (cm->cdef_uv_strengths[j] % CDEF_SEC_STRENGTHS);
}
}
- cm->cdef_dering_damping = dering_damping;
- cm->cdef_clpf_damping = clpf_damping;
+ cm->cdef_pri_damping = pri_damping;
+ cm->cdef_sec_damping = sec_damping;
aom_free(mse[0]);
aom_free(mse[1]);
for (pli = 0; pli < nplanes; pli++) {
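
With the renames, the packing of the joint strength index is explicit: gi / CDEF_SEC_STRENGTHS is the primary-strength index (remapped through priconv in fast mode) and gi % CDEF_SEC_STRENGTHS is the secondary one. A standalone sketch of that decomposition; the value 4 for CDEF_SEC_STRENGTHS is assumed for illustration, since the diff itself does not define it:

#include <stdio.h>

/* Value assumed for illustration; the diff only renames the macros. */
#define CDEF_SEC_STRENGTHS 4
#define REDUCED_PRI_STRENGTHS 8

static const int priconv[REDUCED_PRI_STRENGTHS] = { 0, 1, 2,  3,
                                                    4, 7, 12, 25 };

int main(void) {
  const int fast = 1;
  for (int gi = 0; gi < REDUCED_PRI_STRENGTHS * CDEF_SEC_STRENGTHS; gi++) {
    int pri = gi / CDEF_SEC_STRENGTHS;       /* primary strength index */
    const int sec = gi % CDEF_SEC_STRENGTHS; /* secondary strength index */
    if (fast) pri = priconv[pri];            /* sparse levels in fast mode */
    /* As in the diff, secondary index 3 stands for actual strength 4. */
    printf("gi=%2d -> pri=%2d sec=%d\n", gi, pri, sec + (sec == 3));
  }
  return 0;
}
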
diff --git a/third_party/aom/av1/encoder/picklpf.c b/third_party/aom/av1/encoder/picklpf.c
index 26fd55ef0..d8b6f9074 100644
--- a/third_party/aom/av1/encoder/picklpf.c
+++ b/third_party/aom/av1/encoder/picklpf.c
@@ -14,8 +14,8 @@
#include "./aom_scale_rtcd.h"
-#include "aom_dsp/psnr.h"
#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/psnr.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
@@ -27,6 +27,85 @@
#include "av1/encoder/encoder.h"
#include "av1/encoder/picklpf.h"
+#if CONFIG_LPF_SB
+#if CONFIG_HIGHBITDEPTH
+static int compute_sb_y_sse_highbd(const YV12_BUFFER_CONFIG *src,
+ const YV12_BUFFER_CONFIG *frame,
+ AV1_COMMON *const cm, int mi_row,
+ int mi_col) {
+ int sse = 0;
+ const int mi_row_start = AOMMAX(0, mi_row - FILT_BOUNDARY_MI_OFFSET);
+ const int mi_col_start = AOMMAX(0, mi_col - FILT_BOUNDARY_MI_OFFSET);
+ const int mi_row_range = mi_row - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
+ const int mi_col_range = mi_col - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
+ const int mi_row_end = AOMMIN(mi_row_range, cm->mi_rows);
+ const int mi_col_end = AOMMIN(mi_col_range, cm->mi_cols);
+
+ const int row = mi_row_start * MI_SIZE;
+ const int col = mi_col_start * MI_SIZE;
+ const uint16_t *src_y =
+ CONVERT_TO_SHORTPTR(src->y_buffer) + row * src->y_stride + col;
+ const uint16_t *frame_y =
+ CONVERT_TO_SHORTPTR(frame->y_buffer) + row * frame->y_stride + col;
+ const int row_end = (mi_row_end - mi_row_start) * MI_SIZE;
+ const int col_end = (mi_col_end - mi_col_start) * MI_SIZE;
+
+ int x, y;
+ for (y = 0; y < row_end; ++y) {
+ for (x = 0; x < col_end; ++x) {
+ const int diff = src_y[x] - frame_y[x];
+ sse += diff * diff;
+ }
+ src_y += src->y_stride;
+ frame_y += frame->y_stride;
+ }
+ return sse;
+}
+#endif
+
+static int compute_sb_y_sse(const YV12_BUFFER_CONFIG *src,
+ const YV12_BUFFER_CONFIG *frame,
+ AV1_COMMON *const cm, int mi_row, int mi_col) {
+ int sse = 0;
+ const int mi_row_start = AOMMAX(0, mi_row - FILT_BOUNDARY_MI_OFFSET);
+ const int mi_col_start = AOMMAX(0, mi_col - FILT_BOUNDARY_MI_OFFSET);
+ const int mi_row_range = mi_row - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
+ const int mi_col_range = mi_col - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
+ const int mi_row_end = AOMMIN(mi_row_range, cm->mi_rows);
+ const int mi_col_end = AOMMIN(mi_col_range, cm->mi_cols);
+
+ const int row = mi_row_start * MI_SIZE;
+ const int col = mi_col_start * MI_SIZE;
+ const uint8_t *src_y = src->y_buffer + row * src->y_stride + col;
+ const uint8_t *frame_y = frame->y_buffer + row * frame->y_stride + col;
+ const int row_end = (mi_row_end - mi_row_start) * MI_SIZE;
+ const int col_end = (mi_col_end - mi_col_start) * MI_SIZE;
+
+ int x, y;
+ for (y = 0; y < row_end; ++y) {
+ for (x = 0; x < col_end; ++x) {
+ const int diff = src_y[x] - frame_y[x];
+ sse += diff * diff;
+ }
+ src_y += src->y_stride;
+ frame_y += frame->y_stride;
+ }
+ return sse;
+}
+#endif // CONFIG_LPF_SB
+
+#if !CONFIG_LPF_SB
+static void yv12_copy_plane(const YV12_BUFFER_CONFIG *src_bc,
+ YV12_BUFFER_CONFIG *dst_bc, int plane) {
+ switch (plane) {
+ case 0: aom_yv12_copy_y(src_bc, dst_bc); break;
+ case 1: aom_yv12_copy_u(src_bc, dst_bc); break;
+ case 2: aom_yv12_copy_v(src_bc, dst_bc); break;
+ default: assert(plane >= 0 && plane <= 2); break;
+ }
+}
+#endif // CONFIG_LPF_SB
+
int av1_get_max_filter_level(const AV1_COMP *cpi) {
if (cpi->oxcf.pass == 2) {
return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
@@ -36,25 +115,156 @@ int av1_get_max_filter_level(const AV1_COMP *cpi) {
}
}
+#if CONFIG_LPF_SB
+// TODO(chengchen): reduce memory usage by copying the superblock instead of
+// the frame
+static int try_filter_superblock(const YV12_BUFFER_CONFIG *sd,
+ AV1_COMP *const cpi, int filt_level,
+ int partial_frame, int mi_row, int mi_col) {
+ AV1_COMMON *const cm = &cpi->common;
+ int filt_err;
+
+#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4
+ av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1,
+ partial_frame, mi_row, mi_col);
+#else
+ if (cpi->num_workers > 1)
+ av1_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
+ filt_level, 1, partial_frame, cpi->workers,
+ cpi->num_workers, &cpi->lf_row_sync);
+ else
+ av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
+ 1, partial_frame);
+#endif
+
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ filt_err =
+ compute_sb_y_sse_highbd(sd, cm->frame_to_show, cm, mi_row, mi_col);
+ } else {
+ filt_err = compute_sb_y_sse(sd, cm->frame_to_show, cm, mi_row, mi_col);
+ }
+#else
+ filt_err = compute_sb_y_sse(sd, cm->frame_to_show, cm, mi_row, mi_col);
+#endif // CONFIG_HIGHBITDEPTH
+
+ // TODO(chengchen): Copy the superblock only
+ // Re-instate the unfiltered frame
+ aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+
+ return filt_err;
+}
+
+static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
+ int partial_frame, double *best_cost_ret,
+ int mi_row, int mi_col, int last_lvl) {
+ assert(partial_frame == 1);
+ assert(last_lvl >= 0);
+
+ const AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCK *x = &cpi->td.mb;
+
+ int min_filter_level = AOMMAX(0, last_lvl - MAX_LPF_OFFSET);
+ int max_filter_level =
+ AOMMIN(av1_get_max_filter_level(cpi), last_lvl + MAX_LPF_OFFSET);
+
+  // Search a larger range for the starting superblock.
+ if (mi_row == 0 && mi_col == 0) {
+ min_filter_level = 0;
+ max_filter_level = av1_get_max_filter_level(cpi);
+ }
+
+ // TODO(chengchen): Copy for superblock only
+ // Make a copy of the unfiltered / processed recon buffer
+ aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
+
+ int estimate_err =
+ try_filter_superblock(sd, cpi, last_lvl, partial_frame, mi_row, mi_col);
+
+ int best_err = estimate_err;
+ int filt_best = last_lvl;
+
+ int i;
+ for (i = min_filter_level; i <= max_filter_level; i += LPF_STEP) {
+ if (i == last_lvl) continue;
+
+ int filt_err =
+ try_filter_superblock(sd, cpi, i, partial_frame, mi_row, mi_col);
+
+ if (filt_err < best_err) {
+ best_err = filt_err;
+ filt_best = i;
+ }
+ }
+
+  // If the previous superblock's filter level performs about as well as the
+  // current best, reuse it so that only one bit is needed to signal that the
+  // current filter level equals the previous one.
+ int threshold = 400;
+
+  // ratio = the filtered area / the superblock area
+ int ratio = 1;
+ if (mi_row + MAX_MIB_SIZE > cm->mi_rows) {
+ ratio *= (cm->mi_rows - mi_row);
+ } else {
+ if (mi_row == 0) {
+ ratio *= (MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET);
+ } else {
+ ratio *= MAX_MIB_SIZE;
+ }
+ }
+ if (mi_col + MAX_MIB_SIZE > cm->mi_cols) {
+ ratio *= (cm->mi_cols - mi_col);
+ } else {
+ if (mi_col == 0) {
+ ratio *= (MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET);
+ } else {
+ ratio *= MAX_MIB_SIZE;
+ }
+ }
+ threshold = threshold * ratio / (MAX_MIB_SIZE * MAX_MIB_SIZE);
+
+ const int diff = abs(estimate_err - best_err);
+
+ const int percent_thresh = (int)((double)estimate_err * 0.01);
+ threshold = AOMMAX(threshold, percent_thresh);
+ if (diff < threshold) {
+ best_err = estimate_err;
+ filt_best = last_lvl;
+ }
+
+  // Compute the rd cost to decide whether to reuse the previous filter level
+ if (filt_best != last_lvl) {
+ }
+
+ if (best_cost_ret) *best_cost_ret = RDCOST_DBL(x->rdmult, 0, best_err);
+ return filt_best;
+}
+
+#else // CONFIG_LPF_SB
static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
AV1_COMP *const cpi, int filt_level,
int partial_frame
-#if CONFIG_UV_LVL
+#if CONFIG_LOOPFILTER_LEVEL
,
- int plane
+ int plane, int dir
#endif
) {
AV1_COMMON *const cm = &cpi->common;
int64_t filt_err;
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_CB4X4
-#if CONFIG_UV_LVL
- av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
- plane, partial_frame);
+#if CONFIG_LOOPFILTER_LEVEL
+ assert(plane >= 0 && plane <= 2);
+ int filter_level[2] = { filt_level, filt_level };
+ if (plane == 0 && dir == 0) filter_level[1] = cm->lf.filter_level[1];
+ if (plane == 0 && dir == 1) filter_level[0] = cm->lf.filter_level[0];
+
+ av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd,
+ filter_level[0], filter_level[1], plane, partial_frame);
#else
av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1,
partial_frame);
-#endif // CONFIG_UV_LVL
+#endif // CONFIG_LOOPFILTER_LEVEL
#else
if (cpi->num_workers > 1)
av1_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
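
Under CONFIG_LOOPFILTER_LEVEL the luma level becomes a pair, one entry per edge direction, and try_filter_frame perturbs only the direction being searched while pinning the other to the frame's current level. A condensed, standalone sketch of just that selection (types stubbed for illustration; not the full function):

/* Standalone sketch of the filter_level[] selection added to
   try_filter_frame(). */
struct lf_state {
  int filter_level[2]; /* [0] = one edge direction, [1] = the other */
};

static void pick_levels(const struct lf_state *lf, int plane, int dir,
                        int filt_level, int out_level[2]) {
  out_level[0] = filt_level;
  out_level[1] = filt_level;
  /* When searching luma in a single direction, hold the other
     direction at the frame's current level; dir == 2 (both) and the
     chroma planes keep the trial level in both slots. */
  if (plane == 0 && dir == 0) out_level[1] = lf->filter_level[1];
  if (plane == 0 && dir == 1) out_level[0] = lf->filter_level[0];
}
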
@@ -65,64 +275,33 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
1, partial_frame);
#endif
-#if CONFIG_UV_LVL
+ int highbd = 0;
#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- if (plane == 0)
- filt_err = aom_highbd_get_y_sse(sd, cm->frame_to_show);
- else if (plane == 1)
- filt_err = aom_highbd_get_u_sse(sd, cm->frame_to_show);
- else
- filt_err = aom_highbd_get_v_sse(sd, cm->frame_to_show);
- } else {
- if (plane == 0)
- filt_err = aom_get_y_sse(sd, cm->frame_to_show);
- else if (plane == 1)
- filt_err = aom_get_u_sse(sd, cm->frame_to_show);
- else
- filt_err = aom_get_v_sse(sd, cm->frame_to_show);
- }
-#else
- if (plane == 0)
- filt_err = aom_get_y_sse(sd, cm->frame_to_show);
- else if (plane == 1)
- filt_err = aom_get_u_sse(sd, cm->frame_to_show);
- else
- filt_err = aom_get_v_sse(sd, cm->frame_to_show);
+ highbd = cm->use_highbitdepth;
#endif // CONFIG_HIGHBITDEPTH
+#if CONFIG_LOOPFILTER_LEVEL
+ filt_err = aom_get_sse_plane(sd, cm->frame_to_show, plane, highbd);
+
// Re-instate the unfiltered frame
- if (plane == 0)
- aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
- else if (plane == 1)
- aom_yv12_copy_u(&cpi->last_frame_uf, cm->frame_to_show);
- else
- aom_yv12_copy_v(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- filt_err = aom_highbd_get_y_sse(sd, cm->frame_to_show);
- } else {
- filt_err = aom_get_y_sse(sd, cm->frame_to_show);
- }
+ yv12_copy_plane(&cpi->last_frame_uf, cm->frame_to_show, plane);
#else
- filt_err = aom_get_y_sse(sd, cm->frame_to_show);
-#endif // CONFIG_HIGHBITDEPTH
+ filt_err = aom_get_sse_plane(sd, cm->frame_to_show, 0, highbd);
// Re-instate the unfiltered frame
- aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
-#endif // CONFIG_UV_LVL
+ yv12_copy_plane(&cpi->last_frame_uf, cm->frame_to_show, 0);
+#endif // CONFIG_LOOPFILTER_LEVEL
return filt_err;
}
-int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
- int partial_frame, double *best_cost_ret
-#if CONFIG_UV_LVL
- ,
- int plane
+static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
+ int partial_frame, double *best_cost_ret
+#if CONFIG_LOOPFILTER_LEVEL
+ ,
+ int plane, int dir
#endif
- ) {
+ ) {
const AV1_COMMON *const cm = &cpi->common;
const struct loopfilter *const lf = &cm->lf;
const int min_filter_level = 0;
@@ -134,18 +313,18 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
// Start the search at the previous frame filter level unless it is now out of
// range.
-#if CONFIG_UV_LVL
+#if CONFIG_LOOPFILTER_LEVEL
int lvl;
switch (plane) {
- case 0: lvl = lf->filter_level; break;
+ case 0: lvl = (dir == 1) ? lf->filter_level[1] : lf->filter_level[0]; break;
case 1: lvl = lf->filter_level_u; break;
case 2: lvl = lf->filter_level_v; break;
- default: lvl = lf->filter_level; break;
+ default: assert(plane >= 0 && plane <= 2); return 0;
}
int filt_mid = clamp(lvl, min_filter_level, max_filter_level);
#else
int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
-#endif // CONFIG_UV_LVL
+#endif // CONFIG_LOOPFILTER_LEVEL
int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
// Sum squared error at each filter level
int64_t ss_err[MAX_LOOP_FILTER + 1];
@@ -153,23 +332,18 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
// Set each entry to -1
memset(ss_err, 0xFF, sizeof(ss_err));
-#if CONFIG_UV_LVL
- if (plane == 0)
- aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
- else if (plane == 1)
- aom_yv12_copy_u(cm->frame_to_show, &cpi->last_frame_uf);
- else if (plane == 2)
- aom_yv12_copy_v(cm->frame_to_show, &cpi->last_frame_uf);
+#if CONFIG_LOOPFILTER_LEVEL
+ yv12_copy_plane(cm->frame_to_show, &cpi->last_frame_uf, plane);
#else
// Make a copy of the unfiltered / processed recon buffer
aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
-#endif // CONFIG_UV_LVL
+#endif // CONFIG_LOOPFILTER_LEVEL
-#if CONFIG_UV_LVL
- best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane);
+#if CONFIG_LOOPFILTER_LEVEL
+ best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane, dir);
#else
best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame);
-#endif // CONFIG_UV_LVL
+#endif // CONFIG_LOOPFILTER_LEVEL
filt_best = filt_mid;
ss_err[filt_mid] = best_err;
@@ -189,12 +363,12 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
if (filt_direction <= 0 && filt_low != filt_mid) {
// Get Low filter error score
if (ss_err[filt_low] < 0) {
-#if CONFIG_UV_LVL
+#if CONFIG_LOOPFILTER_LEVEL
ss_err[filt_low] =
- try_filter_frame(sd, cpi, filt_low, partial_frame, plane);
+ try_filter_frame(sd, cpi, filt_low, partial_frame, plane, dir);
#else
ss_err[filt_low] = try_filter_frame(sd, cpi, filt_low, partial_frame);
-#endif // CONFIG_UV_LVL
+#endif // CONFIG_LOOPFILTER_LEVEL
}
// If value is close to the best so far then bias towards a lower loop
// filter value.
@@ -210,12 +384,12 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
// Now look at filt_high
if (filt_direction >= 0 && filt_high != filt_mid) {
if (ss_err[filt_high] < 0) {
-#if CONFIG_UV_LVL
+#if CONFIG_LOOPFILTER_LEVEL
ss_err[filt_high] =
- try_filter_frame(sd, cpi, filt_high, partial_frame, plane);
+ try_filter_frame(sd, cpi, filt_high, partial_frame, plane, dir);
#else
ss_err[filt_high] = try_filter_frame(sd, cpi, filt_high, partial_frame);
-#endif // CONFIG_UV_LVL
+#endif // CONFIG_LOOPFILTER_LEVEL
}
// If value is significantly better than previous best, bias added against
// raising filter value
@@ -241,6 +415,7 @@ int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
if (best_cost_ret) *best_cost_ret = RDCOST_DBL(x->rdmult, 0, best_err);
return filt_best;
}
+#endif // CONFIG_LPF_SB
void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
LPF_PICK_METHOD method) {
@@ -249,8 +424,13 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness;
- if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
+ if (method == LPF_PICK_MINIMAL_LPF) {
+#if CONFIG_LOOPFILTER_LEVEL
+ lf->filter_level[0] = 0;
+ lf->filter_level[1] = 0;
+#else
lf->filter_level = 0;
+#endif
} else if (method >= LPF_PICK_FROM_Q) {
const int min_filter_level = 0;
const int max_filter_level = av1_get_max_filter_level(cpi);
@@ -279,18 +459,54 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
#endif // CONFIG_HIGHBITDEPTH
if (cm->frame_type == KEY_FRAME) filt_guess -= 4;
+#if CONFIG_LOOPFILTER_LEVEL
+ lf->filter_level[0] = clamp(filt_guess, min_filter_level, max_filter_level);
+ lf->filter_level[1] = clamp(filt_guess, min_filter_level, max_filter_level);
+#else
lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
+#endif
} else {
-#if CONFIG_UV_LVL
- lf->filter_level = av1_search_filter_level(
- sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0);
- lf->filter_level_u = av1_search_filter_level(
- sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 1);
- lf->filter_level_v = av1_search_filter_level(
- sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 2);
+#if CONFIG_LPF_SB
+ int mi_row, mi_col;
+ // TODO(chengchen): init last_lvl using previous frame's info?
+ int last_lvl = 0;
+ // TODO(chengchen): if the frame size makes the last superblock very small,
+ // consider merge it to the previous superblock to save bits.
+ // Example, if frame size 1080x720, then in the last row of superblock,
+ // there're (FILT_BOUNDAR_OFFSET + 16) pixels.
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MAX_MIB_SIZE) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
+ int lvl =
+ search_filter_level(sd, cpi, 1, NULL, mi_row, mi_col, last_lvl);
+
+ av1_loop_filter_sb_level_init(cm, mi_row, mi_col, lvl);
+
+ // For the superblock at row start, its previous filter level should be
+ // the one above it, not the one at the end of last row
+ if (mi_col + MAX_MIB_SIZE >= cm->mi_cols) {
+ last_lvl = cm->mi_grid_visible[mi_row * cm->mi_stride]->mbmi.filt_lvl;
+ } else {
+ last_lvl = lvl;
+ }
+ }
+ }
+#else // CONFIG_LPF_SB
+#if CONFIG_LOOPFILTER_LEVEL
+ lf->filter_level[0] = lf->filter_level[1] = search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0, 2);
+ lf->filter_level[0] = search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0, 0);
+ lf->filter_level[1] = search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0, 1);
+
+ lf->filter_level_u = search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 1, 0);
+ lf->filter_level_v = search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 2, 0);
#else
- lf->filter_level = av1_search_filter_level(
- sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL);
-#endif // CONFIG_UV_LVL
+ lf->filter_level =
+ search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL);
+#endif // CONFIG_LOOPFILTER_LEVEL
+#endif // CONFIG_LPF_SB
}
}
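
The reuse threshold in the LPF_SB search scales with the fraction of the superblock that is actually filtered, and is then clamped from below by 1% of the estimate error. A worked sketch, assuming MAX_MIB_SIZE = 32 and FILT_BOUNDARY_MI_OFFSET = 8 (neither value is fixed by this diff):

#include <stdio.h>

/* Values assumed for illustration only. */
#define MAX_MIB_SIZE 32
#define FILT_BOUNDARY_MI_OFFSET 8

int main(void) {
  /* Interior superblock: the full MI area is filtered. */
  int ratio = MAX_MIB_SIZE * MAX_MIB_SIZE; /* 1024 */
  int threshold = 400 * ratio / (MAX_MIB_SIZE * MAX_MIB_SIZE); /* 400 */

  /* Top-left superblock: the first FILT_BOUNDARY_MI_OFFSET rows and
     columns are excluded, so the filtered area and threshold shrink. */
  ratio = (MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET) *
          (MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET); /* 24 * 24 = 576 */
  threshold = 400 * ratio / (MAX_MIB_SIZE * MAX_MIB_SIZE); /* 225 */

  /* The threshold is then clamped from below by 1% of estimate_err. */
  const int estimate_err = 50000;
  const int percent_thresh = (int)(estimate_err * 0.01); /* 500 */
  if (threshold < percent_thresh) threshold = percent_thresh;
  printf("threshold = %d\n", threshold); /* prints 500 */
  return 0;
}
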
diff --git a/third_party/aom/av1/encoder/picklpf.h b/third_party/aom/av1/encoder/picklpf.h
index bd248d114..2a168358e 100644
--- a/third_party/aom/av1/encoder/picklpf.h
+++ b/third_party/aom/av1/encoder/picklpf.h
@@ -21,13 +21,6 @@ extern "C" {
struct yv12_buffer_config;
struct AV1_COMP;
int av1_get_max_filter_level(const AV1_COMP *cpi);
-#if CONFIG_UV_LVL
-int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
- int partial_frame, double *err, int plane);
-#else
-int av1_search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
- int partial_frame, double *err);
-#endif
void av1_pick_filter_level(const struct yv12_buffer_config *sd,
struct AV1_COMP *cpi, LPF_PICK_METHOD method);
#ifdef __cplusplus
diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c
index fec68377a..a2262b6fc 100644
--- a/third_party/aom/av1/encoder/pickrst.c
+++ b/third_party/aom/av1/encoder/pickrst.c
@@ -29,13 +29,13 @@
#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/encoder.h"
+#include "av1/encoder/mathutils.h"
#include "av1/encoder/picklpf.h"
#include "av1/encoder/pickrst.h"
-#include "av1/encoder/mathutils.h"
// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
-// When set to RESTORE_NONE (0) we allow switchable.
-const RestorationType force_restore_type = RESTORE_NONE;
+// When set to RESTORE_TYPES we allow switchable.
+static const RestorationType force_restore_type = RESTORE_TYPES;
// Number of Wiener iterations
#define NUM_WIENER_ITERS 5
@@ -44,7 +44,7 @@ typedef double (*search_restore_type)(const YV12_BUFFER_CONFIG *src,
AV1_COMP *cpi, int partial_frame,
int plane, RestorationInfo *info,
RestorationType *rest_level,
- double *best_tile_cost,
+ int64_t *best_tile_cost,
YV12_BUFFER_CONFIG *dst_frame);
const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 };
@@ -124,13 +124,11 @@ static int64_t sse_restoration_frame(AV1_COMMON *const cm,
static int64_t try_restoration_tile(const YV12_BUFFER_CONFIG *src,
AV1_COMP *const cpi, RestorationInfo *rsi,
int components_pattern, int partial_frame,
- int tile_idx, int subtile_idx,
- int subtile_bits,
+ int tile_idx,
YV12_BUFFER_CONFIG *dst_frame) {
AV1_COMMON *const cm = &cpi->common;
int64_t filt_err;
int tile_width, tile_height, nhtiles, nvtiles;
- int h_start, h_end, v_start, v_end;
int ntiles, width, height;
// Y and UV components cannot be mixed
@@ -151,11 +149,16 @@ static int64_t try_restoration_tile(const YV12_BUFFER_CONFIG *src,
av1_loop_restoration_frame(cm->frame_to_show, cm, rsi, components_pattern,
partial_frame, dst_frame);
- av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, nhtiles,
- nvtiles, tile_width, tile_height, width, height, 0,
- 0, &h_start, &h_end, &v_start, &v_end);
- filt_err = sse_restoration_tile(src, dst_frame, cm, h_start, h_end - h_start,
- v_start, v_end - v_start, components_pattern);
+ RestorationTileLimits limits = av1_get_rest_tile_limits(
+ tile_idx, nhtiles, nvtiles, tile_width, tile_height, width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ height, components_pattern > 1 ? cm->subsampling_y : 0);
+#else
+ height);
+#endif
+ filt_err = sse_restoration_tile(
+ src, dst_frame, cm, limits.h_start, limits.h_end - limits.h_start,
+ limits.v_start, limits.v_end - limits.v_start, components_pattern);
return filt_err;
}
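
try_restoration_tile now receives its window as a single RestorationTileLimits value instead of the four h_start/h_end/v_start/v_end out-parameters (and the dropped subtile arguments). A sketch of that struct-returning shape; the field names follow the diff, but the stub arithmetic below is invented, not av1_get_rest_tile_limits():

/* Sketch: struct-valued tile limits replacing out-parameters. */
typedef struct {
  int h_start, h_end, v_start, v_end;
} RestorationTileLimits;

static RestorationTileLimits get_limits_stub(int tile_idx, int ntiles_x,
                                             int tile_w, int tile_h,
                                             int width, int height) {
  const int tx = tile_idx % ntiles_x, ty = tile_idx / ntiles_x;
  RestorationTileLimits lim;
  lim.h_start = tx * tile_w;
  lim.h_end = (tx + 1) * tile_w < width ? (tx + 1) * tile_w : width;
  lim.v_start = ty * tile_h;
  lim.v_end = (ty + 1) * tile_h < height ? (ty + 1) * tile_h : height;
  return lim;
}

Returning the struct by value keeps each call site to one line and lets the SSE helper take limits.h_end - limits.h_start style extents directly, as the hunk above does.
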
@@ -172,16 +175,16 @@ static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src,
return filt_err;
}
-static int64_t get_pixel_proj_error(uint8_t *src8, int width, int height,
- int src_stride, uint8_t *dat8,
- int dat_stride, int bit_depth,
+static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
+ int src_stride, const uint8_t *dat8,
+ int dat_stride, int use_highbitdepth,
int32_t *flt1, int flt1_stride,
int32_t *flt2, int flt2_stride, int *xqd) {
int i, j;
int64_t err = 0;
int xq[2];
decode_xq(xqd, xq);
- if (bit_depth == 8) {
+ if (!use_highbitdepth) {
const uint8_t *src = src8;
const uint8_t *dat = dat8;
for (i = 0; i < height; ++i) {
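
get_pixel_proj_error scores one (xq[0], xq[1]) candidate by projecting the two filtered planes onto the source residual. The per-pixel arithmetic sits outside this hunk, so the sketch below is a schematic reconstruction with assumed SGRPROJ_* bit widths, not the verbatim aom code:

#include <stdint.h>

/* Bit widths assumed for illustration; see aom's restoration headers. */
#define SGRPROJ_RST_BITS 4
#define SGRPROJ_PRJ_BITS 7
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

/* Schematic per-pixel projection error for one candidate xq pair: the
   residuals of the two filtered planes against the upscaled input are
   combined with weights xq[0], xq[1] and compared to the source. */
static int64_t proj_error_px(uint8_t src, uint8_t dat, int32_t flt1,
                             int32_t flt2, const int xq[2]) {
  const int32_t u = (int32_t)dat << SGRPROJ_RST_BITS;
  const int32_t f1 = flt1 - u;
  const int32_t f2 = flt2 - u;
  const int32_t v = xq[0] * f1 + xq[1] * f2;
  const int32_t e =
      ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS) +
      dat - src;
  return (int64_t)e * e;
}
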
@@ -219,12 +222,12 @@ static int64_t get_pixel_proj_error(uint8_t *src8, int width, int height,
#define USE_SGRPROJ_REFINEMENT_SEARCH 1
static int64_t finer_search_pixel_proj_error(
- uint8_t *src8, int width, int height, int src_stride, uint8_t *dat8,
- int dat_stride, int bit_depth, int32_t *flt1, int flt1_stride,
- int32_t *flt2, int flt2_stride, int start_step, int *xqd) {
+ const uint8_t *src8, int width, int height, int src_stride,
+ const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt1,
+ int flt1_stride, int32_t *flt2, int flt2_stride, int start_step, int *xqd) {
int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8,
- dat_stride, bit_depth, flt1, flt1_stride,
- flt2, flt2_stride, xqd);
+ dat_stride, use_highbitdepth, flt1,
+ flt1_stride, flt2, flt2_stride, xqd);
(void)start_step;
#if USE_SGRPROJ_REFINEMENT_SEARCH
int64_t err2;
@@ -237,8 +240,8 @@ static int64_t finer_search_pixel_proj_error(
if (xqd[p] - s >= tap_min[p]) {
xqd[p] -= s;
err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
- dat_stride, bit_depth, flt1, flt1_stride,
- flt2, flt2_stride, xqd);
+ dat_stride, use_highbitdepth, flt1,
+ flt1_stride, flt2, flt2_stride, xqd);
if (err2 > err) {
xqd[p] += s;
} else {
@@ -255,8 +258,8 @@ static int64_t finer_search_pixel_proj_error(
if (xqd[p] + s <= tap_max[p]) {
xqd[p] += s;
err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
- dat_stride, bit_depth, flt1, flt1_stride,
- flt2, flt2_stride, xqd);
+ dat_stride, use_highbitdepth, flt1,
+ flt1_stride, flt2, flt2_stride, xqd);
if (err2 > err) {
xqd[p] -= s;
} else {
@@ -273,10 +276,11 @@ static int64_t finer_search_pixel_proj_error(
return err;
}
-static void get_proj_subspace(uint8_t *src8, int width, int height,
+static void get_proj_subspace(const uint8_t *src8, int width, int height,
int src_stride, uint8_t *dat8, int dat_stride,
- int bit_depth, int32_t *flt1, int flt1_stride,
- int32_t *flt2, int flt2_stride, int *xq) {
+ int use_highbitdepth, int32_t *flt1,
+ int flt1_stride, int32_t *flt2, int flt2_stride,
+ int *xq) {
int i, j;
double H[2][2] = { { 0, 0 }, { 0, 0 } };
double C[2] = { 0, 0 };
@@ -289,7 +293,7 @@ static void get_proj_subspace(uint8_t *src8, int width, int height,
// Default
xq[0] = 0;
xq[1] = 0;
- if (bit_depth == 8) {
+ if (!use_highbitdepth) {
const uint8_t *src = src8;
const uint8_t *dat = dat8;
for (i = 0; i < height; ++i) {
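
get_proj_subspace reduces the weight search to 2x2 normal equations H·x = C over the two filtered residuals, which are solvable in closed form. A hedged sketch of the solve step only; the accumulation and fixed-point scaling of the real code are omitted:

#include <math.h>

/* Solve the 2x2 system H x = C by Cramer's rule; returns 0 on a
   (near-)singular H, mirroring the "default xq = {0, 0}" fallback. */
static int solve_2x2(const double H[2][2], const double C[2],
                     double x[2]) {
  const double det = H[0][0] * H[1][1] - H[0][1] * H[1][0];
  if (fabs(det) < 1e-8) return 0;
  x[0] = (C[0] * H[1][1] - C[1] * H[0][1]) / det;
  x[1] = (C[1] * H[0][0] - C[0] * H[1][0]) / det;
  return 1;
}
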
@@ -346,54 +350,83 @@ void encode_xq(int *xq, int *xqd) {
}
static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
- int dat_stride, uint8_t *src8,
- int src_stride, int bit_depth,
- int *eps, int *xqd, int32_t *rstbuf) {
+ int dat_stride, const uint8_t *src8,
+ int src_stride, int use_highbitdepth,
+ int bit_depth, int pu_width,
+ int pu_height, int *eps, int *xqd,
+ int32_t *rstbuf) {
int32_t *flt1 = rstbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
- int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int ep, bestep = 0;
int64_t err, besterr = -1;
int exqd[2], bestxqd[2] = { 0, 0 };
+ int flt1_stride = ((width + 7) & ~7) + 8;
+ int flt2_stride = ((width + 7) & ~7) + 8;
+ assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
+ pu_width == RESTORATION_PROC_UNIT_SIZE);
+ assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) ||
+ pu_height == RESTORATION_PROC_UNIT_SIZE);
+#if !CONFIG_HIGHBITDEPTH
+ (void)bit_depth;
+#endif
for (ep = 0; ep < SGRPROJ_PARAMS; ep++) {
int exq[2];
#if CONFIG_HIGHBITDEPTH
- if (bit_depth > 8) {
+ if (use_highbitdepth) {
uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
+ for (int i = 0; i < height; i += pu_height)
+ for (int j = 0; j < width; j += pu_width) {
+ const int w = AOMMIN(pu_width, width - j);
+ const int h = AOMMIN(pu_height, height - i);
+ uint16_t *dat_p = dat + i * dat_stride + j;
+ int32_t *flt1_p = flt1 + i * flt1_stride + j;
+ int32_t *flt2_p = flt2 + i * flt2_stride + j;
#if USE_HIGHPASS_IN_SGRPROJ
- av1_highpass_filter_highbd(dat, width, height, dat_stride, flt1, width,
- sgr_params[ep].corner, sgr_params[ep].edge);
+ av1_highpass_filter_highbd(dat_p, w, h, dat_stride, flt1_p,
+ flt1_stride, sgr_params[ep].corner,
+ sgr_params[ep].edge);
#else
- av1_selfguided_restoration_highbd(dat, width, height, dat_stride, flt1,
- width, bit_depth, sgr_params[ep].r1,
- sgr_params[ep].e1, tmpbuf2);
+ av1_selfguided_restoration_highbd(
+ dat_p, w, h, dat_stride, flt1_p, flt1_stride, bit_depth,
+ sgr_params[ep].r1, sgr_params[ep].e1);
#endif // USE_HIGHPASS_IN_SGRPROJ
- av1_selfguided_restoration_highbd(dat, width, height, dat_stride, flt2,
- width, bit_depth, sgr_params[ep].r2,
- sgr_params[ep].e2, tmpbuf2);
+ av1_selfguided_restoration_highbd(
+ dat_p, w, h, dat_stride, flt2_p, flt2_stride, bit_depth,
+ sgr_params[ep].r2, sgr_params[ep].e2);
+ }
} else {
#endif
+ for (int i = 0; i < height; i += pu_height)
+ for (int j = 0; j < width; j += pu_width) {
+ const int w = AOMMIN(pu_width, width - j);
+ const int h = AOMMIN(pu_height, height - i);
+ uint8_t *dat_p = dat8 + i * dat_stride + j;
+ int32_t *flt1_p = flt1 + i * flt1_stride + j;
+ int32_t *flt2_p = flt2 + i * flt2_stride + j;
#if USE_HIGHPASS_IN_SGRPROJ
- av1_highpass_filter(dat8, width, height, dat_stride, flt1, width,
- sgr_params[ep].corner, sgr_params[ep].edge);
+ av1_highpass_filter(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
+ sgr_params[ep].corner, sgr_params[ep].edge);
#else
- av1_selfguided_restoration(dat8, width, height, dat_stride, flt1, width,
- sgr_params[ep].r1, sgr_params[ep].e1, tmpbuf2);
+ av1_selfguided_restoration(dat_p, w, h, dat_stride, flt1_p, flt1_stride,
+ sgr_params[ep].r1, sgr_params[ep].e1);
#endif // USE_HIGHPASS_IN_SGRPROJ
- av1_selfguided_restoration(dat8, width, height, dat_stride, flt2, width,
- sgr_params[ep].r2, sgr_params[ep].e2, tmpbuf2);
+ av1_selfguided_restoration(dat_p, w, h, dat_stride, flt2_p,
+ flt2_stride, sgr_params[ep].r2,
+ sgr_params[ep].e2);
+ }
#if CONFIG_HIGHBITDEPTH
}
#endif
aom_clear_system_state();
get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
- bit_depth, flt1, width, flt2, width, exq);
+ use_highbitdepth, flt1, flt1_stride, flt2, flt2_stride,
+ exq);
aom_clear_system_state();
encode_xq(exq, exqd);
- err = finer_search_pixel_proj_error(src8, width, height, src_stride, dat8,
- dat_stride, bit_depth, flt1, width,
- flt2, width, 2, exqd);
+ err = finer_search_pixel_proj_error(
+ src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth,
+ flt1, flt1_stride, flt2, flt2_stride, 2, exqd);
if (besterr == -1 || err < besterr) {
bestep = ep;
besterr = err;
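
The search now applies the guided filters per processing unit rather than over the whole tile, walking a pu_width x pu_height grid and clamping the last unit at the tile edges. The traversal reduced to its skeleton (callback invented for illustration):

/* Skeleton of the processing-unit traversal added in this hunk: each
   unit gets its own offsets into dat/flt1/flt2 and a size clamped at
   the tile's right and bottom edges. */
static void foreach_proc_unit(int width, int height, int pu_width,
                              int pu_height,
                              void (*visit)(int i, int j, int w, int h)) {
  for (int i = 0; i < height; i += pu_height) {
    for (int j = 0; j < width; j += pu_width) {
      const int w = (pu_width < width - j) ? pu_width : width - j;
      const int h = (pu_height < height - i) ? pu_height : height - i;
      visit(i, j, w, h); /* filter this w x h unit at (i, j) */
    }
  }
}
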
@@ -420,124 +453,258 @@ static int count_sgrproj_bits(SgrprojInfo *sgrproj_info,
return bits;
}
-static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
- int partial_frame, int plane,
- RestorationInfo *info, RestorationType *type,
- double *best_tile_cost,
- YV12_BUFFER_CONFIG *dst_frame) {
- SgrprojInfo *sgrproj_info = info->sgrproj_info;
- double err, cost_norestore, cost_sgrproj;
- int bits;
- MACROBLOCK *x = &cpi->td.mb;
+struct rest_search_ctxt {
+ const YV12_BUFFER_CONFIG *src;
+ AV1_COMP *cpi;
+ uint8_t *dgd_buffer;
+ const uint8_t *src_buffer;
+ int dgd_stride;
+ int src_stride;
+ int partial_frame;
+ RestorationInfo *info;
+ RestorationType *type;
+ int64_t *best_tile_cost;
+ int plane;
+ int plane_width;
+ int plane_height;
+ int nrtiles_x;
+ int nrtiles_y;
+ YV12_BUFFER_CONFIG *dst_frame;
+};
+
+// Fill in ctxt. Returns the number of restoration tiles for this plane.
+static INLINE int init_rest_search_ctxt(
+ const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, int plane,
+ RestorationInfo *info, RestorationType *type, int64_t *best_tile_cost,
+ YV12_BUFFER_CONFIG *dst_frame, struct rest_search_ctxt *ctxt) {
AV1_COMMON *const cm = &cpi->common;
+ ctxt->src = src;
+ ctxt->cpi = cpi;
+ ctxt->partial_frame = partial_frame;
+ ctxt->info = info;
+ ctxt->type = type;
+ ctxt->best_tile_cost = best_tile_cost;
+ ctxt->plane = plane;
+ ctxt->dst_frame = dst_frame;
+
const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
- RestorationInfo *rsi = &cpi->rst_search[0];
- int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
- int h_start, h_end, v_start, v_end;
- int width, height, src_stride, dgd_stride;
- uint8_t *dgd_buffer, *src_buffer;
if (plane == AOM_PLANE_Y) {
- width = src->y_crop_width;
- height = src->y_crop_height;
- src_buffer = src->y_buffer;
- src_stride = src->y_stride;
- dgd_buffer = dgd->y_buffer;
- dgd_stride = dgd->y_stride;
- assert(width == dgd->y_crop_width);
- assert(height == dgd->y_crop_height);
- assert(width == src->y_crop_width);
- assert(height == src->y_crop_height);
+ ctxt->plane_width = src->y_crop_width;
+ ctxt->plane_height = src->y_crop_height;
+ ctxt->src_buffer = src->y_buffer;
+ ctxt->src_stride = src->y_stride;
+ ctxt->dgd_buffer = dgd->y_buffer;
+ ctxt->dgd_stride = dgd->y_stride;
+ assert(ctxt->plane_width == dgd->y_crop_width);
+ assert(ctxt->plane_height == dgd->y_crop_height);
+ assert(ctxt->plane_width == src->y_crop_width);
+ assert(ctxt->plane_height == src->y_crop_height);
} else {
- width = src->uv_crop_width;
- height = src->uv_crop_height;
- src_stride = src->uv_stride;
- dgd_stride = dgd->uv_stride;
- src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer;
- dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer;
- assert(width == dgd->uv_crop_width);
- assert(height == dgd->uv_crop_height);
+ ctxt->plane_width = src->uv_crop_width;
+ ctxt->plane_height = src->uv_crop_height;
+ ctxt->src_stride = src->uv_stride;
+ ctxt->dgd_stride = dgd->uv_stride;
+ ctxt->src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer;
+ ctxt->dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer;
+ assert(ctxt->plane_width == dgd->uv_crop_width);
+ assert(ctxt->plane_height == dgd->uv_crop_height);
}
- const int ntiles =
- av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize,
- &tile_width, &tile_height, &nhtiles, &nvtiles);
- SgrprojInfo ref_sgrproj_info;
- set_default_sgrproj(&ref_sgrproj_info);
- rsi[plane].frame_restoration_type = RESTORE_SGRPROJ;
+ return av1_get_rest_ntiles(ctxt->plane_width, ctxt->plane_height,
+ cm->rst_info[plane].restoration_tilesize, NULL,
+ NULL, &ctxt->nrtiles_x, &ctxt->nrtiles_y);
+}
- for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- rsi[plane].restoration_type[tile_idx] = RESTORE_NONE;
+typedef void (*rtile_visitor_t)(const struct rest_search_ctxt *search_ctxt,
+ int rtile_idx,
+ const RestorationTileLimits *limits, void *arg);
+
+static void foreach_rtile_in_tile(const struct rest_search_ctxt *ctxt,
+ int tile_row, int tile_col,
+ rtile_visitor_t fun, void *arg) {
+ const AV1_COMMON *const cm = &ctxt->cpi->common;
+ const RestorationInfo *rsi = ctxt->cpi->rst_search;
+ TileInfo tile_info;
+
+ av1_tile_set_row(&tile_info, cm, tile_row);
+ av1_tile_set_col(&tile_info, cm, tile_col);
+
+ int tile_col_start = tile_info.mi_col_start * MI_SIZE;
+ int tile_col_end = tile_info.mi_col_end * MI_SIZE;
+ int tile_row_start = tile_info.mi_row_start * MI_SIZE;
+ int tile_row_end = tile_info.mi_row_end * MI_SIZE;
+ if (ctxt->plane > 0) {
+ tile_col_start = ROUND_POWER_OF_TWO(tile_col_start, cm->subsampling_x);
+ tile_col_end = ROUND_POWER_OF_TWO(tile_col_end, cm->subsampling_x);
+ tile_row_start = ROUND_POWER_OF_TWO(tile_row_start, cm->subsampling_y);
+ tile_row_end = ROUND_POWER_OF_TWO(tile_row_end, cm->subsampling_y);
}
- // Compute best Sgrproj filters for each tile
- for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
- tile_height, width, height, 0, 0, &h_start, &h_end,
- &v_start, &v_end);
- err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start,
- h_end - h_start, v_start, v_end - v_start,
- (1 << plane));
- // #bits when a tile is not restored
- bits = av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 0);
- cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
- best_tile_cost[tile_idx] = DBL_MAX;
- search_selfguided_restoration(
- dgd_buffer + v_start * dgd_stride + h_start, h_end - h_start,
- v_end - v_start, dgd_stride,
- src_buffer + v_start * src_stride + h_start, src_stride,
+
+#if CONFIG_FRAME_SUPERRES
+ // If upscaling is enabled, the tile limits need scaling to match the
+ // upscaled frame where the restoration tiles live. To do this, scale up the
+ // top-left and bottom-right of the tile.
+ if (!av1_superres_unscaled(cm)) {
+ av1_calculate_unscaled_superres_size(&tile_col_start, &tile_row_start,
+ cm->superres_scale_denominator);
+ av1_calculate_unscaled_superres_size(&tile_col_end, &tile_row_end,
+ cm->superres_scale_denominator);
+ // Make sure we don't fall off the bottom-right of the frame.
+ tile_col_end = AOMMIN(tile_col_end, ctxt->plane_width);
+ tile_row_end = AOMMIN(tile_row_end, ctxt->plane_height);
+ }
+#endif // CONFIG_FRAME_SUPERRES
+
+ const int rtile_size = rsi->restoration_tilesize;
+ const int rtile_col0 = (tile_col_start + rtile_size - 1) / rtile_size;
+ const int rtile_col1 =
+ AOMMIN((tile_col_end + rtile_size - 1) / rtile_size, ctxt->nrtiles_x);
+ const int rtile_row0 = (tile_row_start + rtile_size - 1) / rtile_size;
+ const int rtile_row1 =
+ AOMMIN((tile_row_end + rtile_size - 1) / rtile_size, ctxt->nrtiles_y);
+
+ const int rtile_width = AOMMIN(tile_col_end - tile_col_start, rtile_size);
+ const int rtile_height = AOMMIN(tile_row_end - tile_row_start, rtile_size);
+
+ for (int rtile_row = rtile_row0; rtile_row < rtile_row1; ++rtile_row) {
+ for (int rtile_col = rtile_col0; rtile_col < rtile_col1; ++rtile_col) {
+ const int rtile_idx = rtile_row * ctxt->nrtiles_x + rtile_col;
+ RestorationTileLimits limits = av1_get_rest_tile_limits(
+ rtile_idx, ctxt->nrtiles_x, ctxt->nrtiles_y, rtile_width,
+ rtile_height, ctxt->plane_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ ctxt->plane_height, ctxt->plane > 0 ? cm->subsampling_y : 0);
+#else
+ ctxt->plane_height);
+#endif
+ fun(ctxt, rtile_idx, &limits, arg);
+ }
+ }
+}
+
+static void search_sgrproj_for_rtile(const struct rest_search_ctxt *ctxt,
+ int rtile_idx,
+ const RestorationTileLimits *limits,
+ void *arg) {
+ const MACROBLOCK *const x = &ctxt->cpi->td.mb;
+ const AV1_COMMON *const cm = &ctxt->cpi->common;
+ RestorationInfo *rsi = ctxt->cpi->rst_search;
+ SgrprojInfo *sgrproj_info = ctxt->info->sgrproj_info;
+
+ SgrprojInfo *ref_sgrproj_info = (SgrprojInfo *)arg;
+
+ int64_t err =
+ sse_restoration_tile(ctxt->src, cm->frame_to_show, cm, limits->h_start,
+ limits->h_end - limits->h_start, limits->v_start,
+ limits->v_end - limits->v_start, (1 << ctxt->plane));
+ // #bits when a tile is not restored
+ int bits = av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 0);
+ double cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
+ ctxt->best_tile_cost[rtile_idx] = INT64_MAX;
+
+ RestorationInfo *plane_rsi = &rsi[ctxt->plane];
+ SgrprojInfo *rtile_sgrproj_info = &plane_rsi->sgrproj_info[rtile_idx];
+ uint8_t *dgd_start =
+ ctxt->dgd_buffer + limits->v_start * ctxt->dgd_stride + limits->h_start;
+ const uint8_t *src_start =
+ ctxt->src_buffer + limits->v_start * ctxt->src_stride + limits->h_start;
+
+ search_selfguided_restoration(
+ dgd_start, limits->h_end - limits->h_start,
+ limits->v_end - limits->v_start, ctxt->dgd_stride, src_start,
+ ctxt->src_stride,
#if CONFIG_HIGHBITDEPTH
- cm->bit_depth,
+ cm->use_highbitdepth, cm->bit_depth,
#else
- 8,
+ 0, 8,
#endif // CONFIG_HIGHBITDEPTH
- &rsi[plane].sgrproj_info[tile_idx].ep,
- rsi[plane].sgrproj_info[tile_idx].xqd, cm->rst_internal.tmpbuf);
- rsi[plane].restoration_type[tile_idx] = RESTORE_SGRPROJ;
- err = try_restoration_tile(src, cpi, rsi, (1 << plane), partial_frame,
- tile_idx, 0, 0, dst_frame);
- bits = count_sgrproj_bits(&rsi[plane].sgrproj_info[tile_idx],
- &ref_sgrproj_info)
- << AV1_PROB_COST_SHIFT;
- bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 1);
- cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err);
- if (cost_sgrproj >= cost_norestore) {
- type[tile_idx] = RESTORE_NONE;
- } else {
- type[tile_idx] = RESTORE_SGRPROJ;
- memcpy(&sgrproj_info[tile_idx], &rsi[plane].sgrproj_info[tile_idx],
- sizeof(sgrproj_info[tile_idx]));
- memcpy(&ref_sgrproj_info, &sgrproj_info[tile_idx],
- sizeof(ref_sgrproj_info));
- best_tile_cost[tile_idx] = err;
+ rsi[ctxt->plane].procunit_width, rsi[ctxt->plane].procunit_height,
+ &rtile_sgrproj_info->ep, rtile_sgrproj_info->xqd,
+ cm->rst_internal.tmpbuf);
+ plane_rsi->restoration_type[rtile_idx] = RESTORE_SGRPROJ;
+ err = try_restoration_tile(ctxt->src, ctxt->cpi, rsi, (1 << ctxt->plane),
+ ctxt->partial_frame, rtile_idx, ctxt->dst_frame);
+ bits =
+ count_sgrproj_bits(&plane_rsi->sgrproj_info[rtile_idx], ref_sgrproj_info)
+ << AV1_PROB_COST_SHIFT;
+ bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 1);
+ double cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err);
+ if (cost_sgrproj >= cost_norestore) {
+ ctxt->type[rtile_idx] = RESTORE_NONE;
+ } else {
+ ctxt->type[rtile_idx] = RESTORE_SGRPROJ;
+ *ref_sgrproj_info = sgrproj_info[rtile_idx] =
+ plane_rsi->sgrproj_info[rtile_idx];
+ ctxt->best_tile_cost[rtile_idx] = err;
+ }
+ plane_rsi->restoration_type[rtile_idx] = RESTORE_NONE;
+}
+
+static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
+ int partial_frame, int plane,
+ RestorationInfo *info, RestorationType *type,
+ int64_t *best_tile_cost,
+ YV12_BUFFER_CONFIG *dst_frame) {
+ struct rest_search_ctxt ctxt;
+ const int nrtiles =
+ init_rest_search_ctxt(src, cpi, partial_frame, plane, info, type,
+ best_tile_cost, dst_frame, &ctxt);
+
+ RestorationInfo *plane_rsi = &cpi->rst_search[plane];
+ plane_rsi->frame_restoration_type = RESTORE_SGRPROJ;
+ for (int rtile_idx = 0; rtile_idx < nrtiles; ++rtile_idx) {
+ plane_rsi->restoration_type[rtile_idx] = RESTORE_NONE;
+ }
+
+ // Compute best Sgrproj filters for each rtile, one (encoder/decoder)
+ // tile at a time.
+ const AV1_COMMON *const cm = &cpi->common;
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ extend_frame_highbd(CONVERT_TO_SHORTPTR(ctxt.dgd_buffer), ctxt.plane_width,
+ ctxt.plane_height, ctxt.dgd_stride, SGRPROJ_BORDER_HORZ,
+ SGRPROJ_BORDER_VERT);
+ else
+#endif
+ extend_frame(ctxt.dgd_buffer, ctxt.plane_width, ctxt.plane_height,
+ ctxt.dgd_stride, SGRPROJ_BORDER_HORZ, SGRPROJ_BORDER_VERT);
+
+ for (int tile_row = 0; tile_row < cm->tile_rows; ++tile_row) {
+ for (int tile_col = 0; tile_col < cm->tile_cols; ++tile_col) {
+ SgrprojInfo ref_sgrproj_info;
+ set_default_sgrproj(&ref_sgrproj_info);
+ foreach_rtile_in_tile(&ctxt, tile_row, tile_col, search_sgrproj_for_rtile,
+ &ref_sgrproj_info);
}
- rsi[plane].restoration_type[tile_idx] = RESTORE_NONE;
}
+
// Cost for Sgrproj filtering
+ SgrprojInfo ref_sgrproj_info;
set_default_sgrproj(&ref_sgrproj_info);
- bits = frame_level_restore_bits[rsi[plane].frame_restoration_type]
- << AV1_PROB_COST_SHIFT;
- for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- bits +=
- av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, type[tile_idx] != RESTORE_NONE);
- memcpy(&rsi[plane].sgrproj_info[tile_idx], &sgrproj_info[tile_idx],
- sizeof(sgrproj_info[tile_idx]));
- if (type[tile_idx] == RESTORE_SGRPROJ) {
- bits += count_sgrproj_bits(&rsi[plane].sgrproj_info[tile_idx],
+ SgrprojInfo *sgrproj_info = info->sgrproj_info;
+
+ int bits = frame_level_restore_bits[plane_rsi->frame_restoration_type]
+ << AV1_PROB_COST_SHIFT;
+ for (int rtile_idx = 0; rtile_idx < nrtiles; ++rtile_idx) {
+ bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB,
+ type[rtile_idx] != RESTORE_NONE);
+ plane_rsi->sgrproj_info[rtile_idx] = sgrproj_info[rtile_idx];
+ if (type[rtile_idx] == RESTORE_SGRPROJ) {
+ bits += count_sgrproj_bits(&plane_rsi->sgrproj_info[rtile_idx],
&ref_sgrproj_info)
<< AV1_PROB_COST_SHIFT;
- memcpy(&ref_sgrproj_info, &rsi[plane].sgrproj_info[tile_idx],
- sizeof(ref_sgrproj_info));
+ ref_sgrproj_info = plane_rsi->sgrproj_info[rtile_idx];
}
- rsi[plane].restoration_type[tile_idx] = type[tile_idx];
+ plane_rsi->restoration_type[rtile_idx] = type[rtile_idx];
}
- err = try_restoration_frame(src, cpi, rsi, (1 << plane), partial_frame,
- dst_frame);
- cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err);
-
+ int64_t err = try_restoration_frame(src, cpi, cpi->rst_search, (1 << plane),
+ partial_frame, dst_frame);
+ double cost_sgrproj = RDCOST_DBL(cpi->td.mb.rdmult, (bits >> 4), err);
return cost_sgrproj;
}
-static double find_average(uint8_t *src, int h_start, int h_end, int v_start,
- int v_end, int stride) {
+static double find_average(const uint8_t *src, int h_start, int h_end,
+ int v_start, int v_end, int stride) {
uint64_t sum = 0;
double avg = 0;
int i, j;
@@ -548,47 +715,51 @@ static double find_average(uint8_t *src, int h_start, int h_end, int v_start,
return avg;
}
-static void compute_stats(uint8_t *dgd, uint8_t *src, int h_start, int h_end,
+static void compute_stats(int wiener_win, const uint8_t *dgd,
+ const uint8_t *src, int h_start, int h_end,
int v_start, int v_end, int dgd_stride,
int src_stride, double *M, double *H) {
int i, j, k, l;
double Y[WIENER_WIN2];
+ const int wiener_win2 = wiener_win * wiener_win;
+ const int wiener_halfwin = (wiener_win >> 1);
const double avg =
find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
- memset(M, 0, sizeof(*M) * WIENER_WIN2);
- memset(H, 0, sizeof(*H) * WIENER_WIN2 * WIENER_WIN2);
+ memset(M, 0, sizeof(*M) * wiener_win2);
+ memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
for (i = v_start; i < v_end; i++) {
for (j = h_start; j < h_end; j++) {
const double X = (double)src[i * src_stride + j] - avg;
int idx = 0;
- for (k = -WIENER_HALFWIN; k <= WIENER_HALFWIN; k++) {
- for (l = -WIENER_HALFWIN; l <= WIENER_HALFWIN; l++) {
+ for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
+ for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
idx++;
}
}
- for (k = 0; k < WIENER_WIN2; ++k) {
+ assert(idx == wiener_win2);
+ for (k = 0; k < wiener_win2; ++k) {
M[k] += Y[k] * X;
- H[k * WIENER_WIN2 + k] += Y[k] * Y[k];
- for (l = k + 1; l < WIENER_WIN2; ++l) {
+ H[k * wiener_win2 + k] += Y[k] * Y[k];
+ for (l = k + 1; l < wiener_win2; ++l) {
// H is a symmetric matrix, so we only need to fill out the upper
// triangle here. We can copy it down to the lower triangle outside
// the (i, j) loops.
- H[k * WIENER_WIN2 + l] += Y[k] * Y[l];
+ H[k * wiener_win2 + l] += Y[k] * Y[l];
}
}
}
}
- for (k = 0; k < WIENER_WIN2; ++k) {
- for (l = k + 1; l < WIENER_WIN2; ++l) {
- H[l * WIENER_WIN2 + k] = H[k * WIENER_WIN2 + l];
+ for (k = 0; k < wiener_win2; ++k) {
+ for (l = k + 1; l < wiener_win2; ++l) {
+ H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
}
}
}
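
As a reading aid (not part of the patch): compute_stats is accumulating the terms of the classical Wiener normal equations, sketched below with w standing for the stacked 2D filter.

/*
 * Over all pixels (i, j) in the region, with X the mean-removed source
 * sample and Y[] the stacked wiener_win x wiener_win degraded neighborhood:
 *
 *   M[k]          = sum of X * Y[k]        (cross-correlation vector)
 *   H[k*win2 + l] = sum of Y[k] * Y[l]     (autocorrelation matrix)
 *
 * The filter w minimizing sum of (X - w'Y)^2 then satisfies H w = M, which
 * wiener_decompose_sep_sym() below solves approximately under a
 * separable-symmetric constraint.
 */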
#if CONFIG_HIGHBITDEPTH
-static double find_average_highbd(uint16_t *src, int h_start, int h_end,
+static double find_average_highbd(const uint16_t *src, int h_start, int h_end,
int v_start, int v_end, int stride) {
uint64_t sum = 0;
double avg = 0;
@@ -600,168 +771,184 @@ static double find_average_highbd(uint16_t *src, int h_start, int h_end,
return avg;
}
-static void compute_stats_highbd(uint8_t *dgd8, uint8_t *src8, int h_start,
- int h_end, int v_start, int v_end,
- int dgd_stride, int src_stride, double *M,
- double *H) {
+static void compute_stats_highbd(int wiener_win, const uint8_t *dgd8,
+ const uint8_t *src8, int h_start, int h_end,
+ int v_start, int v_end, int dgd_stride,
+ int src_stride, double *M, double *H) {
int i, j, k, l;
double Y[WIENER_WIN2];
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
+ const int wiener_win2 = wiener_win * wiener_win;
+ const int wiener_halfwin = (wiener_win >> 1);
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
const double avg =
find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);
- memset(M, 0, sizeof(*M) * WIENER_WIN2);
- memset(H, 0, sizeof(*H) * WIENER_WIN2 * WIENER_WIN2);
+ memset(M, 0, sizeof(*M) * wiener_win2);
+ memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2);
for (i = v_start; i < v_end; i++) {
for (j = h_start; j < h_end; j++) {
const double X = (double)src[i * src_stride + j] - avg;
int idx = 0;
- for (k = -WIENER_HALFWIN; k <= WIENER_HALFWIN; k++) {
- for (l = -WIENER_HALFWIN; l <= WIENER_HALFWIN; l++) {
+ for (k = -wiener_halfwin; k <= wiener_halfwin; k++) {
+ for (l = -wiener_halfwin; l <= wiener_halfwin; l++) {
Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
idx++;
}
}
- for (k = 0; k < WIENER_WIN2; ++k) {
+ assert(idx == wiener_win2);
+ for (k = 0; k < wiener_win2; ++k) {
M[k] += Y[k] * X;
- H[k * WIENER_WIN2 + k] += Y[k] * Y[k];
- for (l = k + 1; l < WIENER_WIN2; ++l) {
+ H[k * wiener_win2 + k] += Y[k] * Y[k];
+ for (l = k + 1; l < wiener_win2; ++l) {
// H is a symmetric matrix, so we only need to fill out the upper
// triangle here. We can copy it down to the lower triangle outside
// the (i, j) loops.
- H[k * WIENER_WIN2 + l] += Y[k] * Y[l];
+ H[k * wiener_win2 + l] += Y[k] * Y[l];
}
}
}
}
- for (k = 0; k < WIENER_WIN2; ++k) {
- for (l = k + 1; l < WIENER_WIN2; ++l) {
- H[l * WIENER_WIN2 + k] = H[k * WIENER_WIN2 + l];
+ for (k = 0; k < wiener_win2; ++k) {
+ for (l = k + 1; l < wiener_win2; ++l) {
+ H[l * wiener_win2 + k] = H[k * wiener_win2 + l];
}
}
}
#endif // CONFIG_HIGHBITDEPTH
-static INLINE int wrap_index(int i) {
- return (i >= WIENER_HALFWIN1 ? WIENER_WIN - 1 - i : i);
+static INLINE int wrap_index(int i, int wiener_win) {
+ const int wiener_halfwin1 = (wiener_win >> 1) + 1;
+ return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
}
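
For concreteness, a tiny standalone check of the fold above (wrap_index_demo is a hypothetical copy for illustration; it is not added by the patch):

#include <assert.h>

static int wrap_index_demo(int i, int wiener_win) {
  const int wiener_halfwin1 = (wiener_win >> 1) + 1;
  return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
}

int main(void) {
  // A 7-tap window folds as 0 1 2 3 2 1 0: mirrored taps share one slot.
  const int expected[7] = { 0, 1, 2, 3, 2, 1, 0 };
  for (int i = 0; i < 7; ++i) assert(wrap_index_demo(i, 7) == expected[i]);
  return 0;
}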
// Fix vector b, update vector a
-static void update_a_sep_sym(double **Mc, double **Hc, double *a, double *b) {
+static void update_a_sep_sym(int wiener_win, double **Mc, double **Hc,
+ double *a, double *b) {
int i, j;
double S[WIENER_WIN];
double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
- int w, w2;
+ const int wiener_win2 = wiener_win * wiener_win;
+ const int wiener_halfwin1 = (wiener_win >> 1) + 1;
memset(A, 0, sizeof(A));
memset(B, 0, sizeof(B));
- for (i = 0; i < WIENER_WIN; i++) {
- for (j = 0; j < WIENER_WIN; ++j) {
- const int jj = wrap_index(j);
+ for (i = 0; i < wiener_win; i++) {
+ for (j = 0; j < wiener_win; ++j) {
+ const int jj = wrap_index(j, wiener_win);
A[jj] += Mc[i][j] * b[i];
}
}
- for (i = 0; i < WIENER_WIN; i++) {
- for (j = 0; j < WIENER_WIN; j++) {
+ for (i = 0; i < wiener_win; i++) {
+ for (j = 0; j < wiener_win; j++) {
int k, l;
- for (k = 0; k < WIENER_WIN; ++k)
- for (l = 0; l < WIENER_WIN; ++l) {
- const int kk = wrap_index(k);
- const int ll = wrap_index(l);
- B[ll * WIENER_HALFWIN1 + kk] +=
- Hc[j * WIENER_WIN + i][k * WIENER_WIN2 + l] * b[i] * b[j];
+ for (k = 0; k < wiener_win; ++k)
+ for (l = 0; l < wiener_win; ++l) {
+ const int kk = wrap_index(k, wiener_win);
+ const int ll = wrap_index(l, wiener_win);
+ B[ll * wiener_halfwin1 + kk] +=
+ Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] * b[j];
}
}
}
// Normalization enforcement in the system of equations itself
- w = WIENER_WIN;
- w2 = (w >> 1) + 1;
- for (i = 0; i < w2 - 1; ++i)
+ for (i = 0; i < wiener_halfwin1 - 1; ++i)
A[i] -=
- A[w2 - 1] * 2 + B[i * w2 + w2 - 1] - 2 * B[(w2 - 1) * w2 + (w2 - 1)];
- for (i = 0; i < w2 - 1; ++i)
- for (j = 0; j < w2 - 1; ++j)
- B[i * w2 + j] -= 2 * (B[i * w2 + (w2 - 1)] + B[(w2 - 1) * w2 + j] -
- 2 * B[(w2 - 1) * w2 + (w2 - 1)]);
- if (linsolve(w2 - 1, B, w2, A, S)) {
- S[w2 - 1] = 1.0;
- for (i = w2; i < w; ++i) {
- S[i] = S[w - 1 - i];
- S[w2 - 1] -= 2 * S[i];
+ A[wiener_halfwin1 - 1] * 2 +
+ B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
+ 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
+ for (i = 0; i < wiener_halfwin1 - 1; ++i)
+ for (j = 0; j < wiener_halfwin1 - 1; ++j)
+ B[i * wiener_halfwin1 + j] -=
+ 2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
+ B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
+ 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
+ (wiener_halfwin1 - 1)]);
+ if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
+ S[wiener_halfwin1 - 1] = 1.0;
+ for (i = wiener_halfwin1; i < wiener_win; ++i) {
+ S[i] = S[wiener_win - 1 - i];
+ S[wiener_halfwin1 - 1] -= 2 * S[i];
}
- memcpy(a, S, w * sizeof(*a));
+ memcpy(a, S, wiener_win * sizeof(*a));
}
}
// Fix vector a, update vector b
-static void update_b_sep_sym(double **Mc, double **Hc, double *a, double *b) {
+static void update_b_sep_sym(int wiener_win, double **Mc, double **Hc,
+ double *a, double *b) {
int i, j;
double S[WIENER_WIN];
double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
- int w, w2;
+ const int wiener_win2 = wiener_win * wiener_win;
+ const int wiener_halfwin1 = (wiener_win >> 1) + 1;
memset(A, 0, sizeof(A));
memset(B, 0, sizeof(B));
- for (i = 0; i < WIENER_WIN; i++) {
- const int ii = wrap_index(i);
- for (j = 0; j < WIENER_WIN; j++) A[ii] += Mc[i][j] * a[j];
+ for (i = 0; i < wiener_win; i++) {
+ const int ii = wrap_index(i, wiener_win);
+ for (j = 0; j < wiener_win; j++) A[ii] += Mc[i][j] * a[j];
}
- for (i = 0; i < WIENER_WIN; i++) {
- for (j = 0; j < WIENER_WIN; j++) {
- const int ii = wrap_index(i);
- const int jj = wrap_index(j);
+ for (i = 0; i < wiener_win; i++) {
+ for (j = 0; j < wiener_win; j++) {
+ const int ii = wrap_index(i, wiener_win);
+ const int jj = wrap_index(j, wiener_win);
int k, l;
- for (k = 0; k < WIENER_WIN; ++k)
- for (l = 0; l < WIENER_WIN; ++l)
- B[jj * WIENER_HALFWIN1 + ii] +=
- Hc[i * WIENER_WIN + j][k * WIENER_WIN2 + l] * a[k] * a[l];
+ for (k = 0; k < wiener_win; ++k)
+ for (l = 0; l < wiener_win; ++l)
+ B[jj * wiener_halfwin1 + ii] +=
+ Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] * a[l];
}
}
// Normalization enforcement in the system of equations itself
- w = WIENER_WIN;
- w2 = WIENER_HALFWIN1;
- for (i = 0; i < w2 - 1; ++i)
+ for (i = 0; i < wiener_halfwin1 - 1; ++i)
A[i] -=
- A[w2 - 1] * 2 + B[i * w2 + w2 - 1] - 2 * B[(w2 - 1) * w2 + (w2 - 1)];
- for (i = 0; i < w2 - 1; ++i)
- for (j = 0; j < w2 - 1; ++j)
- B[i * w2 + j] -= 2 * (B[i * w2 + (w2 - 1)] + B[(w2 - 1) * w2 + j] -
- 2 * B[(w2 - 1) * w2 + (w2 - 1)]);
- if (linsolve(w2 - 1, B, w2, A, S)) {
- S[w2 - 1] = 1.0;
- for (i = w2; i < w; ++i) {
- S[i] = S[w - 1 - i];
- S[w2 - 1] -= 2 * S[i];
+ A[wiener_halfwin1 - 1] * 2 +
+ B[i * wiener_halfwin1 + wiener_halfwin1 - 1] -
+ 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)];
+ for (i = 0; i < wiener_halfwin1 - 1; ++i)
+ for (j = 0; j < wiener_halfwin1 - 1; ++j)
+ B[i * wiener_halfwin1 + j] -=
+ 2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] +
+ B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] -
+ 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 +
+ (wiener_halfwin1 - 1)]);
+ if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) {
+ S[wiener_halfwin1 - 1] = 1.0;
+ for (i = wiener_halfwin1; i < wiener_win; ++i) {
+ S[i] = S[wiener_win - 1 - i];
+ S[wiener_halfwin1 - 1] -= 2 * S[i];
}
- memcpy(b, S, w * sizeof(*b));
+ memcpy(b, S, wiener_win * sizeof(*b));
}
}
-static int wiener_decompose_sep_sym(double *M, double *H, double *a,
- double *b) {
+static int wiener_decompose_sep_sym(int wiener_win, double *M, double *H,
+ double *a, double *b) {
static const int init_filt[WIENER_WIN] = {
WIENER_FILT_TAP0_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP2_MIDV,
WIENER_FILT_TAP3_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP1_MIDV,
WIENER_FILT_TAP0_MIDV,
};
- int i, j, iter;
double *Hc[WIENER_WIN2];
double *Mc[WIENER_WIN];
- for (i = 0; i < WIENER_WIN; i++) {
- Mc[i] = M + i * WIENER_WIN;
- for (j = 0; j < WIENER_WIN; j++) {
- Hc[i * WIENER_WIN + j] =
- H + i * WIENER_WIN * WIENER_WIN2 + j * WIENER_WIN;
- }
+ int i, j, iter;
+ const int plane_off = (WIENER_WIN - wiener_win) >> 1;
+ const int wiener_win2 = wiener_win * wiener_win;
+ for (i = 0; i < wiener_win; i++) {
+ a[i] = b[i] = (double)init_filt[i + plane_off] / WIENER_FILT_STEP;
}
- for (i = 0; i < WIENER_WIN; i++) {
- a[i] = b[i] = (double)init_filt[i] / WIENER_FILT_STEP;
+ for (i = 0; i < wiener_win; i++) {
+ Mc[i] = M + i * wiener_win;
+ for (j = 0; j < wiener_win; j++) {
+ Hc[i * wiener_win + j] =
+ H + i * wiener_win * wiener_win2 + j * wiener_win;
+ }
}
iter = 1;
while (iter < NUM_WIENER_ITERS) {
- update_a_sep_sym(Mc, Hc, a, b);
- update_b_sep_sym(Mc, Hc, a, b);
+ update_a_sep_sym(wiener_win, Mc, Hc, a, b);
+ update_b_sep_sym(wiener_win, Mc, Hc, a, b);
iter++;
}
return 1;
@@ -770,14 +957,16 @@ static int wiener_decompose_sep_sym(double *M, double *H, double *a,
// Computes the function x'*H*x - 2*x'*M for the learned 2D filter x, and
// compares it against the identity filter; the final score is the difference
// between the two function values.
-static double compute_score(double *M, double *H, InterpKernel vfilt,
- InterpKernel hfilt) {
+static double compute_score(int wiener_win, double *M, double *H,
+ InterpKernel vfilt, InterpKernel hfilt) {
double ab[WIENER_WIN * WIENER_WIN];
int i, k, l;
double P = 0, Q = 0;
double iP = 0, iQ = 0;
double Score, iScore;
double a[WIENER_WIN], b[WIENER_WIN];
+ const int plane_off = (WIENER_WIN - wiener_win) >> 1;
+ const int wiener_win2 = wiener_win * wiener_win;
aom_clear_system_state();
@@ -788,32 +977,41 @@ static double compute_score(double *M, double *H, InterpKernel vfilt,
a[WIENER_HALFWIN] -= 2 * a[i];
b[WIENER_HALFWIN] -= 2 * b[i];
}
- for (k = 0; k < WIENER_WIN; ++k) {
- for (l = 0; l < WIENER_WIN; ++l) ab[k * WIENER_WIN + l] = a[l] * b[k];
+ memset(ab, 0, sizeof(ab));
+ for (k = 0; k < wiener_win; ++k) {
+ for (l = 0; l < wiener_win; ++l)
+ ab[k * wiener_win + l] = a[l + plane_off] * b[k + plane_off];
}
- for (k = 0; k < WIENER_WIN2; ++k) {
+ for (k = 0; k < wiener_win2; ++k) {
P += ab[k] * M[k];
- for (l = 0; l < WIENER_WIN2; ++l)
- Q += ab[k] * H[k * WIENER_WIN2 + l] * ab[l];
+ for (l = 0; l < wiener_win2; ++l)
+ Q += ab[k] * H[k * wiener_win2 + l] * ab[l];
}
Score = Q - 2 * P;
- iP = M[WIENER_WIN2 >> 1];
- iQ = H[(WIENER_WIN2 >> 1) * WIENER_WIN2 + (WIENER_WIN2 >> 1)];
+ iP = M[wiener_win2 >> 1];
+ iQ = H[(wiener_win2 >> 1) * wiener_win2 + (wiener_win2 >> 1)];
iScore = iQ - 2 * iP;
return Score - iScore;
}
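
Distilled from the loops above (no behavior beyond what the code already does):

/*
 * With P = x'M and Q = x'Hx for the separable filter x = outer(b, a):
 *   Score  = Q - 2P               // objective value for the learned filter
 *   iScore = H[c][c] - 2*M[c]     // same, for the identity filter
 *                                 // (single center tap c = wiener_win2 >> 1)
 * The caller keeps the learned filter only when Score - iScore <= 0.
 */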
-static void quantize_sym_filter(double *f, InterpKernel fi) {
+static void quantize_sym_filter(int wiener_win, double *f, InterpKernel fi) {
int i;
- for (i = 0; i < WIENER_HALFWIN; ++i) {
+ const int wiener_halfwin = (wiener_win >> 1);
+ for (i = 0; i < wiener_halfwin; ++i) {
fi[i] = RINT(f[i] * WIENER_FILT_STEP);
}
// Specialize the tap clipping for the 7-tap (luma) and 5-tap (chroma) cases
- fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV);
- fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
- fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
+ if (wiener_win == WIENER_WIN) {
+ fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV);
+ fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
+ fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
+ } else {
+ fi[2] = CLIP(fi[1], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
+ fi[1] = CLIP(fi[0], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
+ fi[0] = 0;
+ }
// Satisfy filter constraints
fi[WIENER_WIN - 1] = fi[0];
fi[WIENER_WIN - 2] = fi[1];
@@ -822,14 +1020,15 @@ static void quantize_sym_filter(double *f, InterpKernel fi) {
fi[3] = -2 * (fi[0] + fi[1] + fi[2]);
}
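
A quick arithmetic check of the constraint line above; the note on WIENER_FILT_STEP is an assumption about the convolve path outside this hunk:

/*
 * With mirrored taps fi[6..4] = fi[0..2] and fi[3] = -2*(fi[0]+fi[1]+fi[2]):
 *   sum = 2*(fi[0]+fi[1]+fi[2]) + fi[3] = 0,
 * i.e. the stored taps are zero-sum. Unit DC gain presumably comes from the
 * source pixel being added back in by the wiener convolve ("add_src") path.
 */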
-static int count_wiener_bits(WienerInfo *wiener_info,
+static int count_wiener_bits(int wiener_win, WienerInfo *wiener_info,
WienerInfo *ref_wiener_info) {
int bits = 0;
- bits += aom_count_primitive_refsubexpfin(
- WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
- WIENER_FILT_TAP0_SUBEXP_K,
- ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
- wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV);
+ if (wiener_win == WIENER_WIN)
+ bits += aom_count_primitive_refsubexpfin(
+ WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
+ WIENER_FILT_TAP0_SUBEXP_K,
+ ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
+ wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV);
bits += aom_count_primitive_refsubexpfin(
WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
WIENER_FILT_TAP1_SUBEXP_K,
@@ -840,11 +1039,12 @@ static int count_wiener_bits(WienerInfo *wiener_info,
WIENER_FILT_TAP2_SUBEXP_K,
ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV,
wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV);
- bits += aom_count_primitive_refsubexpfin(
- WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
- WIENER_FILT_TAP0_SUBEXP_K,
- ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
- wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV);
+ if (wiener_win == WIENER_WIN)
+ bits += aom_count_primitive_refsubexpfin(
+ WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
+ WIENER_FILT_TAP0_SUBEXP_K,
+ ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
+ wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV);
bits += aom_count_primitive_refsubexpfin(
WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
WIENER_FILT_TAP1_SUBEXP_K,
@@ -861,11 +1061,13 @@ static int count_wiener_bits(WienerInfo *wiener_info,
#define USE_WIENER_REFINEMENT_SEARCH 1
static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src,
AV1_COMP *cpi, RestorationInfo *rsi,
- int start_step, int plane, int tile_idx,
+ int start_step, int plane,
+ int wiener_win, int tile_idx,
int partial_frame,
YV12_BUFFER_CONFIG *dst_frame) {
+ const int plane_off = (WIENER_WIN - wiener_win) >> 1;
int64_t err = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
- tile_idx, 0, 0, dst_frame);
+ tile_idx, dst_frame);
(void)start_step;
#if USE_WIENER_REFINEMENT_SEARCH
int64_t err2;
@@ -875,7 +1077,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src,
WIENER_FILT_TAP2_MAXV };
// printf("err pre = %"PRId64"\n", err);
for (int s = start_step; s >= 1; s >>= 1) {
- for (int p = 0; p < WIENER_HALFWIN; ++p) {
+ for (int p = plane_off; p < WIENER_HALFWIN; ++p) {
int skip = 0;
do {
if (rsi[plane].wiener_info[tile_idx].hfilter[p] - s >= tap_min[p]) {
@@ -883,7 +1085,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src,
rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] -= s;
rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] += 2 * s;
err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
- tile_idx, 0, 0, dst_frame);
+ tile_idx, dst_frame);
if (err2 > err) {
rsi[plane].wiener_info[tile_idx].hfilter[p] += s;
rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] += s;
@@ -904,7 +1106,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src,
rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] += s;
rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] -= 2 * s;
err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
- tile_idx, 0, 0, dst_frame);
+ tile_idx, dst_frame);
if (err2 > err) {
rsi[plane].wiener_info[tile_idx].hfilter[p] -= s;
rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] -= s;
@@ -918,7 +1120,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src,
break;
} while (1);
}
- for (int p = 0; p < WIENER_HALFWIN; ++p) {
+ for (int p = plane_off; p < WIENER_HALFWIN; ++p) {
int skip = 0;
do {
if (rsi[plane].wiener_info[tile_idx].vfilter[p] - s >= tap_min[p]) {
@@ -926,7 +1128,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src,
rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] -= s;
rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] += 2 * s;
err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
- tile_idx, 0, 0, dst_frame);
+ tile_idx, dst_frame);
if (err2 > err) {
rsi[plane].wiener_info[tile_idx].vfilter[p] += s;
rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] += s;
@@ -947,7 +1149,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src,
rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] += s;
rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] -= 2 * s;
err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame,
- tile_idx, 0, 0, dst_frame);
+ tile_idx, dst_frame);
if (err2 > err) {
rsi[plane].wiener_info[tile_idx].vfilter[p] -= s;
rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] -= s;
@@ -967,154 +1169,157 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src,
return err;
}
-static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
- int partial_frame, int plane, RestorationInfo *info,
- RestorationType *type, double *best_tile_cost,
- YV12_BUFFER_CONFIG *dst_frame) {
- WienerInfo *wiener_info = info->wiener_info;
- AV1_COMMON *const cm = &cpi->common;
- RestorationInfo *rsi = cpi->rst_search;
- int64_t err;
- int bits;
- double cost_wiener, cost_norestore;
- MACROBLOCK *x = &cpi->td.mb;
+static void search_wiener_for_rtile(const struct rest_search_ctxt *ctxt,
+ int rtile_idx,
+ const RestorationTileLimits *limits,
+ void *arg) {
+ const MACROBLOCK *const x = &ctxt->cpi->td.mb;
+ const AV1_COMMON *const cm = &ctxt->cpi->common;
+ RestorationInfo *rsi = ctxt->cpi->rst_search;
+
+ const int wiener_win =
+ (ctxt->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
+
double M[WIENER_WIN2];
double H[WIENER_WIN2 * WIENER_WIN2];
double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN];
- const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
- int width, height, src_stride, dgd_stride;
- uint8_t *dgd_buffer, *src_buffer;
- if (plane == AOM_PLANE_Y) {
- width = src->y_crop_width;
- height = src->y_crop_height;
- src_buffer = src->y_buffer;
- src_stride = src->y_stride;
- dgd_buffer = dgd->y_buffer;
- dgd_stride = dgd->y_stride;
- assert(width == dgd->y_crop_width);
- assert(height == dgd->y_crop_height);
- assert(width == src->y_crop_width);
- assert(height == src->y_crop_height);
- } else {
- width = src->uv_crop_width;
- height = src->uv_crop_height;
- src_stride = src->uv_stride;
- dgd_stride = dgd->uv_stride;
- src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer;
- dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer;
- assert(width == dgd->uv_crop_width);
- assert(height == dgd->uv_crop_height);
- }
- double score;
- int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
- int h_start, h_end, v_start, v_end;
- const int ntiles = av1_get_rest_ntiles(
- width, height, cm->rst_info[plane].restoration_tilesize, &tile_width,
- &tile_height, &nhtiles, &nvtiles);
- WienerInfo ref_wiener_info;
- set_default_wiener(&ref_wiener_info);
- rsi[plane].frame_restoration_type = RESTORE_WIENER;
+ WienerInfo *ref_wiener_info = (WienerInfo *)arg;
- for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- rsi[plane].restoration_type[tile_idx] = RESTORE_NONE;
- }
+ int64_t err =
+ sse_restoration_tile(ctxt->src, cm->frame_to_show, cm, limits->h_start,
+ limits->h_end - limits->h_start, limits->v_start,
+ limits->v_end - limits->v_start, (1 << ctxt->plane));
+ // #bits when a tile is not restored
+ int bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0);
+ double cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
+ ctxt->best_tile_cost[rtile_idx] = INT64_MAX;
-// Construct a (WIENER_HALFWIN)-pixel border around the frame
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth)
- extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd_buffer), width, height,
- dgd_stride);
+ compute_stats_highbd(wiener_win, ctxt->dgd_buffer, ctxt->src_buffer,
+ limits->h_start, limits->h_end, limits->v_start,
+ limits->v_end, ctxt->dgd_stride, ctxt->src_stride, M,
+ H);
else
-#endif
- extend_frame(dgd_buffer, width, height, dgd_stride);
-
- // Compute best Wiener filters for each tile
- for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
- tile_height, width, height, 0, 0, &h_start, &h_end,
- &v_start, &v_end);
- err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start,
- h_end - h_start, v_start, v_end - v_start,
- (1 << plane));
- // #bits when a tile is not restored
- bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0);
- cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
- best_tile_cost[tile_idx] = DBL_MAX;
-
- av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
- tile_height, width, height, 0, 0, &h_start, &h_end,
- &v_start, &v_end);
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth)
- compute_stats_highbd(dgd_buffer, src_buffer, h_start, h_end, v_start,
- v_end, dgd_stride, src_stride, M, H);
- else
#endif // CONFIG_HIGHBITDEPTH
- compute_stats(dgd_buffer, src_buffer, h_start, h_end, v_start, v_end,
- dgd_stride, src_stride, M, H);
+ compute_stats(wiener_win, ctxt->dgd_buffer, ctxt->src_buffer,
+ limits->h_start, limits->h_end, limits->v_start,
+ limits->v_end, ctxt->dgd_stride, ctxt->src_stride, M, H);
- type[tile_idx] = RESTORE_WIENER;
+ ctxt->type[rtile_idx] = RESTORE_WIENER;
- if (!wiener_decompose_sep_sym(M, H, vfilterd, hfilterd)) {
- type[tile_idx] = RESTORE_NONE;
- continue;
- }
- quantize_sym_filter(vfilterd, rsi[plane].wiener_info[tile_idx].vfilter);
- quantize_sym_filter(hfilterd, rsi[plane].wiener_info[tile_idx].hfilter);
-
- // Filter score computes the value of the function x'*A*x - x'*b for the
- // learned filter and compares it against identity filer. If there is no
- // reduction in the function, the filter is reverted back to identity
- score = compute_score(M, H, rsi[plane].wiener_info[tile_idx].vfilter,
- rsi[plane].wiener_info[tile_idx].hfilter);
- if (score > 0.0) {
- type[tile_idx] = RESTORE_NONE;
- continue;
- }
- aom_clear_system_state();
+ if (!wiener_decompose_sep_sym(wiener_win, M, H, vfilterd, hfilterd)) {
+ ctxt->type[rtile_idx] = RESTORE_NONE;
+ return;
+ }
- rsi[plane].restoration_type[tile_idx] = RESTORE_WIENER;
- err = finer_tile_search_wiener(src, cpi, rsi, 4, plane, tile_idx,
- partial_frame, dst_frame);
- bits =
- count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info)
- << AV1_PROB_COST_SHIFT;
- bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1);
- cost_wiener = RDCOST_DBL(x->rdmult, (bits >> 4), err);
- if (cost_wiener >= cost_norestore) {
- type[tile_idx] = RESTORE_NONE;
- } else {
- type[tile_idx] = RESTORE_WIENER;
- memcpy(&wiener_info[tile_idx], &rsi[plane].wiener_info[tile_idx],
- sizeof(wiener_info[tile_idx]));
- memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx],
- sizeof(ref_wiener_info));
- best_tile_cost[tile_idx] = err;
+ RestorationInfo *plane_rsi = &rsi[ctxt->plane];
+ WienerInfo *rtile_wiener_info = &plane_rsi->wiener_info[rtile_idx];
+ quantize_sym_filter(wiener_win, vfilterd, rtile_wiener_info->vfilter);
+ quantize_sym_filter(wiener_win, hfilterd, rtile_wiener_info->hfilter);
+
+ // Filter score computes the value of the function x'*A*x - x'*b for the
+ // learned filter and compares it against the identity filter. If there is
+ // no reduction in the function value, the filter is reverted to identity.
+ double score = compute_score(wiener_win, M, H, rtile_wiener_info->vfilter,
+ rtile_wiener_info->hfilter);
+ if (score > 0.0) {
+ ctxt->type[rtile_idx] = RESTORE_NONE;
+ return;
+ }
+ aom_clear_system_state();
+
+ plane_rsi->restoration_type[rtile_idx] = RESTORE_WIENER;
+ err = finer_tile_search_wiener(ctxt->src, ctxt->cpi, rsi, 4, ctxt->plane,
+ wiener_win, rtile_idx, ctxt->partial_frame,
+ ctxt->dst_frame);
+ if (wiener_win != WIENER_WIN) {
+ assert(rtile_wiener_info->vfilter[0] == 0 &&
+ rtile_wiener_info->vfilter[WIENER_WIN - 1] == 0);
+ assert(rtile_wiener_info->hfilter[0] == 0 &&
+ rtile_wiener_info->hfilter[WIENER_WIN - 1] == 0);
+ }
+ bits = count_wiener_bits(wiener_win, rtile_wiener_info, ref_wiener_info)
+ << AV1_PROB_COST_SHIFT;
+ bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1);
+ double cost_wiener = RDCOST_DBL(x->rdmult, (bits >> 4), err);
+ if (cost_wiener >= cost_norestore) {
+ ctxt->type[rtile_idx] = RESTORE_NONE;
+ } else {
+ ctxt->type[rtile_idx] = RESTORE_WIENER;
+ *ref_wiener_info = ctxt->info->wiener_info[rtile_idx] = *rtile_wiener_info;
+ ctxt->best_tile_cost[rtile_idx] = err;
+ }
+ plane_rsi->restoration_type[rtile_idx] = RESTORE_NONE;
+}
+
+static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
+ int partial_frame, int plane, RestorationInfo *info,
+ RestorationType *type, int64_t *best_tile_cost,
+ YV12_BUFFER_CONFIG *dst_frame) {
+ struct rest_search_ctxt ctxt;
+ const int nrtiles =
+ init_rest_search_ctxt(src, cpi, partial_frame, plane, info, type,
+ best_tile_cost, dst_frame, &ctxt);
+
+ RestorationInfo *plane_rsi = &cpi->rst_search[plane];
+ plane_rsi->frame_restoration_type = RESTORE_WIENER;
+ for (int tile_idx = 0; tile_idx < nrtiles; ++tile_idx) {
+ plane_rsi->restoration_type[tile_idx] = RESTORE_NONE;
+ }
+
+ AV1_COMMON *const cm = &cpi->common;
+// Construct a (WIENER_HALFWIN)-pixel border around the frame
+// Note: we use this border to gather stats even though the actual filter
+// may use a smaller border on the top/bottom of a processing unit.
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ extend_frame_highbd(CONVERT_TO_SHORTPTR(ctxt.dgd_buffer), ctxt.plane_width,
+ ctxt.plane_height, ctxt.dgd_stride, WIENER_HALFWIN,
+ WIENER_HALFWIN);
+ else
+#endif
+ extend_frame(ctxt.dgd_buffer, ctxt.plane_width, ctxt.plane_height,
+ ctxt.dgd_stride, WIENER_HALFWIN, WIENER_HALFWIN);
+
+ // Compute best Wiener filters for each rtile, one (encoder/decoder)
+ // tile at a time.
+ for (int tile_row = 0; tile_row < cm->tile_rows; ++tile_row) {
+ for (int tile_col = 0; tile_col < cm->tile_cols; ++tile_col) {
+ WienerInfo ref_wiener_info;
+ set_default_wiener(&ref_wiener_info);
+
+ foreach_rtile_in_tile(&ctxt, tile_row, tile_col, search_wiener_for_rtile,
+ &ref_wiener_info);
}
- rsi[plane].restoration_type[tile_idx] = RESTORE_NONE;
}
- // Cost for Wiener filtering
+
+ // Cost for Wiener filtering
+ WienerInfo ref_wiener_info;
set_default_wiener(&ref_wiener_info);
- bits = frame_level_restore_bits[rsi[plane].frame_restoration_type]
- << AV1_PROB_COST_SHIFT;
- for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
+ int bits = frame_level_restore_bits[plane_rsi->frame_restoration_type]
+ << AV1_PROB_COST_SHIFT;
+ WienerInfo *wiener_info = info->wiener_info;
+ const int wiener_win =
+ (plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
+
+ for (int tile_idx = 0; tile_idx < nrtiles; ++tile_idx) {
bits +=
av1_cost_bit(RESTORE_NONE_WIENER_PROB, type[tile_idx] != RESTORE_NONE);
- memcpy(&rsi[plane].wiener_info[tile_idx], &wiener_info[tile_idx],
- sizeof(wiener_info[tile_idx]));
+ plane_rsi->wiener_info[tile_idx] = wiener_info[tile_idx];
+
if (type[tile_idx] == RESTORE_WIENER) {
- bits +=
- count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info)
- << AV1_PROB_COST_SHIFT;
- memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx],
- sizeof(ref_wiener_info));
+ bits += count_wiener_bits(wiener_win, &plane_rsi->wiener_info[tile_idx],
+ &ref_wiener_info)
+ << AV1_PROB_COST_SHIFT;
+ ref_wiener_info = plane_rsi->wiener_info[tile_idx];
}
- rsi[plane].restoration_type[tile_idx] = type[tile_idx];
+ plane_rsi->restoration_type[tile_idx] = type[tile_idx];
}
- err = try_restoration_frame(src, cpi, rsi, 1 << plane, partial_frame,
- dst_frame);
- cost_wiener = RDCOST_DBL(x->rdmult, (bits >> 4), err);
+ int64_t err = try_restoration_frame(src, cpi, cpi->rst_search, 1 << plane,
+ partial_frame, dst_frame);
+ double cost_wiener = RDCOST_DBL(cpi->td.mb.rdmult, (bits >> 4), err);
return cost_wiener;
}
@@ -1122,7 +1327,7 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
int partial_frame, int plane,
RestorationInfo *info, RestorationType *type,
- double *best_tile_cost,
+ int64_t *best_tile_cost,
YV12_BUFFER_CONFIG *dst_frame) {
int64_t err;
double cost_norestore;
@@ -1130,7 +1335,6 @@ static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
MACROBLOCK *x = &cpi->td.mb;
AV1_COMMON *const cm = &cpi->common;
int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
- int h_start, h_end, v_start, v_end;
int width, height;
if (plane == AOM_PLANE_Y) {
width = src->y_crop_width;
@@ -1148,12 +1352,16 @@ static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
info->frame_restoration_type = RESTORE_NONE;
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width,
- tile_height, width, height, 0, 0, &h_start, &h_end,
- &v_start, &v_end);
- err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start,
- h_end - h_start, v_start, v_end - v_start,
- 1 << plane);
+ RestorationTileLimits limits = av1_get_rest_tile_limits(
+ tile_idx, nhtiles, nvtiles, tile_width, tile_height, width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ height, plane != AOM_PLANE_Y ? cm->subsampling_y : 0);
+#else
+ height);
+#endif
+ err = sse_restoration_tile(src, cm->frame_to_show, cm, limits.h_start,
+ limits.h_end - limits.h_start, limits.v_start,
+ limits.v_end - limits.v_start, 1 << plane);
type[tile_idx] = RESTORE_NONE;
best_tile_cost[tile_idx] = err;
}
@@ -1164,74 +1372,88 @@ static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
return cost_norestore;
}
+struct switchable_rest_search_ctxt {
+ SgrprojInfo sgrproj_info;
+ WienerInfo wiener_info;
+ RestorationType *const *restore_types;
+ int64_t *const *tile_cost;
+ double cost_switchable;
+};
+
+static void search_switchable_for_rtile(const struct rest_search_ctxt *ctxt,
+ int rtile_idx,
+ const RestorationTileLimits *limits,
+ void *arg) {
+ const MACROBLOCK *x = &ctxt->cpi->td.mb;
+ RestorationInfo *rsi = &ctxt->cpi->common.rst_info[ctxt->plane];
+ struct switchable_rest_search_ctxt *swctxt =
+ (struct switchable_rest_search_ctxt *)arg;
+
+ (void)limits;
+
+ double best_cost =
+ RDCOST_DBL(x->rdmult, (x->switchable_restore_cost[RESTORE_NONE] >> 4),
+ swctxt->tile_cost[RESTORE_NONE][rtile_idx]);
+ rsi->restoration_type[rtile_idx] = RESTORE_NONE;
+ for (RestorationType r = 1; r < RESTORE_SWITCHABLE_TYPES; r++) {
+ if (force_restore_type != RESTORE_TYPES)
+ if (r != force_restore_type) continue;
+ int tilebits = 0;
+ if (swctxt->restore_types[r][rtile_idx] != r) continue;
+ if (r == RESTORE_WIENER)
+ tilebits += count_wiener_bits(
+ (ctxt->plane == AOM_PLANE_Y ? WIENER_WIN : WIENER_WIN - 2),
+ &rsi->wiener_info[rtile_idx], &swctxt->wiener_info);
+ else if (r == RESTORE_SGRPROJ)
+ tilebits += count_sgrproj_bits(&rsi->sgrproj_info[rtile_idx],
+ &swctxt->sgrproj_info);
+ tilebits <<= AV1_PROB_COST_SHIFT;
+ tilebits += x->switchable_restore_cost[r];
+ double cost =
+ RDCOST_DBL(x->rdmult, tilebits >> 4, swctxt->tile_cost[r][rtile_idx]);
+
+ if (cost < best_cost) {
+ rsi->restoration_type[rtile_idx] = r;
+ best_cost = cost;
+ }
+ }
+ if (rsi->restoration_type[rtile_idx] == RESTORE_WIENER)
+ swctxt->wiener_info = rsi->wiener_info[rtile_idx];
+ else if (rsi->restoration_type[rtile_idx] == RESTORE_SGRPROJ)
+ swctxt->sgrproj_info = rsi->sgrproj_info[rtile_idx];
+ if (force_restore_type != RESTORE_TYPES)
+ assert(rsi->restoration_type[rtile_idx] == force_restore_type ||
+ rsi->restoration_type[rtile_idx] == RESTORE_NONE);
+ swctxt->cost_switchable += best_cost;
+}
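
In outline, the callback above is an argmin over restoration types per rtile (a sketch mirroring the code, not new behavior):

/*
 * best = RDCOST(RESTORE_NONE bits, tile_cost[NONE][rtile]);
 * for r in {RESTORE_WIENER, RESTORE_SGRPROJ}:
 *   if the per-type search kept r for this rtile (and r is not excluded
 *   by force_restore_type):
 *     cost = RDCOST(param bits vs running refs + switchable_restore_cost[r],
 *                   tile_cost[r][rtile]);
 *     best = min(best, cost);
 * then refresh the running WienerInfo/SgrprojInfo from the winning type.
 */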
+
static double search_switchable_restoration(
const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, int plane,
RestorationType *const restore_types[RESTORE_SWITCHABLE_TYPES],
- double *const tile_cost[RESTORE_SWITCHABLE_TYPES], RestorationInfo *rsi) {
- AV1_COMMON *const cm = &cpi->common;
- MACROBLOCK *x = &cpi->td.mb;
- double cost_switchable = 0;
- int bits, tile_idx;
- RestorationType r;
- int width, height;
- if (plane == AOM_PLANE_Y) {
- width = src->y_crop_width;
- height = src->y_crop_height;
- } else {
- width = src->uv_crop_width;
- height = src->uv_crop_height;
- }
- const int ntiles = av1_get_rest_ntiles(
- width, height, cm->rst_info[plane].restoration_tilesize, NULL, NULL, NULL,
- NULL);
- SgrprojInfo ref_sgrproj_info;
- set_default_sgrproj(&ref_sgrproj_info);
- WienerInfo ref_wiener_info;
- set_default_wiener(&ref_wiener_info);
- (void)partial_frame;
+ int64_t *const tile_cost[RESTORE_SWITCHABLE_TYPES], RestorationInfo *rsi) {
+ const AV1_COMMON *const cm = &cpi->common;
+ struct rest_search_ctxt ctxt;
+ init_rest_search_ctxt(src, cpi, partial_frame, plane, NULL, NULL, NULL, NULL,
+ &ctxt);
+ struct switchable_rest_search_ctxt swctxt;
+ swctxt.restore_types = restore_types;
+ swctxt.tile_cost = tile_cost;
rsi->frame_restoration_type = RESTORE_SWITCHABLE;
- bits = frame_level_restore_bits[rsi->frame_restoration_type]
- << AV1_PROB_COST_SHIFT;
- cost_switchable = RDCOST_DBL(x->rdmult, bits >> 4, 0);
- for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
- double best_cost =
- RDCOST_DBL(x->rdmult, (cpi->switchable_restore_cost[RESTORE_NONE] >> 4),
- tile_cost[RESTORE_NONE][tile_idx]);
- rsi->restoration_type[tile_idx] = RESTORE_NONE;
- for (r = 1; r < RESTORE_SWITCHABLE_TYPES; r++) {
- if (force_restore_type != 0)
- if (r != force_restore_type) continue;
- int tilebits = 0;
- if (restore_types[r][tile_idx] != r) continue;
- if (r == RESTORE_WIENER)
- tilebits +=
- count_wiener_bits(&rsi->wiener_info[tile_idx], &ref_wiener_info);
- else if (r == RESTORE_SGRPROJ)
- tilebits +=
- count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info);
- tilebits <<= AV1_PROB_COST_SHIFT;
- tilebits += cpi->switchable_restore_cost[r];
- double cost =
- RDCOST_DBL(x->rdmult, tilebits >> 4, tile_cost[r][tile_idx]);
-
- if (cost < best_cost) {
- rsi->restoration_type[tile_idx] = r;
- best_cost = cost;
- }
+ int bits = frame_level_restore_bits[rsi->frame_restoration_type]
+ << AV1_PROB_COST_SHIFT;
+ swctxt.cost_switchable = RDCOST_DBL(cpi->td.mb.rdmult, bits >> 4, 0);
+
+ for (int tile_row = 0; tile_row < cm->tile_rows; ++tile_row) {
+ for (int tile_col = 0; tile_col < cm->tile_cols; ++tile_col) {
+ set_default_sgrproj(&swctxt.sgrproj_info);
+ set_default_wiener(&swctxt.wiener_info);
+ foreach_rtile_in_tile(&ctxt, tile_row, tile_col,
+ search_switchable_for_rtile, &swctxt);
}
- if (rsi->restoration_type[tile_idx] == RESTORE_WIENER)
- memcpy(&ref_wiener_info, &rsi->wiener_info[tile_idx],
- sizeof(ref_wiener_info));
- else if (rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ)
- memcpy(&ref_sgrproj_info, &rsi->sgrproj_info[tile_idx],
- sizeof(ref_sgrproj_info));
- if (force_restore_type != 0)
- assert(rsi->restoration_type[tile_idx] == force_restore_type ||
- rsi->restoration_type[tile_idx] == RESTORE_NONE);
- cost_switchable += best_cost;
}
- return cost_switchable;
+
+ return swctxt.cost_switchable;
}
void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
@@ -1241,7 +1463,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
};
AV1_COMMON *const cm = &cpi->common;
double cost_restore[RESTORE_TYPES];
- double *tile_cost[RESTORE_SWITCHABLE_TYPES];
+ int64_t *tile_cost[RESTORE_SWITCHABLE_TYPES];
RestorationType *restore_types[RESTORE_SWITCHABLE_TYPES];
double best_cost_restore;
RestorationType r, best_restore;
@@ -1259,7 +1481,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
// Assume ntiles_uv is never larger than ntiles_y and so the same arrays work.
for (r = 0; r < RESTORE_SWITCHABLE_TYPES; r++) {
- tile_cost[r] = (double *)aom_malloc(sizeof(*tile_cost[0]) * ntiles_y);
+ tile_cost[r] = (int64_t *)aom_malloc(sizeof(*tile_cost[0]) * ntiles_y);
restore_types[r] =
(RestorationType *)aom_malloc(sizeof(*restore_types[0]) * ntiles_y);
}
@@ -1267,7 +1489,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
for (int plane = AOM_PLANE_Y; plane <= AOM_PLANE_V; ++plane) {
for (r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) {
cost_restore[r] = DBL_MAX;
- if (force_restore_type != 0)
+ if (force_restore_type != RESTORE_TYPES)
if (r != RESTORE_NONE && r != force_restore_type) continue;
cost_restore[r] =
search_restore_fun[r](src, cpi, method == LPF_PICK_FROM_SUBIMAGE,
@@ -1283,7 +1505,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
best_cost_restore = DBL_MAX;
best_restore = 0;
for (r = 0; r < RESTORE_TYPES; ++r) {
- if (force_restore_type != 0)
+ if (force_restore_type != RESTORE_TYPES)
if (r != RESTORE_NONE && r != force_restore_type) continue;
if (cost_restore[r] < best_cost_restore) {
best_restore = r;
@@ -1291,7 +1513,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
}
}
cm->rst_info[plane].frame_restoration_type = best_restore;
- if (force_restore_type != 0)
+ if (force_restore_type != RESTORE_TYPES)
assert(best_restore == force_restore_type ||
best_restore == RESTORE_NONE);
if (best_restore != RESTORE_SWITCHABLE) {
diff --git a/third_party/aom/av1/encoder/random.h b/third_party/aom/av1/encoder/random.h
new file mode 100644
index 000000000..9b2dac965
--- /dev/null
+++ b/third_party/aom/av1/encoder/random.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_ENCODER_RANDOM_H_
+#define AV1_ENCODER_RANDOM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Generate a random number in the range [0, 32768).
+static INLINE unsigned int lcg_rand16(unsigned int *state) {
+ *state = (unsigned int)(*state * 1103515245ULL + 12345);
+ return *state / 65536 % 32768;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AV1_ENCODER_RANDOM_H_
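
A minimal usage sketch, assuming the header above (and the INLINE macro it relies on) is available on the include path; the modulo idiom mirrors the RESIZE_DYNAMIC call site removed from ratectrl.c further down:

#include <stdio.h>
#include "av1/encoder/random.h"

int main(void) {
  unsigned int seed = 56789;           // each caller keeps its own state
  unsigned int r = lcg_rand16(&seed);  // uniform-ish in [0, 32768)
  printf("%u\n", r % 4 + 13);          // e.g. a resize numerator in [13, 16]
  return 0;
}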
diff --git a/third_party/aom/av1/encoder/ransac.c b/third_party/aom/av1/encoder/ransac.c
index c6e3675be..6d2eb4183 100644
--- a/third_party/aom/av1/encoder/ransac.c
+++ b/third_party/aom/av1/encoder/ransac.c
@@ -17,6 +17,7 @@
#include "av1/encoder/ransac.h"
#include "av1/encoder/mathutils.h"
+#include "av1/encoder/random.h"
#define MAX_MINPTS 4
#define MAX_DEGENERATE_ITER 10
@@ -587,12 +588,6 @@ static int find_homography(int np, double *pts1, double *pts2, double *mat) {
return 0;
}
-// Generate a random number in the range [0, 32768).
-static unsigned int lcg_rand16(unsigned int *state) {
- *state = (unsigned int)(*state * 1103515245ULL + 12345);
- return *state / 65536 % 32768;
-}
-
static int get_rand_indices(int npoints, int minpts, int *indices,
unsigned int *seed) {
int i, j;
diff --git a/third_party/aom/av1/encoder/ratectrl.c b/third_party/aom/av1/encoder/ratectrl.c
index b546fdffa..a90cb880e 100644
--- a/third_party/aom/av1/encoder/ratectrl.c
+++ b/third_party/aom/av1/encoder/ratectrl.c
@@ -29,6 +29,7 @@
#include "av1/common/seg_common.h"
#include "av1/encoder/encodemv.h"
+#include "av1/encoder/random.h"
#include "av1/encoder/ratectrl.h"
// Max rate target for 1080P and below encodes under normal circumstances
@@ -93,9 +94,11 @@ static int gf_low = 400;
static int kf_high = 5000;
static int kf_low = 400;
-double av1_resize_rate_factor(const AV1_COMP *cpi) {
- return (double)(cpi->oxcf.width * cpi->oxcf.height) /
- (cpi->common.width * cpi->common.height);
+// How many times fewer pixels there are to encode given the current scaling.
+// Temporary replacement for rcf_mult and rate_thresh_mult.
+static double resize_rate_factor(const AV1_COMP *cpi, int width, int height) {
+ (void)cpi;
+ return (double)(cpi->oxcf.width * cpi->oxcf.height) / (width * height);
}
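
A worked example of the factor (numbers illustrative, not from the patch):

/*
 * oxcf target 1920x1080 encoded at a scaled 960x540:
 *   factor = (1920*1080) / (960*540) = 4.0,
 * so per-frame bit targets and rate-correction factors are scaled by 4x to
 * stay comparable with the full-resolution configuration.
 */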
// Functions to compute the active minq lookup table entries based on a
@@ -371,7 +374,8 @@ int av1_rc_drop_frame(AV1_COMP *cpi) {
}
}
-static double get_rate_correction_factor(const AV1_COMP *cpi) {
+static double get_rate_correction_factor(const AV1_COMP *cpi, int width,
+ int height) {
const RATE_CONTROL *const rc = &cpi->rc;
double rcf;
@@ -389,15 +393,16 @@ static double get_rate_correction_factor(const AV1_COMP *cpi) {
else
rcf = rc->rate_correction_factors[INTER_NORMAL];
}
- rcf *= av1_resize_rate_factor(cpi);
+ rcf *= resize_rate_factor(cpi, width, height);
return fclamp(rcf, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
}
-static void set_rate_correction_factor(AV1_COMP *cpi, double factor) {
+static void set_rate_correction_factor(AV1_COMP *cpi, double factor, int width,
+ int height) {
RATE_CONTROL *const rc = &cpi->rc;
// Normalize RCF to account for the size-dependent scaling factor.
- factor /= av1_resize_rate_factor(cpi);
+ factor /= resize_rate_factor(cpi, width, height);
factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
@@ -417,11 +422,14 @@ static void set_rate_correction_factor(AV1_COMP *cpi, double factor) {
}
}
-void av1_rc_update_rate_correction_factors(AV1_COMP *cpi) {
+void av1_rc_update_rate_correction_factors(AV1_COMP *cpi, int width,
+ int height) {
const AV1_COMMON *const cm = &cpi->common;
int correction_factor = 100;
- double rate_correction_factor = get_rate_correction_factor(cpi);
+ double rate_correction_factor =
+ get_rate_correction_factor(cpi, width, height);
double adjustment_limit;
+ const int MBs = av1_get_MBs(width, height);
int projected_size_based_on_q = 0;
@@ -439,7 +447,7 @@ void av1_rc_update_rate_correction_factors(AV1_COMP *cpi) {
av1_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor);
} else {
projected_size_based_on_q =
- av1_estimate_bits_at_q(cpi->common.frame_type, cm->base_qindex, cm->MBs,
+ av1_estimate_bits_at_q(cpi->common.frame_type, cm->base_qindex, MBs,
rate_correction_factor, cm->bit_depth);
}
// Work out a size correction factor.
@@ -485,21 +493,24 @@ void av1_rc_update_rate_correction_factors(AV1_COMP *cpi) {
rate_correction_factor = MIN_BPB_FACTOR;
}
- set_rate_correction_factor(cpi, rate_correction_factor);
+ set_rate_correction_factor(cpi, rate_correction_factor, width, height);
}
int av1_rc_regulate_q(const AV1_COMP *cpi, int target_bits_per_frame,
- int active_best_quality, int active_worst_quality) {
+ int active_best_quality, int active_worst_quality,
+ int width, int height) {
const AV1_COMMON *const cm = &cpi->common;
int q = active_worst_quality;
int last_error = INT_MAX;
int i, target_bits_per_mb, bits_per_mb_at_this_q;
- const double correction_factor = get_rate_correction_factor(cpi);
+ const int MBs = av1_get_MBs(width, height);
+ const double correction_factor =
+ get_rate_correction_factor(cpi, width, height);
// Calculate required scaling factor based on target frame size and size of
// frame produced using previous Q.
target_bits_per_mb =
- (int)((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs;
+ (int)((uint64_t)(target_bits_per_frame) << BPER_MB_NORMBITS) / MBs;
i = active_best_quality;
@@ -579,8 +590,11 @@ static int calc_active_worst_quality_one_pass_vbr(const AV1_COMP *cpi) {
active_worst_quality =
curr_frame == 0 ? rc->worst_quality : rc->last_q[KEY_FRAME] * 2;
} else {
- if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame ||
+#if CONFIG_EXT_REFS
+ cpi->refresh_alt2_ref_frame ||
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_alt_ref_frame)) {
active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 5 / 4
: rc->last_q[INTER_FRAME];
} else {
@@ -647,8 +661,8 @@ static int calc_active_worst_quality_one_pass_cbr(const AV1_COMP *cpi) {
return active_worst_quality;
}
-static int rc_pick_q_and_bounds_one_pass_cbr(const AV1_COMP *cpi,
- int *bottom_index,
+static int rc_pick_q_and_bounds_one_pass_cbr(const AV1_COMP *cpi, int width,
+ int height, int *bottom_index,
int *top_index) {
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
@@ -678,7 +692,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const AV1_COMP *cpi,
rc, rc->avg_frame_qindex[KEY_FRAME], cm->bit_depth);
// Allow somewhat lower kf minq with small image formats.
- if ((cm->width * cm->height) <= (352 * 288)) {
+ if ((width * height) <= (352 * 288)) {
q_adj_factor -= 0.25;
}
@@ -740,7 +754,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const AV1_COMP *cpi,
q = rc->last_boosted_qindex;
} else {
q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
- active_worst_quality);
+ active_worst_quality, width, height);
if (q > *top_index) {
// Special case when we are targeting the max allowed rate
if (rc->this_frame_target >= rc->max_frame_bandwidth)
@@ -770,8 +784,8 @@ static int get_active_cq_level(const RATE_CONTROL *rc,
return active_cq_level;
}
-static int rc_pick_q_and_bounds_one_pass_vbr(const AV1_COMP *cpi,
- int *bottom_index,
+static int rc_pick_q_and_bounds_one_pass_vbr(const AV1_COMP *cpi, int width,
+ int height, int *bottom_index,
int *top_index) {
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
@@ -804,7 +818,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const AV1_COMP *cpi,
rc, rc->avg_frame_qindex[KEY_FRAME], cm->bit_depth);
// Allow somewhat lower kf minq with small image formats.
- if ((cm->width * cm->height) <= (352 * 288)) {
+ if ((width * height) <= (352 * 288)) {
q_adj_factor -= 0.25;
}
@@ -899,7 +913,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const AV1_COMP *cpi,
q = rc->last_boosted_qindex;
} else {
q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
- active_worst_quality);
+ active_worst_quality, width, height);
if (q > *top_index) {
// Special case when we are targeting the max allowed rate
if (rc->this_frame_target >= rc->max_frame_bandwidth)
@@ -945,7 +959,8 @@ int av1_frame_type_qdelta(const AV1_COMP *cpi, int rf_level, int q) {
}
#define STATIC_MOTION_THRESH 95
-static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index,
+static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width,
+ int height, int *bottom_index,
int *top_index) {
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
@@ -992,7 +1007,7 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index,
get_kf_active_quality(rc, active_worst_quality, cm->bit_depth);
// Allow somewhat lower kf minq with small image formats.
- if ((cm->width * cm->height) <= (352 * 288)) {
+ if ((width * height) <= (352 * 288)) {
q_adj_factor -= 0.25;
}
@@ -1005,8 +1020,11 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index,
active_best_quality +=
av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
}
- } else if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame ||
+#if CONFIG_EXT_REFS
+ cpi->refresh_alt2_ref_frame ||
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_alt_ref_frame)) {
// Use the lower of active_worst_quality and recent
// average Q as basis for GF/ARF best Q limit unless last frame was
// a key frame.
@@ -1026,7 +1044,11 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index,
active_best_quality = active_best_quality * 15 / 16;
} else if (oxcf->rc_mode == AOM_Q) {
+#if CONFIG_EXT_REFS
+ if (!cpi->refresh_alt_ref_frame && !cpi->refresh_alt2_ref_frame) {
+#else
if (!cpi->refresh_alt_ref_frame) {
+#endif // CONFIG_EXT_REFS
active_best_quality = cq_level;
} else {
active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
@@ -1058,8 +1080,11 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index,
if ((cpi->oxcf.rc_mode != AOM_Q) &&
(cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD)) {
if (frame_is_intra_only(cm) ||
- (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
+ (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame ||
+#if CONFIG_EXT_REFS
+ cpi->refresh_alt2_ref_frame ||
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_alt_ref_frame))) {
active_best_quality -=
(cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
active_worst_quality += (cpi->twopass.extend_maxq / 2);
@@ -1105,7 +1130,7 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index,
}
} else {
q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
- active_worst_quality);
+ active_worst_quality, width, height);
if (q > active_worst_quality) {
// Special case when we are targeting the max allowed rate.
if (rc->this_frame_target >= rc->max_frame_bandwidth)
@@ -1126,16 +1151,19 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index,
return q;
}
-int av1_rc_pick_q_and_bounds(const AV1_COMP *cpi, int *bottom_index,
- int *top_index) {
+int av1_rc_pick_q_and_bounds(const AV1_COMP *cpi, int width, int height,
+ int *bottom_index, int *top_index) {
int q;
if (cpi->oxcf.pass == 0) {
if (cpi->oxcf.rc_mode == AOM_CBR)
- q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index);
+ q = rc_pick_q_and_bounds_one_pass_cbr(cpi, width, height, bottom_index,
+ top_index);
else
- q = rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index);
+ q = rc_pick_q_and_bounds_one_pass_vbr(cpi, width, height, bottom_index,
+ top_index);
} else {
- q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index);
+ q = rc_pick_q_and_bounds_two_pass(cpi, width, height, bottom_index,
+ top_index);
}
return q;
@@ -1157,7 +1185,8 @@ void av1_rc_compute_frame_size_bounds(const AV1_COMP *cpi, int frame_target,
}
}
-void av1_rc_set_frame_target(AV1_COMP *cpi, int target) {
+static void rc_set_frame_target(AV1_COMP *cpi, int target, int width,
+ int height) {
const AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -1166,11 +1195,11 @@ void av1_rc_set_frame_target(AV1_COMP *cpi, int target) {
// Modify frame size target when down-scaled.
if (!av1_frame_unscaled(cm))
rc->this_frame_target =
- (int)(rc->this_frame_target * av1_resize_rate_factor(cpi));
+ (int)(rc->this_frame_target * resize_rate_factor(cpi, width, height));
// Target rate per SB64 (including partial SB64s).
- rc->sb64_target_rate = (int)((int64_t)rc->this_frame_target * 64 * 64) /
- (cm->width * cm->height);
+ rc->sb64_target_rate =
+ (int)((int64_t)rc->this_frame_target * 64 * 64) / (width * height);
}
static void update_alt_ref_frame_stats(AV1_COMP *cpi) {
@@ -1194,7 +1223,7 @@ static void update_golden_frame_stats(AV1_COMP *cpi) {
// only the virtual indices for the reference frame will be
// updated and cpi->refresh_golden_frame will still be zero.
if (cpi->refresh_golden_frame || rc->is_src_frame_alt_ref) {
-#else
+#else // !CONFIG_EXT_REFS
// Update the Golden frame usage counts.
if (cpi->refresh_golden_frame) {
#endif // CONFIG_EXT_REFS
@@ -1219,7 +1248,11 @@ static void update_golden_frame_stats(AV1_COMP *cpi) {
// Decrement count down till next gf
if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
+#if CONFIG_EXT_REFS
+ } else if (!cpi->refresh_alt_ref_frame && !cpi->refresh_alt2_ref_frame) {
+#else
} else if (!cpi->refresh_alt_ref_frame) {
+#endif // CONFIG_EXT_REFS
// Decrement count down till next gf
if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
@@ -1240,7 +1273,7 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
rc->projected_frame_size = (int)(bytes_used << 3);
// Post encode loop adjustment of Q prediction.
- av1_rc_update_rate_correction_factors(cpi);
+ av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height);
// Keep a record of last Q and ambient average Q.
if (cm->frame_type == KEY_FRAME) {
@@ -1249,7 +1282,11 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
} else {
if (!rc->is_src_frame_alt_ref &&
- !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ !(cpi->refresh_golden_frame ||
+#if CONFIG_EXT_REFS
+ cpi->refresh_alt2_ref_frame ||
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_alt_ref_frame)) {
rc->last_q[INTER_FRAME] = qindex;
rc->avg_frame_qindex[INTER_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
@@ -1271,6 +1308,9 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
if ((qindex < rc->last_boosted_qindex) || (cm->frame_type == KEY_FRAME) ||
(!rc->constrained_gf_group &&
(cpi->refresh_alt_ref_frame ||
+#if CONFIG_EXT_REFS
+ cpi->refresh_alt2_ref_frame ||
+#endif // CONFIG_EXT_REFS
(cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
rc->last_boosted_qindex = qindex;
}
@@ -1280,6 +1320,10 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
// Rolling monitors of whether we are over or underspending used to help
// regulate min and Max Q in two pass.
+ if (!av1_frame_unscaled(cm))
+ rc->this_frame_target =
+ (int)(rc->this_frame_target /
+ resize_rate_factor(cpi, cm->width, cm->height));
if (cm->frame_type != KEY_FRAME) {
rc->rolling_target_bits = ROUND_POWER_OF_TWO(
rc->rolling_target_bits * 3 + rc->this_frame_target, 2);
@@ -1294,6 +1338,8 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
// Actual bits spent
rc->total_actual_bits += rc->projected_frame_size;
#if CONFIG_EXT_REFS
+ // TODO(zoeliu): Investigate whether we should treat BWDREF_FRAME
+ // differently here for rc->avg_frame_bandwidth.
rc->total_target_bits +=
(cm->show_frame || rc->is_bwd_ref_frame) ? rc->avg_frame_bandwidth : 0;
#else
@@ -1313,6 +1359,8 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0;
#if CONFIG_EXT_REFS
+ // TODO(zoeliu): Investigate whether we should treat BWDREF_FRAME
+ // differently here for rc->avg_frame_bandwidth.
if (cm->show_frame || rc->is_bwd_ref_frame) {
#else
if (cm->show_frame) {
@@ -1320,6 +1368,12 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
rc->frames_since_key++;
rc->frames_to_key--;
}
+ // if (cm->current_video_frame == 1 && cm->show_frame)
+ /*
+ rc->this_frame_target =
+ (int)(rc->this_frame_target / resize_rate_factor(cpi, cm->width,
+ cm->height));
+ */
}
void av1_rc_postencode_update_drop_frame(AV1_COMP *cpi) {
@@ -1394,7 +1448,7 @@ void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi) {
target = calc_iframe_target_size_one_pass_vbr(cpi);
else
target = calc_pframe_target_size_one_pass_vbr(cpi);
- av1_rc_set_frame_target(cpi, target);
+ rc_set_frame_target(cpi, target, cm->width, cm->height);
}
static int calc_pframe_target_size_one_pass_cbr(const AV1_COMP *cpi) {
@@ -1496,7 +1550,7 @@ void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) {
else
target = calc_pframe_target_size_one_pass_cbr(cpi);
- av1_rc_set_frame_target(cpi, target);
+ rc_set_frame_target(cpi, target, cm->width, cm->height);
// TODO(afergs): Decide whether to scale up, down, or not at all
}
@@ -1581,11 +1635,11 @@ void av1_rc_set_gf_interval_range(const AV1_COMP *const cpi,
}
}
-void av1_rc_update_framerate(AV1_COMP *cpi) {
- const AV1_COMMON *const cm = &cpi->common;
+void av1_rc_update_framerate(AV1_COMP *cpi, int width, int height) {
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
int vbr_max_bits;
+ const int MBs = av1_get_MBs(width, height);
rc->avg_frame_bandwidth = (int)(oxcf->target_bandwidth / cpi->framerate);
rc->min_frame_bandwidth =
@@ -1605,7 +1659,7 @@ void av1_rc_update_framerate(AV1_COMP *cpi) {
(int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) /
100);
rc->max_frame_bandwidth =
- AOMMAX(AOMMAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits);
+ AOMMAX(AOMMAX((MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits);
av1_rc_set_gf_interval_range(cpi, rc);
}
@@ -1654,73 +1708,12 @@ static void vbr_rate_correction(AV1_COMP *cpi, int *this_frame_target) {
}
}
-void av1_set_target_rate(AV1_COMP *cpi) {
+void av1_set_target_rate(AV1_COMP *cpi, int width, int height) {
RATE_CONTROL *const rc = &cpi->rc;
int target_rate = rc->base_frame_target;
// Correction to rate target based on prior over or under shoot.
if (cpi->oxcf.rc_mode == AOM_VBR || cpi->oxcf.rc_mode == AOM_CQ)
vbr_rate_correction(cpi, &target_rate);
- av1_rc_set_frame_target(cpi, target_rate);
-}
-
-static unsigned int lcg_rand16(unsigned int *state) {
- *state = (unsigned int)(*state * 1103515245ULL + 12345);
- return *state / 65536 % 32768;
-}
-
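The lcg_rand16() helper removed here is the textbook C-standard rand() example generator (multiplier 1103515245, increment 12345); dividing by 65536 discards the weak low-order bits before reducing modulo 32768. A standalone copy showing how the RESIZE_DYNAMIC case below drew its numerator from it:

#include <stdio.h>
static unsigned int lcg_rand16(unsigned int *state) {
  *state = (unsigned int)(*state * 1103515245ULL + 12345);
  return *state / 65536 % 32768; /* 15-bit output from the high bits */
}
int main(void) {
  unsigned int seed = 56789; /* same seed as the removed resize path */
  /* RESIZE_DYNAMIC picked new_num = lcg_rand16(&seed) % 4 + 13,
   * i.e. uniformly from {13, 14, 15, 16}. */
  printf("new_num = %u\n", lcg_rand16(&seed) % 4 + 13);
  return 0;
}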
-uint8_t av1_calculate_next_resize_scale(const AV1_COMP *cpi) {
- static unsigned int seed = 56789;
- const AV1EncoderConfig *oxcf = &cpi->oxcf;
- if (oxcf->pass == 1) return SCALE_DENOMINATOR;
- uint8_t new_num = SCALE_DENOMINATOR;
-
- switch (oxcf->resize_mode) {
- case RESIZE_NONE: new_num = SCALE_DENOMINATOR; break;
- case RESIZE_FIXED:
- if (cpi->common.frame_type == KEY_FRAME)
- new_num = oxcf->resize_kf_scale_numerator;
- else
- new_num = oxcf->resize_scale_numerator;
- break;
- case RESIZE_DYNAMIC:
- // RESIZE_DYNAMIC: Just random for now.
- new_num = lcg_rand16(&seed) % 4 + 13;
- break;
- default: assert(0);
- }
- return new_num;
-}
-
-#if CONFIG_FRAME_SUPERRES
-// TODO(afergs): Rename av1_rc_update_superres_scale(...)?
-uint8_t av1_calculate_next_superres_scale(const AV1_COMP *cpi, int width,
- int height) {
- static unsigned int seed = 34567;
- const AV1EncoderConfig *oxcf = &cpi->oxcf;
- if (oxcf->pass == 1) return SCALE_DENOMINATOR;
- uint8_t new_num = SCALE_DENOMINATOR;
-
- switch (oxcf->superres_mode) {
- case SUPERRES_NONE: new_num = SCALE_DENOMINATOR; break;
- case SUPERRES_FIXED:
- if (cpi->common.frame_type == KEY_FRAME)
- new_num = oxcf->superres_kf_scale_numerator;
- else
- new_num = oxcf->superres_scale_numerator;
- break;
- case SUPERRES_DYNAMIC:
- // SUPERRES_DYNAMIC: Just random for now.
- new_num = lcg_rand16(&seed) % 9 + 8;
- break;
- default: assert(0);
- }
-
- // Make sure overall reduction is no more than 1/2 of the source size.
- av1_calculate_scaled_size(&width, &height, new_num);
- if (width * 2 < oxcf->width || height * 2 < oxcf->height)
- new_num = SCALE_DENOMINATOR;
-
- return new_num;
+ rc_set_frame_target(cpi, target_rate, width, height);
}
-#endif // CONFIG_FRAME_SUPERRES
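For reference, the superres clamp deleted above kept the combined reduction to at most half the source size in each dimension. A hedged sketch of that check, assuming SCALE_DENOMINATOR is 16 and that av1_calculate_scaled_size() scales each dimension by num/SCALE_DENOMINATOR:

#define SCALE_DENOMINATOR 16 /* assumed value */
static unsigned char clamp_superres_num(int src_w, int src_h,
                                        unsigned char new_num) {
  const int w = src_w * new_num / SCALE_DENOMINATOR;
  const int h = src_h * new_num / SCALE_DENOMINATOR;
  /* Revert to no scaling if either dimension would drop below half. */
  if (w * 2 < src_w || h * 2 < src_h) return SCALE_DENOMINATOR;
  return new_num;
}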
diff --git a/third_party/aom/av1/encoder/ratectrl.h b/third_party/aom/av1/encoder/ratectrl.h
index 4ebdfadd6..8b410e778 100644
--- a/third_party/aom/av1/encoder/ratectrl.h
+++ b/third_party/aom/av1/encoder/ratectrl.h
@@ -50,6 +50,14 @@ typedef enum {
#endif // CONFIG_EXT_REFS
typedef struct {
+ int resize_width;
+ int resize_height;
+#if CONFIG_FRAME_SUPERRES
+ uint8_t superres_denom;
+#endif // CONFIG_FRAME_SUPERRES
+} size_params_type;
+
+typedef struct {
// Rate targetting variables
int base_frame_target; // A baseline frame target before adjustment
// for previous under or over shoot.
@@ -189,10 +197,6 @@ int av1_rc_get_default_max_gf_interval(double framerate, int min_frame_rate);
void av1_rc_get_one_pass_vbr_params(struct AV1_COMP *cpi);
void av1_rc_get_one_pass_cbr_params(struct AV1_COMP *cpi);
-// How many times less pixels there are to encode given the current scaling.
-// Temporary replacement for rcf_mult and rate_thresh_mult.
-double av1_resize_rate_factor(const struct AV1_COMP *cpi);
-
// Post encode update of the rate control parameters based
// on bytes used
void av1_rc_postencode_update(struct AV1_COMP *cpi, uint64_t bytes_used);
@@ -201,7 +205,8 @@ void av1_rc_postencode_update_drop_frame(struct AV1_COMP *cpi);
// Updates rate correction factors
// Changes only the rate correction factors in the rate control structure.
-void av1_rc_update_rate_correction_factors(struct AV1_COMP *cpi);
+void av1_rc_update_rate_correction_factors(struct AV1_COMP *cpi, int width,
+ int height);
// Decide if we should drop this frame: For 1-pass CBR.
// Changes only the decimation count in the rate control structure
@@ -214,12 +219,13 @@ void av1_rc_compute_frame_size_bounds(const struct AV1_COMP *cpi,
int *frame_over_shoot_limit);
// Picks q and q bounds given the target for bits
-int av1_rc_pick_q_and_bounds(const struct AV1_COMP *cpi, int *bottom_index,
- int *top_index);
+int av1_rc_pick_q_and_bounds(const struct AV1_COMP *cpi, int width, int height,
+ int *bottom_index, int *top_index);
// Estimates q to achieve a target bits per frame
int av1_rc_regulate_q(const struct AV1_COMP *cpi, int target_bits_per_frame,
- int active_best_quality, int active_worst_quality);
+ int active_best_quality, int active_worst_quality,
+ int width, int height);
// Estimates bits per mb for a given qindex and correction factor.
int av1_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
@@ -247,20 +253,15 @@ int av1_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
int av1_frame_type_qdelta(const struct AV1_COMP *cpi, int rf_level, int q);
-void av1_rc_update_framerate(struct AV1_COMP *cpi);
+void av1_rc_update_framerate(struct AV1_COMP *cpi, int width, int height);
void av1_rc_set_gf_interval_range(const struct AV1_COMP *const cpi,
RATE_CONTROL *const rc);
-void av1_set_target_rate(struct AV1_COMP *cpi);
+void av1_set_target_rate(struct AV1_COMP *cpi, int width, int height);
int av1_resize_one_pass_cbr(struct AV1_COMP *cpi);
-uint8_t av1_calculate_next_resize_scale(const struct AV1_COMP *cpi);
-#if CONFIG_FRAME_SUPERRES
-uint8_t av1_calculate_next_superres_scale(const struct AV1_COMP *cpi, int width,
- int height);
-#endif // CONFIG_FRAME_SUPERRES
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/encoder/rd.c b/third_party/aom/av1/encoder/rd.c
index da3b6e209..5dd485334 100644
--- a/third_party/aom/av1/encoder/rd.c
+++ b/third_party/aom/av1/encoder/rd.c
@@ -36,6 +36,9 @@
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
+#if CONFIG_LV_MAP
+#include "av1/encoder/encodetxb.h"
+#endif
#include "av1/encoder/mcomp.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
@@ -54,121 +57,301 @@ static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
2, 2, 2,
#endif
- 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
+ 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
#if CONFIG_EXT_PARTITION
48, 48, 64,
#endif // CONFIG_EXT_PARTITION
- 4, 4, 8, 8
+ 4, 4, 8, 8, 16, 16,
+#if CONFIG_EXT_PARTITION
+ 32, 32
+#endif // CONFIG_EXT_PARTITION
};
-static void fill_mode_costs(AV1_COMP *cpi) {
- const FRAME_CONTEXT *const fc = cpi->common.fc;
+#if CONFIG_EXT_TX
+static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][EXT_TX_SIZES] =
+ {
+#if CONFIG_CHROMA_2X2
+ { 1, 1, 1, 1, 1 }, // unused
+ { 0, 1, 1, 0, 0 },
+ { 0, 0, 0, 1, 0 },
+#if CONFIG_MRC_TX
+ { 0, 0, 0, 0, 1 },
+#endif // CONFIG_MRC_TX
+#else // CONFIG_CHROMA_2X2
+ { 1, 1, 1, 1 }, // unused
+ { 1, 1, 0, 0 },
+ { 0, 0, 1, 0 },
+#if CONFIG_MRC_TX
+ { 0, 0, 0, 1 },
+#endif // CONFIG_MRC_TX
+#endif // CONFIG_CHROMA_2X2
+ };
+
+static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][EXT_TX_SIZES] =
+ {
+#if CONFIG_CHROMA_2X2
+ { 1, 1, 1, 1, 1 }, // unused
+ { 0, 1, 1, 0, 0 }, { 0, 0, 0, 1, 0 }, { 0, 0, 0, 0, 1 },
+#if CONFIG_MRC_TX
+ { 0, 0, 0, 0, 1 },
+#endif // CONFIG_MRC_TX
+#else // CONFIG_CHROMA_2X2
+ { 1, 1, 1, 1 }, // unused
+ { 1, 1, 0, 0 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 },
+#if CONFIG_MRC_TX
+ { 0, 0, 0, 1 },
+#endif // CONFIG_MRC_TX
+#endif // CONFIG_CHROMA_2X2
+ };
+#endif // CONFIG_EXT_TX
+
+void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x,
+ FRAME_CONTEXT *fc) {
int i, j;
+ if (cm->frame_type == KEY_FRAME) {
+ for (i = 0; i < PARTITION_CONTEXTS_PRIMARY; ++i)
+ av1_cost_tokens_from_cdf(x->partition_cost[i], fc->partition_cdf[i],
+ NULL);
+#if CONFIG_UNPOISON_PARTITION_CTX
+ for (; i < PARTITION_CONTEXTS_PRIMARY + PARTITION_BLOCK_SIZES; ++i) {
+ aom_prob p = fc->partition_prob[i][PARTITION_VERT];
+ assert(p > 0);
+ x->partition_cost[i][PARTITION_NONE] = INT_MAX;
+ x->partition_cost[i][PARTITION_HORZ] = INT_MAX;
+ x->partition_cost[i][PARTITION_VERT] = av1_cost_bit(p, 0);
+ x->partition_cost[i][PARTITION_SPLIT] = av1_cost_bit(p, 1);
+ }
+ for (; i < PARTITION_CONTEXTS_PRIMARY + 2 * PARTITION_BLOCK_SIZES; ++i) {
+ aom_prob p = fc->partition_prob[i][PARTITION_HORZ];
+ assert(p > 0);
+ x->partition_cost[i][PARTITION_NONE] = INT_MAX;
+ x->partition_cost[i][PARTITION_HORZ] = av1_cost_bit(p, 0);
+ x->partition_cost[i][PARTITION_VERT] = INT_MAX;
+ x->partition_cost[i][PARTITION_SPLIT] = av1_cost_bit(p, 1);
+ }
+ x->partition_cost[PARTITION_CONTEXTS][PARTITION_NONE] = INT_MAX;
+ x->partition_cost[PARTITION_CONTEXTS][PARTITION_HORZ] = INT_MAX;
+ x->partition_cost[PARTITION_CONTEXTS][PARTITION_VERT] = INT_MAX;
+ x->partition_cost[PARTITION_CONTEXTS][PARTITION_SPLIT] = 0;
+#endif // CONFIG_UNPOISON_PARTITION_CTX
+ }
+
+#if CONFIG_KF_CTX
+ for (i = 0; i < KF_MODE_CONTEXTS; ++i)
+ for (j = 0; j < KF_MODE_CONTEXTS; ++j)
+ av1_cost_tokens_from_cdf(x->y_mode_costs[i][j], fc->kf_y_cdf[i][j], NULL);
+#else
for (i = 0; i < INTRA_MODES; ++i)
for (j = 0; j < INTRA_MODES; ++j)
- av1_cost_tokens_from_cdf(cpi->y_mode_costs[i][j], av1_kf_y_mode_cdf[i][j],
- av1_intra_mode_inv);
+ av1_cost_tokens_from_cdf(x->y_mode_costs[i][j], fc->kf_y_cdf[i][j], NULL);
+#endif
for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
- av1_cost_tokens_from_cdf(cpi->mbmode_cost[i], fc->y_mode_cdf[i],
- av1_intra_mode_inv);
-
+ av1_cost_tokens_from_cdf(x->mbmode_cost[i], fc->y_mode_cdf[i], NULL);
for (i = 0; i < INTRA_MODES; ++i)
- av1_cost_tokens_from_cdf(cpi->intra_uv_mode_cost[i], fc->uv_mode_cdf[i],
- av1_intra_mode_inv);
+ av1_cost_tokens_from_cdf(x->intra_uv_mode_cost[i], fc->uv_mode_cdf[i],
+ NULL);
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- av1_cost_tokens(cpi->switchable_interp_costs[i],
- fc->switchable_interp_prob[i], av1_switchable_interp_tree);
+ av1_cost_tokens_from_cdf(x->switchable_interp_costs[i],
+ fc->switchable_interp_cdf[i], NULL);
-#if CONFIG_PALETTE
for (i = 0; i < PALETTE_BLOCK_SIZES; ++i) {
- av1_cost_tokens_from_cdf(cpi->palette_y_size_cost[i],
+ av1_cost_tokens_from_cdf(x->palette_y_size_cost[i],
fc->palette_y_size_cdf[i], NULL);
- av1_cost_tokens_from_cdf(cpi->palette_uv_size_cost[i],
+ av1_cost_tokens_from_cdf(x->palette_uv_size_cost[i],
fc->palette_uv_size_cdf[i], NULL);
}
for (i = 0; i < PALETTE_SIZES; ++i) {
for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
- av1_cost_tokens_from_cdf(cpi->palette_y_color_cost[i][j],
+ av1_cost_tokens_from_cdf(x->palette_y_color_cost[i][j],
fc->palette_y_color_index_cdf[i][j], NULL);
- av1_cost_tokens_from_cdf(cpi->palette_uv_color_cost[i][j],
+ av1_cost_tokens_from_cdf(x->palette_uv_color_cost[i][j],
fc->palette_uv_color_index_cdf[i][j], NULL);
}
}
-#endif // CONFIG_PALETTE
+#if CONFIG_MRC_TX
+ for (i = 0; i < PALETTE_SIZES; ++i) {
+ for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
+ av1_cost_tokens_from_cdf(x->mrc_mask_inter_cost[i][j],
+ fc->mrc_mask_inter_cdf[i][j], NULL);
+ av1_cost_tokens_from_cdf(x->mrc_mask_intra_cost[i][j],
+ fc->mrc_mask_intra_cdf[i][j], NULL);
+ }
+ }
+#endif // CONFIG_MRC_TX
+
+#if CONFIG_CFL
+ int sign_cost[CFL_JOINT_SIGNS];
+ av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
+ for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
+ const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
+ const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
+ int *cost_u = x->cfl_cost[joint_sign][CFL_PRED_U];
+ int *cost_v = x->cfl_cost[joint_sign][CFL_PRED_V];
+ if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO)
+ memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
+ else
+ av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
+ if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO)
+ memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
+ else
+ av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
+ for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
+ cost_u[u] += sign_cost[joint_sign];
+ }
+#endif // CONFIG_CFL
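Note the asymmetry at the end of the CFL block above: the joint-sign cost is folded into cost_u only, so that adding a (cost_u, cost_v) pair prices the sign exactly once. A one-line illustration of the assumed downstream use:

/* Hypothetical call site: the joint sign is already included in cost_u. */
static int cfl_pair_rate(const int *cost_u, const int *cost_v, int u, int v) {
  return cost_u[u] + cost_v[v];
}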
for (i = 0; i < MAX_TX_DEPTH; ++i)
for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
- av1_cost_tokens(cpi->tx_size_cost[i][j], fc->tx_size_probs[i][j],
- av1_tx_size_tree[i]);
+ av1_cost_tokens_from_cdf(x->tx_size_cost[i][j], fc->tx_size_cdf[i][j],
+ NULL);
#if CONFIG_EXT_TX
+#if CONFIG_LGT_FROM_PRED
+ if (LGT_FROM_PRED_INTRA) {
+ for (i = 0; i < LGT_SIZES; ++i) {
+ for (j = 0; j < INTRA_MODES; ++j) {
+ x->intra_lgt_cost[i][j][0] = av1_cost_bit(fc->intra_lgt_prob[i][j], 0);
+ x->intra_lgt_cost[i][j][1] = av1_cost_bit(fc->intra_lgt_prob[i][j], 1);
+ }
+ }
+ }
+ if (LGT_FROM_PRED_INTER) {
+ for (i = 0; i < LGT_SIZES; ++i) {
+ x->inter_lgt_cost[i][0] = av1_cost_bit(fc->inter_lgt_prob[i], 0);
+ x->inter_lgt_cost[i][1] = av1_cost_bit(fc->inter_lgt_prob[i], 1);
+ }
+ }
+#endif // CONFIG_LGT_FROM_PRED
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
int s;
for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
if (use_inter_ext_tx_for_txsize[s][i]) {
- av1_cost_tokens(cpi->inter_tx_type_costs[s][i],
- fc->inter_ext_tx_prob[s][i], av1_ext_tx_inter_tree[s]);
+ av1_cost_tokens_from_cdf(
+ x->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
+ av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
}
}
for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
if (use_intra_ext_tx_for_txsize[s][i]) {
- for (j = 0; j < INTRA_MODES; ++j)
- av1_cost_tokens(cpi->intra_tx_type_costs[s][i][j],
- fc->intra_ext_tx_prob[s][i][j],
- av1_ext_tx_intra_tree[s]);
+ for (j = 0; j < INTRA_MODES; ++j) {
+ av1_cost_tokens_from_cdf(
+ x->intra_tx_type_costs[s][i][j], fc->intra_ext_tx_cdf[s][i][j],
+ av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
+ }
}
}
}
#else
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
for (j = 0; j < TX_TYPES; ++j)
- av1_cost_tokens(cpi->intra_tx_type_costs[i][j],
- fc->intra_ext_tx_prob[i][j], av1_ext_tx_tree);
+ av1_cost_tokens_from_cdf(x->intra_tx_type_costs[i][j],
+ fc->intra_ext_tx_cdf[i][j], av1_ext_tx_inv);
}
for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
- av1_cost_tokens(cpi->inter_tx_type_costs[i], fc->inter_ext_tx_prob[i],
- av1_ext_tx_tree);
+ av1_cost_tokens_from_cdf(x->inter_tx_type_costs[i], fc->inter_ext_tx_cdf[i],
+ av1_ext_tx_inv);
}
#endif // CONFIG_EXT_TX
#if CONFIG_EXT_INTRA
#if CONFIG_INTRA_INTERP
for (i = 0; i < INTRA_FILTERS + 1; ++i)
- av1_cost_tokens(cpi->intra_filter_cost[i], fc->intra_filter_probs[i],
- av1_intra_filter_tree);
+ av1_cost_tokens_from_cdf(x->intra_filter_cost[i], fc->intra_filter_cdf[i],
+ NULL);
#endif // CONFIG_INTRA_INTERP
#endif // CONFIG_EXT_INTRA
#if CONFIG_LOOP_RESTORATION
- av1_cost_tokens(cpi->switchable_restore_cost, fc->switchable_restore_prob,
+ av1_cost_tokens(x->switchable_restore_cost, fc->switchable_restore_prob,
av1_switchable_restore_tree);
#endif // CONFIG_LOOP_RESTORATION
-#if CONFIG_GLOBAL_MOTION
- for (i = 0; i < TRANS_TYPES; ++i)
- cpi->gmtype_cost[i] = (1 + (i > 0 ? GLOBAL_TYPE_BITS : 0))
- << AV1_PROB_COST_SHIFT;
-#endif // CONFIG_GLOBAL_MOTION
-}
+#if CONFIG_INTRABC
+ av1_cost_tokens_from_cdf(x->intrabc_cost, fc->intrabc_cdf, NULL);
+#endif // CONFIG_INTRABC
-void av1_fill_token_costs(av1_coeff_cost *c,
- av1_coeff_probs_model (*p)[PLANE_TYPES]) {
- int i, j, k, l;
- TX_SIZE t;
- for (t = 0; t < TX_SIZES; ++t)
- for (i = 0; i < PLANE_TYPES; ++i)
- for (j = 0; j < REF_TYPES; ++j)
- for (k = 0; k < COEF_BANDS; ++k)
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- aom_prob probs[ENTROPY_NODES];
- av1_model_to_full_probs(p[t][i][j][k][l], probs);
- av1_cost_tokens((int *)c[t][i][j][k][0][l], probs, av1_coef_tree);
- av1_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
- av1_coef_tree);
- assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
- c[t][i][j][k][1][l][EOB_TOKEN]);
- }
+ if (!frame_is_intra_only(cm)) {
+ for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
+#if CONFIG_NEW_MULTISYMBOL
+ av1_cost_tokens_from_cdf(x->newmv_mode_cost[i], fc->newmv_cdf[i], NULL);
+#else
+ x->newmv_mode_cost[i][0] = av1_cost_bit(fc->newmv_prob[i], 0);
+ x->newmv_mode_cost[i][1] = av1_cost_bit(fc->newmv_prob[i], 1);
+#endif
+ }
+
+ for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i) {
+#if CONFIG_NEW_MULTISYMBOL
+ av1_cost_tokens_from_cdf(x->zeromv_mode_cost[i], fc->zeromv_cdf[i], NULL);
+#else
+ x->zeromv_mode_cost[i][0] = av1_cost_bit(fc->zeromv_prob[i], 0);
+ x->zeromv_mode_cost[i][1] = av1_cost_bit(fc->zeromv_prob[i], 1);
+#endif
+ }
+
+ for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
+#if CONFIG_NEW_MULTISYMBOL
+ av1_cost_tokens_from_cdf(x->refmv_mode_cost[i], fc->refmv_cdf[i], NULL);
+#else
+ x->refmv_mode_cost[i][0] = av1_cost_bit(fc->refmv_prob[i], 0);
+ x->refmv_mode_cost[i][1] = av1_cost_bit(fc->refmv_prob[i], 1);
+#endif
+ }
+
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
+#if CONFIG_NEW_MULTISYMBOL
+ av1_cost_tokens_from_cdf(x->drl_mode_cost0[i], fc->drl_cdf[i], NULL);
+#else
+ x->drl_mode_cost0[i][0] = av1_cost_bit(fc->drl_prob[i], 0);
+ x->drl_mode_cost0[i][1] = av1_cost_bit(fc->drl_prob[i], 1);
+#endif
+ }
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ av1_cost_tokens_from_cdf(x->inter_compound_mode_cost[i],
+ fc->inter_compound_mode_cdf[i], NULL);
+#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+ for (i = 0; i < BLOCK_SIZES_ALL; ++i)
+ av1_cost_tokens_from_cdf(x->compound_type_cost[i],
+ fc->compound_type_cdf[i], NULL);
+#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+#if CONFIG_COMPOUND_SINGLEREF
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ av1_cost_tokens_from_cdf(x->inter_singleref_comp_mode_cost[i],
+ fc->inter_singleref_comp_mode_cdf[i], NULL);
+#endif // CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_INTERINTRA
+ for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
+ av1_cost_tokens_from_cdf(x->interintra_mode_cost[i],
+ fc->interintra_mode_cdf[i], NULL);
+#endif // CONFIG_INTERINTRA
+#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
+ av1_cost_tokens_from_cdf(x->motion_mode_cost[i], fc->motion_mode_cdf[i],
+ NULL);
+ }
+#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+ for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ av1_cost_tokens_from_cdf(x->motion_mode_cost2[i], fc->ncobmc_cdf[i],
+ NULL);
+#endif
+#if CONFIG_NEW_MULTISYMBOL || CONFIG_NCOBMC_ADAPT_WEIGHT
+ av1_cost_tokens_from_cdf(x->motion_mode_cost1[i], fc->obmc_cdf[i], NULL);
+#else
+ x->motion_mode_cost1[i][0] = av1_cost_bit(fc->obmc_prob[i], 0);
+ x->motion_mode_cost1[i][1] = av1_cost_bit(fc->obmc_prob[i], 1);
+#endif
+ }
+#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
+ for (i = ADAPT_OVERLAP_BLOCK_8X8; i < ADAPT_OVERLAP_BLOCKS; ++i) {
+ av1_cost_tokens_from_cdf(x->ncobmc_mode_cost[i], fc->ncobmc_mode_cdf[i],
+ NULL);
+ }
+#endif // CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
+#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+ }
}
// Values are now correlated to quantizer.
@@ -212,11 +395,11 @@ static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
128, 144, 128, 128, 144,
#if CONFIG_EXT_REFS
// TODO(zoeliu): To further adjust the following factor values.
- 128, 128, 128
+ 128, 128, 128,
// TODO(weitinglin): We should investigate if the values should be the same
// as the value used by the OVERLAY frame.
- ,
- 144
+ 144, // INTNL_OVERLAY_UPDATE
+ 128 // INTNL_ARF_UPDATE
#endif // CONFIG_EXT_REFS
};
@@ -341,11 +524,170 @@ void av1_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int ref,
x->nmvjointcost = x->nmv_vec_cost[nmv_ctx];
}
+#if CONFIG_LV_MAP
+#if !LV_MAP_PROB
+static void get_rate_cost(aom_prob p, int cost[2]) {
+ cost[0] = av1_cost_bit(p, 0);
+ cost[1] = av1_cost_bit(p, 1);
+}
+#endif // !LV_MAP_PROB
+
+void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc) {
+ for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (int plane = 0; plane < PLANE_TYPES; ++plane) {
+ LV_MAP_COEFF_COST *pcost = &x->coeff_costs[tx_size][plane];
+
+#if LV_MAP_PROB
+ for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
+ av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
+ fc->txb_skip_cdf[tx_size][ctx], NULL);
+
+ for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
+ av1_cost_tokens_from_cdf(pcost->nz_map_cost[ctx],
+ fc->nz_map_cdf[tx_size][plane][ctx], NULL);
+
+ for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
+ av1_cost_tokens_from_cdf(pcost->eob_cost[ctx],
+ fc->eob_flag_cdf[tx_size][plane][ctx], NULL);
+
+ for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
+ av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
+ fc->dc_sign_cdf[plane][ctx], NULL);
+
+ for (int layer = 0; layer < NUM_BASE_LEVELS; ++layer)
+ for (int ctx = 0; ctx < COEFF_BASE_CONTEXTS; ++ctx)
+ av1_cost_tokens_from_cdf(
+ pcost->base_cost[layer][ctx],
+ fc->coeff_base_cdf[tx_size][plane][layer][ctx], NULL);
+
+#if BR_NODE
+ for (int br = 0; br < BASE_RANGE_SETS; ++br)
+ for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx)
+ av1_cost_tokens_from_cdf(pcost->br_cost[br][ctx],
+ fc->coeff_br_cdf[tx_size][plane][br][ctx],
+ NULL);
+
+ for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
+ int lps_rate[2];
+ av1_cost_tokens_from_cdf(lps_rate,
+ fc->coeff_lps_cdf[tx_size][plane][ctx], NULL);
+
+ for (int base_range = 0; base_range < COEFF_BASE_RANGE + 1;
+ ++base_range) {
+ int br_set_idx = base_range < COEFF_BASE_RANGE
+ ? coeff_to_br_index[base_range]
+ : BASE_RANGE_SETS;
+
+ pcost->lps_cost[ctx][base_range] = 0;
+
+ for (int idx = 0; idx < BASE_RANGE_SETS; ++idx) {
+ if (idx == br_set_idx) {
+ pcost->lps_cost[ctx][base_range] += pcost->br_cost[idx][ctx][1];
+
+ int br_base = br_index_to_coeff[br_set_idx];
+ int br_offset = base_range - br_base;
+ int extra_bits = (1 << br_extra_bits[idx]) - 1;
+ for (int tok = 0; tok < extra_bits; ++tok) {
+ if (tok == br_offset) {
+ pcost->lps_cost[ctx][base_range] += lps_rate[1];
+ break;
+ } else {
+ pcost->lps_cost[ctx][base_range] += lps_rate[0];
+ }
+ }
+ break;
+ } else {
+ pcost->lps_cost[ctx][base_range] += pcost->br_cost[idx][ctx][0];
+ }
+ }
+        // lps_cost[ctx][base_range] now holds the full base-range cost
+ }
+ }
+#else // BR_NODE
+ for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx)
+ av1_cost_tokens_from_cdf(pcost->lps_cost[ctx],
+ fc->coeff_lps_cdf[tx_size][plane][ctx], NULL);
+#endif // BR_NODE
+#if CONFIG_CTX1D
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
+ av1_cost_tokens_from_cdf(pcost->eob_mode_cost[tx_class],
+ fc->eob_mode_cdf[tx_size][plane][tx_class],
+ NULL);
+
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
+ for (int ctx = 0; ctx < EMPTY_LINE_CONTEXTS; ++ctx)
+ av1_cost_tokens_from_cdf(
+ pcost->empty_line_cost[tx_class][ctx],
+ fc->empty_line_cdf[tx_size][plane][tx_class][ctx], NULL);
+
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
+ for (int ctx = 0; ctx < HV_EOB_CONTEXTS; ++ctx)
+ av1_cost_tokens_from_cdf(
+ pcost->hv_eob_cost[tx_class][ctx],
+ fc->hv_eob_cdf[tx_size][plane][tx_class][ctx], NULL);
+#endif // CONFIG_CTX1D
+#else // LV_MAP_PROB
+ for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
+ get_rate_cost(fc->txb_skip[tx_size][ctx], pcost->txb_skip_cost[ctx]);
+
+ for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
+ get_rate_cost(fc->nz_map[tx_size][plane][ctx], pcost->nz_map_cost[ctx]);
+
+ for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
+ get_rate_cost(fc->eob_flag[tx_size][plane][ctx], pcost->eob_cost[ctx]);
+
+ for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
+ get_rate_cost(fc->dc_sign[plane][ctx], pcost->dc_sign_cost[ctx]);
+
+ for (int layer = 0; layer < NUM_BASE_LEVELS; ++layer)
+ for (int ctx = 0; ctx < COEFF_BASE_CONTEXTS; ++ctx)
+ get_rate_cost(fc->coeff_base[tx_size][plane][layer][ctx],
+ pcost->base_cost[layer][ctx]);
+
+ for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx)
+ get_rate_cost(fc->coeff_lps[tx_size][plane][ctx], pcost->lps_cost[ctx]);
+
+#if CONFIG_CTX1D
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
+ get_rate_cost(fc->eob_mode[tx_size][plane][tx_class],
+ pcost->eob_mode_cost[tx_class]);
+
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
+ for (int ctx = 0; ctx < EMPTY_LINE_CONTEXTS; ++ctx)
+ get_rate_cost(fc->empty_line[tx_size][plane][tx_class][ctx],
+ pcost->empty_line_cost[tx_class][ctx]);
+
+ for (int tx_class = 0; tx_class < TX_CLASSES; ++tx_class)
+ for (int ctx = 0; ctx < HV_EOB_CONTEXTS; ++ctx)
+ get_rate_cost(fc->hv_eob[tx_size][plane][tx_class][ctx],
+ pcost->hv_eob_cost[tx_class][ctx]);
+#endif // CONFIG_CTX1D
+#endif // LV_MAP_PROB
+ }
+ }
+}
+#endif // CONFIG_LV_MAP
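The BR_NODE branch above prices each base_range value as the cost of choosing its range set (br_cost) plus a truncated-unary walk over the extra bits within the set. A simplified sketch of the inner walk, mirroring the loop over tok, with rate[0]/rate[1] standing in for lps_rate:

/* Sketch: price an offset in [0, extra_bits) coded as a run of binary
 * "continue" decisions; rate[1] terminates the run, rate[0] extends it. */
static int price_unary_offset(const int rate[2], int offset, int extra_bits) {
  int cost = 0;
  for (int tok = 0; tok < extra_bits; ++tok) {
    if (tok == offset) {
      cost += rate[1];
      break;
    }
    cost += rate[0];
  }
  return cost;
}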
+
+void av1_fill_token_costs_from_cdf(av1_coeff_cost *cost,
+ coeff_cdf_model (*cdf)[PLANE_TYPES]) {
+ for (int tx = 0; tx < TX_SIZES; ++tx) {
+ for (int pt = 0; pt < PLANE_TYPES; ++pt) {
+ for (int rt = 0; rt < REF_TYPES; ++rt) {
+ for (int band = 0; band < COEF_BANDS; ++band) {
+ for (int ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
+ av1_cost_tokens_from_cdf(cost[tx][pt][rt][band][ctx],
+ cdf[tx][pt][rt][band][ctx], NULL);
+ }
+ }
+ }
+ }
+ }
+}
+
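This file-wide migration replaces tree+probability costing with av1_cost_tokens_from_cdf(). A minimal sketch of what such a conversion computes, assuming a plain increasing 15-bit CDF (aom's actual storage uses the AOM_ICDF convention) and costs measured in 1/512-bit units:

#include <math.h>
/* Sketch only: costs[i] = -log2(p_i) scaled by 512, from a CDF in [0,32768]. */
static void cost_from_cdf_sketch(int *costs, const int *cdf, int nsymbs) {
  int prev = 0;
  for (int i = 0; i < nsymbs; ++i) {
    const double p = (cdf[i] - prev) / 32768.0; /* symbol probability */
    costs[i] = (int)lrint(-log2(p) * 512.0);    /* 1/512-bit units */
    prev = cdf[i];
  }
}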
void av1_initialize_rd_consts(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->td.mb;
RD_OPT *const rd = &cpi->rd;
- int i;
int nmv_ctx;
aom_clear_system_state();
@@ -357,11 +699,25 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
set_block_thresholds(cm, rd);
for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
+#if CONFIG_AMVR
+ if (cm->cur_frame_mv_precision_level) {
+ av1_build_nmv_cost_table(x->nmv_vec_cost[nmv_ctx], x->nmvcost[nmv_ctx],
+ &cm->fc->nmvc[nmv_ctx], MV_SUBPEL_NONE);
+ } else {
+ av1_build_nmv_cost_table(
+ x->nmv_vec_cost[nmv_ctx],
+ cm->allow_high_precision_mv ? x->nmvcost_hp[nmv_ctx]
+ : x->nmvcost[nmv_ctx],
+ &cm->fc->nmvc[nmv_ctx], cm->allow_high_precision_mv);
+ }
+
+#else
av1_build_nmv_cost_table(
x->nmv_vec_cost[nmv_ctx],
cm->allow_high_precision_mv ? x->nmvcost_hp[nmv_ctx]
: x->nmvcost[nmv_ctx],
&cm->fc->nmvc[nmv_ctx], cm->allow_high_precision_mv);
+#endif
}
x->mvcost = x->mv_cost_stack[0];
x->nmvjointcost = x->nmv_vec_cost[0];
@@ -376,106 +732,22 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
}
#endif
+#if CONFIG_GLOBAL_MOTION
if (cpi->oxcf.pass != 1) {
- av1_fill_token_costs(x->token_costs, cm->fc->coef_probs);
-
- if (cm->frame_type == KEY_FRAME) {
-#if CONFIG_EXT_PARTITION_TYPES
- for (i = 0; i < PARTITION_PLOFFSET; ++i)
- av1_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i],
- av1_partition_tree);
- for (; i < PARTITION_CONTEXTS_PRIMARY; ++i)
- av1_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i],
- av1_ext_partition_tree);
+ for (int i = 0; i < TRANS_TYPES; ++i)
+#if GLOBAL_TRANS_TYPES > 4
+ cpi->gmtype_cost[i] = (1 + (i > 0 ? GLOBAL_TYPE_BITS : 0))
+ << AV1_PROB_COST_SHIFT;
#else
- for (i = 0; i < PARTITION_CONTEXTS_PRIMARY; ++i)
- av1_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i],
- av1_partition_tree);
-#endif // CONFIG_EXT_PARTITION_TYPES
-#if CONFIG_UNPOISON_PARTITION_CTX
- for (; i < PARTITION_CONTEXTS_PRIMARY + PARTITION_BLOCK_SIZES; ++i) {
- aom_prob p = cm->fc->partition_prob[i][PARTITION_VERT];
- assert(p > 0);
- cpi->partition_cost[i][PARTITION_NONE] = INT_MAX;
- cpi->partition_cost[i][PARTITION_HORZ] = INT_MAX;
- cpi->partition_cost[i][PARTITION_VERT] = av1_cost_bit(p, 0);
- cpi->partition_cost[i][PARTITION_SPLIT] = av1_cost_bit(p, 1);
- }
- for (; i < PARTITION_CONTEXTS_PRIMARY + 2 * PARTITION_BLOCK_SIZES; ++i) {
- aom_prob p = cm->fc->partition_prob[i][PARTITION_HORZ];
- assert(p > 0);
- cpi->partition_cost[i][PARTITION_NONE] = INT_MAX;
- cpi->partition_cost[i][PARTITION_HORZ] = av1_cost_bit(p, 0);
- cpi->partition_cost[i][PARTITION_VERT] = INT_MAX;
- cpi->partition_cost[i][PARTITION_SPLIT] = av1_cost_bit(p, 1);
- }
- cpi->partition_cost[PARTITION_CONTEXTS][PARTITION_NONE] = INT_MAX;
- cpi->partition_cost[PARTITION_CONTEXTS][PARTITION_HORZ] = INT_MAX;
- cpi->partition_cost[PARTITION_CONTEXTS][PARTITION_VERT] = INT_MAX;
- cpi->partition_cost[PARTITION_CONTEXTS][PARTITION_SPLIT] = 0;
-#endif // CONFIG_UNPOISON_PARTITION_CTX
- }
-
- fill_mode_costs(cpi);
-
- if (!frame_is_intra_only(cm)) {
- for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
- cpi->newmv_mode_cost[i][0] = av1_cost_bit(cm->fc->newmv_prob[i], 0);
- cpi->newmv_mode_cost[i][1] = av1_cost_bit(cm->fc->newmv_prob[i], 1);
- }
-
- for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i) {
- cpi->zeromv_mode_cost[i][0] = av1_cost_bit(cm->fc->zeromv_prob[i], 0);
- cpi->zeromv_mode_cost[i][1] = av1_cost_bit(cm->fc->zeromv_prob[i], 1);
- }
-
- for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
- cpi->refmv_mode_cost[i][0] = av1_cost_bit(cm->fc->refmv_prob[i], 0);
- cpi->refmv_mode_cost[i][1] = av1_cost_bit(cm->fc->refmv_prob[i], 1);
- }
-
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
- cpi->drl_mode_cost0[i][0] = av1_cost_bit(cm->fc->drl_prob[i], 0);
- cpi->drl_mode_cost0[i][1] = av1_cost_bit(cm->fc->drl_prob[i], 1);
- }
-#if CONFIG_EXT_INTER
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- av1_cost_tokens((int *)cpi->inter_compound_mode_cost[i],
- cm->fc->inter_compound_mode_probs[i],
- av1_inter_compound_mode_tree);
-#if CONFIG_COMPOUND_SINGLEREF
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- av1_cost_tokens((int *)cpi->inter_singleref_comp_mode_cost[i],
- cm->fc->inter_singleref_comp_mode_probs[i],
- av1_inter_singleref_comp_mode_tree);
-#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_INTERINTRA
- for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
- av1_cost_tokens((int *)cpi->interintra_mode_cost[i],
- cm->fc->interintra_mode_prob[i],
- av1_interintra_mode_tree);
-#endif // CONFIG_INTERINTRA
-#endif // CONFIG_EXT_INTER
-#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
- av1_cost_tokens((int *)cpi->motion_mode_cost[i],
- cm->fc->motion_mode_prob[i], av1_motion_mode_tree);
- }
-#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
- for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
- cpi->motion_mode_cost1[i][0] = av1_cost_bit(cm->fc->obmc_prob[i], 0);
- cpi->motion_mode_cost1[i][1] = av1_cost_bit(cm->fc->obmc_prob[i], 1);
- }
-#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
-#if CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
- for (i = ADAPT_OVERLAP_BLOCK_8X8; i < ADAPT_OVERLAP_BLOCKS; ++i) {
- av1_cost_tokens((int *)cpi->ncobmc_mode_cost[i],
- cm->fc->ncobmc_mode_prob[i], av1_ncobmc_mode_tree);
- }
-#endif
-#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- }
+ // IDENTITY: 1 bit
+ // TRANSLATION: 3 bits
+ // ROTZOOM: 2 bits
+ // AFFINE: 3 bits
+ cpi->gmtype_cost[i] = (1 + (i > 0 ? (i == ROTZOOM ? 1 : 2) : 0))
+ << AV1_PROB_COST_SHIFT;
+#endif // GLOBAL_TRANS_TYPES > 4
}
+#endif // CONFIG_GLOBAL_MOTION
}
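The gmtype_cost branch above hard-codes bit counts per global motion type. A hypothetical standalone spot-check (not aom code), assuming AV1_PROB_COST_SHIFT is 9 — i.e. costs are in 1/512-bit units — and the usual TRANS_TYPES ordering:

#include <assert.h>
enum { IDENTITY, TRANSLATION, ROTZOOM, AFFINE };
#define AV1_PROB_COST_SHIFT 9 /* assumed: 1 bit costs 512 units */
static int gmtype_cost(int i) {
  return (1 + (i > 0 ? (i == ROTZOOM ? 1 : 2) : 0)) << AV1_PROB_COST_SHIFT;
}
int main(void) {
  assert(gmtype_cost(IDENTITY) == 512);     /* 1 bit */
  assert(gmtype_cost(TRANSLATION) == 1536); /* 3 bits */
  assert(gmtype_cost(ROTZOOM) == 1024);     /* 2 bits */
  assert(gmtype_cost(AFFINE) == 1536);      /* 3 bits */
  return 0;
}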
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
@@ -618,6 +890,26 @@ static void get_entropy_contexts_plane(
!!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]);
break;
#if CONFIG_TX64X64
+ case TX_32X64:
+ for (i = 0; i < num_4x4_w; i += 16)
+ t_above[i] =
+ !!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8]);
+ for (i = 0; i < num_4x4_h; i += 32)
+ t_left[i] =
+ !!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8] |
+ *(const uint64_t *)&left[i + 16] |
+ *(const uint64_t *)&left[i + 24]);
+ break;
+ case TX_64X32:
+ for (i = 0; i < num_4x4_w; i += 32)
+ t_above[i] =
+ !!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8] |
+ *(const uint64_t *)&above[i + 16] |
+ *(const uint64_t *)&above[i + 24]);
+ for (i = 0; i < num_4x4_h; i += 16)
+ t_left[i] =
+ !!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]);
+ break;
case TX_64X64:
for (i = 0; i < num_4x4_w; i += 32)
t_above[i] =
@@ -727,6 +1019,20 @@ static void get_entropy_contexts_plane(
t_left[i] = !!*(const uint64_t *)&left[i];
break;
#if CONFIG_TX64X64
+ case TX_32X64:
+ for (i = 0; i < num_4x4_w; i += 8)
+ t_above[i] = !!*(const uint64_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 16)
+ t_left[i] =
+ !!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]);
+ break;
+ case TX_64X32:
+ for (i = 0; i < num_4x4_w; i += 16)
+ t_above[i] =
+ !!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8]);
+ for (i = 0; i < num_4x4_h; i += 8)
+ t_left[i] = !!*(const uint64_t *)&left[i];
+ break;
case TX_64X64:
for (i = 0; i < num_4x4_w; i += 16)
t_above[i] =
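The new TX_32X64/TX_64X32 cases reuse the surrounding idiom: per-4x4 context bytes are read eight at a time as uint64_t words, OR-ed together, and collapsed with !! into a single "any nonzero context" flag per transform-sized slot. A portable sketch of the idiom (the diff casts pointers directly, which additionally assumes 8-byte alignment; memcpy below avoids that):

#include <stdint.h>
#include <string.h>
/* Collapse n context bytes (n a multiple of 8) into one 0/1 flag. */
static uint8_t any_nonzero_ctx(const uint8_t *ctx, int n) {
  uint64_t acc = 0;
  for (int i = 0; i < n; i += 8) {
    uint64_t word;
    memcpy(&word, ctx + i, sizeof(word)); /* unaligned-safe load */
    acc |= word;
  }
  return !!acc;
}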
@@ -909,8 +1215,8 @@ YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
}
#if CONFIG_DUAL_FILTER
-int av1_get_switchable_rate(const AV1_COMP *cpi, const MACROBLOCKD *xd) {
- const AV1_COMMON *const cm = &cpi->common;
+int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
+ const MACROBLOCKD *xd) {
if (cm->interp_filter == SWITCHABLE) {
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int inter_filter_cost = 0;
@@ -921,8 +1227,9 @@ int av1_get_switchable_rate(const AV1_COMP *cpi, const MACROBLOCKD *xd) {
(mbmi->ref_frame[1] > INTRA_FRAME &&
has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
- inter_filter_cost +=
- cpi->switchable_interp_costs[ctx][mbmi->interp_filter[dir]];
+ const InterpFilter filter =
+ av1_extract_interp_filter(mbmi->interp_filters, dir);
+ inter_filter_cost += x->switchable_interp_costs[ctx][filter];
}
}
return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
@@ -931,13 +1238,15 @@ int av1_get_switchable_rate(const AV1_COMP *cpi, const MACROBLOCKD *xd) {
}
}
#else
-int av1_get_switchable_rate(const AV1_COMP *cpi, const MACROBLOCKD *xd) {
- const AV1_COMMON *const cm = &cpi->common;
+int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
+ const MACROBLOCKD *xd) {
if (cm->interp_filter == SWITCHABLE) {
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int ctx = av1_get_pred_context_switchable_interp(xd);
+ const InterpFilter filter =
+ av1_extract_interp_filter(mbmi->interp_filters, 0);
return SWITCHABLE_INTERP_RATE_FACTOR *
- cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+ x->switchable_interp_costs[ctx][filter];
}
return 0;
}
@@ -957,6 +1266,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_NEARESTL2] = 300;
rd->thresh_mult[THR_NEARESTL3] = 300;
rd->thresh_mult[THR_NEARESTB] = 300;
+ rd->thresh_mult[THR_NEARESTA2] = 300;
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEARESTA] = 300;
rd->thresh_mult[THR_NEARESTG] = 300;
@@ -966,6 +1276,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_NEARESTL2] = 0;
rd->thresh_mult[THR_NEARESTL3] = 0;
rd->thresh_mult[THR_NEARESTB] = 0;
+ rd->thresh_mult[THR_NEARESTA2] = 0;
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEARESTA] = 0;
rd->thresh_mult[THR_NEARESTG] = 0;
@@ -978,6 +1289,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_NEWL2] += 1000;
rd->thresh_mult[THR_NEWL3] += 1000;
rd->thresh_mult[THR_NEWB] += 1000;
+ rd->thresh_mult[THR_NEWA2] = 1000;
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEWA] += 1000;
rd->thresh_mult[THR_NEWG] += 1000;
@@ -987,6 +1299,7 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_NEARL2] += 1000;
rd->thresh_mult[THR_NEARL3] += 1000;
rd->thresh_mult[THR_NEARB] += 1000;
+ rd->thresh_mult[THR_NEARA2] = 1000;
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEARA] += 1000;
rd->thresh_mult[THR_NEARG] += 1000;
@@ -996,14 +1309,13 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_ZEROL2] += 2000;
rd->thresh_mult[THR_ZEROL3] += 2000;
rd->thresh_mult[THR_ZEROB] += 2000;
+ rd->thresh_mult[THR_ZEROA2] = 2000;
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_ZEROG] += 2000;
rd->thresh_mult[THR_ZEROA] += 2000;
rd->thresh_mult[THR_TM] += 1000;
-#if CONFIG_EXT_INTER
-
#if CONFIG_COMPOUND_SINGLEREF
rd->thresh_mult[THR_SR_NEAREST_NEARMV] += 1200;
#if CONFIG_EXT_REFS
@@ -1063,6 +1375,10 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] += 1000;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] += 1000;
rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] += 1000;
#if CONFIG_EXT_COMP_REFS
rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] += 1000;
@@ -1072,31 +1388,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
-#else // CONFIG_EXT_INTER
-
- rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
-#if CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_NEARESTL2A] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTL3A] += 1000;
-#endif // CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
-#if CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_NEARESTLB] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTL2B] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTL3B] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTGB] += 1000;
-#if CONFIG_EXT_COMP_REFS
- rd->thresh_mult[THR_COMP_NEARESTLL2] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTLL3] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTLG] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTBA] += 1000;
-#endif // CONFIG_EXT_COMP_REFS
-#endif // CONFIG_EXT_REFS
-
-#endif // CONFIG_EXT_INTER
-
-#if CONFIG_EXT_INTER
-
rd->thresh_mult[THR_COMP_NEAR_NEARLA] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWLA] += 1500;
rd->thresh_mult[THR_COMP_NEW_NEARESTLA] += 1500;
@@ -1164,6 +1455,38 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_NEW_NEWGB] += 2000;
rd->thresh_mult[THR_COMP_ZERO_ZEROGB] += 2500;
+ rd->thresh_mult[THR_COMP_NEAR_NEARLA2] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWLA2] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARLA2] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWLA2] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROLA2] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARL2A2] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWL2A2] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROL2A2] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARL3A2] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWL3A2] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROL3A2] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAR_NEARGA2] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWGA2] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARGA2] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWGA2] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROGA2] += 2500;
+
#if CONFIG_EXT_COMP_REFS
rd->thresh_mult[THR_COMP_NEAR_NEARLL2] += 1200;
rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] += 1500;
@@ -1199,64 +1522,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
-#else // CONFIG_EXT_INTER
-
- rd->thresh_mult[THR_COMP_NEARLA] += 1500;
- rd->thresh_mult[THR_COMP_NEWLA] += 2000;
-#if CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_NEARL2A] += 1500;
- rd->thresh_mult[THR_COMP_NEWL2A] += 2000;
- rd->thresh_mult[THR_COMP_NEARL3A] += 1500;
- rd->thresh_mult[THR_COMP_NEWL3A] += 2000;
-#endif // CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_NEARGA] += 1500;
- rd->thresh_mult[THR_COMP_NEWGA] += 2000;
-
-#if CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_NEARLB] += 1500;
- rd->thresh_mult[THR_COMP_NEWLB] += 2000;
- rd->thresh_mult[THR_COMP_NEARL2B] += 1500;
- rd->thresh_mult[THR_COMP_NEWL2B] += 2000;
- rd->thresh_mult[THR_COMP_NEARL3B] += 1500;
- rd->thresh_mult[THR_COMP_NEWL3B] += 2000;
- rd->thresh_mult[THR_COMP_NEARGB] += 1500;
- rd->thresh_mult[THR_COMP_NEWGB] += 2000;
-
-#if CONFIG_EXT_COMP_REFS
- rd->thresh_mult[THR_COMP_NEARLL2] += 1500;
- rd->thresh_mult[THR_COMP_NEWLL2] += 2000;
- rd->thresh_mult[THR_COMP_NEARLL3] += 1500;
- rd->thresh_mult[THR_COMP_NEWLL3] += 2000;
- rd->thresh_mult[THR_COMP_NEARLG] += 1500;
- rd->thresh_mult[THR_COMP_NEWLG] += 2000;
- rd->thresh_mult[THR_COMP_NEARBA] += 1500;
- rd->thresh_mult[THR_COMP_NEWBA] += 2000;
-#endif // CONFIG_EXT_COMP_REFS
-#endif // CONFIG_EXT_REFS
-
- rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
-#if CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_ZEROL2A] += 2500;
- rd->thresh_mult[THR_COMP_ZEROL3A] += 2500;
-#endif // CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
-
-#if CONFIG_EXT_REFS
- rd->thresh_mult[THR_COMP_ZEROLB] += 2500;
- rd->thresh_mult[THR_COMP_ZEROL2B] += 2500;
- rd->thresh_mult[THR_COMP_ZEROL3B] += 2500;
- rd->thresh_mult[THR_COMP_ZEROGB] += 2500;
-
-#if CONFIG_EXT_COMP_REFS
- rd->thresh_mult[THR_COMP_ZEROLL2] += 2500;
- rd->thresh_mult[THR_COMP_ZEROLL3] += 2500;
- rd->thresh_mult[THR_COMP_ZEROLG] += 2500;
- rd->thresh_mult[THR_COMP_ZEROBA] += 2500;
-#endif // CONFIG_EXT_COMP_REFS
-#endif // CONFIG_EXT_REFS
-
-#endif // CONFIG_EXT_INTER
-
rd->thresh_mult[THR_H_PRED] += 2000;
rd->thresh_mult[THR_V_PRED] += 2000;
rd->thresh_mult[THR_D135_PRED] += 2500;
@@ -1266,7 +1531,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_D117_PRED] += 2500;
rd->thresh_mult[THR_D45_PRED] += 2500;
-#if CONFIG_EXT_INTER
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROL] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARL] += 1500;
@@ -1294,13 +1558,17 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTB] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARB] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWB] += 2000;
+
+ rd->thresh_mult[THR_COMP_INTERINTRA_ZEROA2] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTA2] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARA2] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEWA2] += 2000;
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_INTERINTRA_ZEROA] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEARA] += 1500;
rd->thresh_mult[THR_COMP_INTERINTRA_NEWA] += 2000;
-#endif // CONFIG_EXT_INTER
}
void av1_set_rd_speed_thresholds_sub8x8(AV1_COMP *cpi) {
@@ -1312,6 +1580,11 @@ void av1_set_rd_speed_thresholds_sub8x8(AV1_COMP *cpi) {
2500,
2500,
2500,
+ 2500,
+ 4500,
+ 4500,
+ 4500,
+ 4500,
4500,
4500,
4500,
@@ -1321,7 +1594,7 @@ void av1_set_rd_speed_thresholds_sub8x8(AV1_COMP *cpi) {
4500,
4500,
2500
-#else
+#else // !CONFIG_EXT_REFS
2500,
2500,
2500,
diff --git a/third_party/aom/av1/encoder/rd.h b/third_party/aom/av1/encoder/rd.h
index ea5115b41..35ada8e6c 100644
--- a/third_party/aom/av1/encoder/rd.h
+++ b/third_party/aom/av1/encoder/rd.h
@@ -43,14 +43,6 @@ extern "C" {
#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120
-#define INVALID_MV 0x80008000
-
-#if CONFIG_EXT_REFS
-#define MAX_REFS 15
-#else
-#define MAX_REFS 6
-#endif // CONFIG_EXT_REFS
-
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
@@ -62,6 +54,7 @@ typedef enum {
THR_NEARESTL2,
THR_NEARESTL3,
THR_NEARESTB,
+ THR_NEARESTA2,
#endif // CONFIG_EXT_REFS
THR_NEARESTA,
THR_NEARESTG,
@@ -73,6 +66,7 @@ typedef enum {
THR_NEWL2,
THR_NEWL3,
THR_NEWB,
+ THR_NEWA2,
#endif // CONFIG_EXT_REFS
THR_NEWA,
THR_NEWG,
@@ -82,6 +76,7 @@ typedef enum {
THR_NEARL2,
THR_NEARL3,
THR_NEARB,
+ THR_NEARA2,
#endif // CONFIG_EXT_REFS
THR_NEARA,
THR_NEARG,
@@ -91,11 +86,10 @@ typedef enum {
THR_ZEROL2,
THR_ZEROL3,
THR_ZEROB,
+ THR_ZEROA2,
#endif // CONFIG_EXT_REFS
- THR_ZEROG,
THR_ZEROA,
-
-#if CONFIG_EXT_INTER
+ THR_ZEROG,
#if CONFIG_COMPOUND_SINGLEREF
THR_SR_NEAREST_NEARMV,
@@ -156,6 +150,10 @@ typedef enum {
THR_COMP_NEAREST_NEARESTL2B,
THR_COMP_NEAREST_NEARESTL3B,
THR_COMP_NEAREST_NEARESTGB,
+ THR_COMP_NEAREST_NEARESTLA2,
+ THR_COMP_NEAREST_NEARESTL2A2,
+ THR_COMP_NEAREST_NEARESTL3A2,
+ THR_COMP_NEAREST_NEARESTGA2,
#if CONFIG_EXT_COMP_REFS
THR_COMP_NEAREST_NEARESTLL2,
THR_COMP_NEAREST_NEARESTLL3,
@@ -164,40 +162,13 @@ typedef enum {
#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
-#else // CONFIG_EXT_INTER
-
- THR_COMP_NEARESTLA,
-#if CONFIG_EXT_REFS
- THR_COMP_NEARESTL2A,
- THR_COMP_NEARESTL3A,
-#endif // CONFIG_EXT_REFS
- THR_COMP_NEARESTGA,
-#if CONFIG_EXT_REFS
- THR_COMP_NEARESTLB,
- THR_COMP_NEARESTL2B,
- THR_COMP_NEARESTL3B,
- THR_COMP_NEARESTGB,
-#if CONFIG_EXT_COMP_REFS
- THR_COMP_NEARESTLL2,
- THR_COMP_NEARESTLL3,
- THR_COMP_NEARESTLG,
- THR_COMP_NEARESTBA,
-#endif // CONFIG_EXT_COMP_REFS
-#endif // CONFIG_EXT_REFS
-
-#endif // CONFIG_EXT_INTER
-
THR_TM,
-#if CONFIG_ALT_INTRA
THR_SMOOTH,
#if CONFIG_SMOOTH_HV
THR_SMOOTH_V,
THR_SMOOTH_H,
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
-
-#if CONFIG_EXT_INTER
THR_COMP_NEAR_NEARLA,
THR_COMP_NEW_NEARESTLA,
@@ -266,6 +237,38 @@ typedef enum {
THR_COMP_NEW_NEWGB,
THR_COMP_ZERO_ZEROGB,
+ THR_COMP_NEAR_NEARLA2,
+ THR_COMP_NEW_NEARESTLA2,
+ THR_COMP_NEAREST_NEWLA2,
+ THR_COMP_NEW_NEARLA2,
+ THR_COMP_NEAR_NEWLA2,
+ THR_COMP_NEW_NEWLA2,
+ THR_COMP_ZERO_ZEROLA2,
+
+ THR_COMP_NEAR_NEARL2A2,
+ THR_COMP_NEW_NEARESTL2A2,
+ THR_COMP_NEAREST_NEWL2A2,
+ THR_COMP_NEW_NEARL2A2,
+ THR_COMP_NEAR_NEWL2A2,
+ THR_COMP_NEW_NEWL2A2,
+ THR_COMP_ZERO_ZEROL2A2,
+
+ THR_COMP_NEAR_NEARL3A2,
+ THR_COMP_NEW_NEARESTL3A2,
+ THR_COMP_NEAREST_NEWL3A2,
+ THR_COMP_NEW_NEARL3A2,
+ THR_COMP_NEAR_NEWL3A2,
+ THR_COMP_NEW_NEWL3A2,
+ THR_COMP_ZERO_ZEROL3A2,
+
+ THR_COMP_NEAR_NEARGA2,
+ THR_COMP_NEW_NEARESTGA2,
+ THR_COMP_NEAREST_NEWGA2,
+ THR_COMP_NEW_NEARGA2,
+ THR_COMP_NEAR_NEWGA2,
+ THR_COMP_NEW_NEWGA2,
+ THR_COMP_ZERO_ZEROGA2,
+
#if CONFIG_EXT_COMP_REFS
THR_COMP_NEAR_NEARLL2,
THR_COMP_NEW_NEARESTLL2,
@@ -301,64 +304,6 @@ typedef enum {
#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
-#else // CONFIG_EXT_INTER
-
- THR_COMP_NEARLA,
- THR_COMP_NEWLA,
-#if CONFIG_EXT_REFS
- THR_COMP_NEARL2A,
- THR_COMP_NEWL2A,
- THR_COMP_NEARL3A,
- THR_COMP_NEWL3A,
-#endif // CONFIG_EXT_REFS
- THR_COMP_NEARGA,
- THR_COMP_NEWGA,
-
-#if CONFIG_EXT_REFS
- THR_COMP_NEARLB,
- THR_COMP_NEWLB,
- THR_COMP_NEARL2B,
- THR_COMP_NEWL2B,
- THR_COMP_NEARL3B,
- THR_COMP_NEWL3B,
- THR_COMP_NEARGB,
- THR_COMP_NEWGB,
-
-#if CONFIG_EXT_COMP_REFS
- THR_COMP_NEARLL2,
- THR_COMP_NEWLL2,
- THR_COMP_NEARLL3,
- THR_COMP_NEWLL3,
- THR_COMP_NEARLG,
- THR_COMP_NEWLG,
- THR_COMP_NEARBA,
- THR_COMP_NEWBA,
-#endif // CONFIG_EXT_COMP_REFS
-#endif // CONFIG_EXT_REFS
-
- THR_COMP_ZEROLA,
-#if CONFIG_EXT_REFS
- THR_COMP_ZEROL2A,
- THR_COMP_ZEROL3A,
-#endif // CONFIG_EXT_REFS
- THR_COMP_ZEROGA,
-
-#if CONFIG_EXT_REFS
- THR_COMP_ZEROLB,
- THR_COMP_ZEROL2B,
- THR_COMP_ZEROL3B,
- THR_COMP_ZEROGB,
-
-#if CONFIG_EXT_COMP_REFS
- THR_COMP_ZEROLL2,
- THR_COMP_ZEROLL3,
- THR_COMP_ZEROLG,
- THR_COMP_ZEROBA,
-#endif // CONFIG_EXT_COMP_REFS
-#endif // CONFIG_EXT_REFS
-
-#endif // CONFIG_EXT_INTER
-
THR_H_PRED,
THR_V_PRED,
THR_D135_PRED,
@@ -368,7 +313,6 @@ typedef enum {
THR_D117_PRED,
THR_D45_PRED,
-#if CONFIG_EXT_INTER
THR_COMP_INTERINTRA_ZEROL,
THR_COMP_INTERINTRA_NEARESTL,
THR_COMP_INTERINTRA_NEARL,
@@ -396,13 +340,17 @@ typedef enum {
THR_COMP_INTERINTRA_NEARESTB,
THR_COMP_INTERINTRA_NEARB,
THR_COMP_INTERINTRA_NEWB,
+
+ THR_COMP_INTERINTRA_ZEROA2,
+ THR_COMP_INTERINTRA_NEARESTA2,
+ THR_COMP_INTERINTRA_NEARA2,
+ THR_COMP_INTERINTRA_NEWA2,
#endif // CONFIG_EXT_REFS
THR_COMP_INTERINTRA_ZEROA,
THR_COMP_INTERINTRA_NEARESTA,
THR_COMP_INTERINTRA_NEARA,
THR_COMP_INTERINTRA_NEWA,
-#endif // CONFIG_EXT_INTER
MAX_MODES
} THR_MODES;
@@ -412,6 +360,7 @@ typedef enum {
THR_LAST2,
THR_LAST3,
THR_BWDR,
+ THR_ALTR2,
#endif // CONFIG_EXT_REFS
THR_GOLD,
THR_ALTR,
@@ -428,9 +377,16 @@ typedef enum {
THR_COMP_L2B,
THR_COMP_L3B,
THR_COMP_GB,
+
+ THR_COMP_LA2,
+ THR_COMP_L2A2,
+ THR_COMP_L3A2,
+ THR_COMP_GA2,
#endif // CONFIG_EXT_REFS
THR_INTRA,
+
+ MAX_REFS
} THR_MODES_SUB8X8;
typedef struct RD_OPT {
@@ -458,10 +414,8 @@ static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
rd_stats->sse = 0;
rd_stats->skip = 1;
rd_stats->zero_rate = 0;
+ rd_stats->invalid_rate = 0;
rd_stats->ref_rdcost = INT64_MAX;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- rd_stats->dist_y = 0;
-#endif
#if CONFIG_RD_DEBUG
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats->txb_coeff_cost[plane] = 0;
@@ -487,10 +441,8 @@ static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
rd_stats->sse = INT64_MAX;
rd_stats->skip = 0;
rd_stats->zero_rate = 0;
+ rd_stats->invalid_rate = 1;
rd_stats->ref_rdcost = INT64_MAX;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- rd_stats->dist_y = INT64_MAX;
-#endif
#if CONFIG_RD_DEBUG
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats->txb_coeff_cost[plane] = INT_MAX;
@@ -515,9 +467,7 @@ static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
rd_stats_dst->dist += rd_stats_src->dist;
rd_stats_dst->sse += rd_stats_src->sse;
rd_stats_dst->skip &= rd_stats_src->skip;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- rd_stats_dst->dist_y += rd_stats_src->dist_y;
-#endif
+ rd_stats_dst->invalid_rate &= rd_stats_src->invalid_rate;
#if CONFIG_RD_DEBUG
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
@@ -539,6 +489,16 @@ static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
#endif
}
+static INLINE int av1_get_coeff_token_cost(int token, int eob_val, int is_first,
+ const int *head_cost_table,
+ const int *tail_cost_table) {
+ if (eob_val == LAST_EOB) return av1_cost_zero(128);
+ const int comb_symb = 2 * AOMMIN(token, TWO_TOKEN) - eob_val + is_first;
+ int cost = head_cost_table[comb_symb];
+ if (token > ONE_TOKEN) cost += tail_cost_table[token - TWO_TOKEN];
+ return cost;
+}
+
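The new av1_get_coeff_token_cost() above packs (token, eob flag, first-coefficient flag) into one head symbol, with everything at or above TWO_TOKEN sharing a head entry and paying a tail cost on top. A hypothetical spot-check of the packing, assuming the usual ordering ZERO_TOKEN=0, ONE_TOKEN=1, TWO_TOKEN=2:

#include <assert.h>
#define AOMMIN(a, b) ((a) < (b) ? (a) : (b))
enum { ZERO_TOKEN, ONE_TOKEN, TWO_TOKEN };
static int comb_symb(int token, int eob_val, int is_first) {
  return 2 * AOMMIN(token, TWO_TOKEN) - eob_val + is_first;
}
int main(void) {
  assert(comb_symb(ZERO_TOKEN, 0, 0) == 0);    /* run-of-play zero */
  assert(comb_symb(ONE_TOKEN, 0, 1) == 3);     /* first coeff, magnitude one */
  assert(comb_symb(TWO_TOKEN + 3, 0, 0) == 4); /* big tokens share a head */
  return 0;
}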
struct TileInfo;
struct TileDataEnc;
struct AV1_COMP;
@@ -554,7 +514,8 @@ void av1_initialize_me_consts(const struct AV1_COMP *cpi, MACROBLOCK *x,
void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
unsigned int qstep, int *rate, int64_t *dist);
-int av1_get_switchable_rate(const struct AV1_COMP *cpi, const MACROBLOCKD *xd);
+int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
+ const MACROBLOCKD *xd);
int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
int stride);
@@ -583,9 +544,6 @@ void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,
int (*fact)[MAX_MODES], int rd_thresh, int bsize,
int best_mode_index);
-void av1_fill_token_costs(av1_coeff_cost *c,
- av1_coeff_probs_model (*p)[PLANE_TYPES]);
-
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
int thresh_fact) {
return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
@@ -609,6 +567,16 @@ void av1_setup_pred_block(const MACROBLOCKD *xd,
int av1_get_intra_cost_penalty(int qindex, int qdelta,
aom_bit_depth_t bit_depth);
+void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x,
+ FRAME_CONTEXT *fc);
+
+#if CONFIG_LV_MAP
+void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc);
+#endif
+
+void av1_fill_token_costs_from_cdf(av1_coeff_cost *cost,
+ coeff_cdf_model (*cdf)[PLANE_TYPES]);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/aom/av1/encoder/rdopt.c b/third_party/aom/av1/encoder/rdopt.c
index 43b00b83b..607db9b86 100644
--- a/third_party/aom/av1/encoder/rdopt.c
+++ b/third_party/aom/av1/encoder/rdopt.c
@@ -21,12 +21,16 @@
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
+#if CONFIG_CFL
+#include "av1/common/cfl.h"
+#endif
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
+#include "av1/common/obmc.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
@@ -51,19 +55,15 @@
#endif
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
-#if CONFIG_PALETTE
#include "av1/encoder/palette.h"
-#endif // CONFIG_PALETTE
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/tokenize.h"
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
-#endif // CONFIG_PVQ
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
-#endif // CONFIG_PVQ || CONFIG_DIST_8X8
+#endif // CONFIG_PVQ
#if CONFIG_DUAL_FILTER
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
#if USE_EXTRA_FILTER
@@ -82,26 +82,36 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
#if CONFIG_EXT_REFS
-#define LAST_FRAME_MODE_MASK \
- ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
- (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
-#define LAST2_FRAME_MODE_MASK \
- ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
- (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
-#define LAST3_FRAME_MODE_MASK \
- ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
- (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
-#define GOLDEN_FRAME_MODE_MASK \
- ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
- (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
-#define BWDREF_FRAME_MODE_MASK \
- ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
- (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
-#define ALTREF_FRAME_MODE_MASK \
- ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
- (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))
-
-#else
+#define LAST_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
+ (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
+ (1 << ALTREF_FRAME))
+#define LAST2_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
+ (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
+ (1 << ALTREF_FRAME))
+#define LAST3_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
+ (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
+ (1 << ALTREF_FRAME))
+#define GOLDEN_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
+ (1 << ALTREF_FRAME))
+#define BWDREF_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF2_FRAME) | \
+ (1 << ALTREF_FRAME))
+#define ALTREF2_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | \
+ (1 << ALTREF_FRAME))
+#define ALTREF_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | \
+ (1 << ALTREF2_FRAME))
+
+#else // !CONFIG_EXT_REFS
#define LAST_FRAME_MODE_MASK \
((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
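Each *_FRAME_MODE_MASK lists every reference except its namesake; the mode loop uses such masks to skip single-reference modes once a reference has been ruled out. An illustrative membership test (the exact polarity at the call site is not shown in this hunk):

/* Illustration only: a mode is skipped when its reference's bit is set. */
static int ref_in_mask(unsigned mask, int ref_frame) {
  return (mask >> ref_frame) & 1;
}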
@@ -114,11 +124,12 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
#if CONFIG_EXT_REFS
#if CONFIG_EXT_COMP_REFS
-#define SECOND_REF_FRAME_MASK \
- ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | (1 << GOLDEN_FRAME) | \
- (1 << LAST2_FRAME) | 0x01) // NOLINT
-#else // !CONFIG_EXT_COMP_REFS
-#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
+#define SECOND_REF_FRAME_MASK \
+ ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | \
+ (1 << GOLDEN_FRAME) | (1 << LAST2_FRAME) | 0x01)
+#else // !CONFIG_EXT_COMP_REFS
+#define SECOND_REF_FRAME_MASK \
+ ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | 0x01)
#endif // CONFIG_EXT_COMP_REFS
#else // !CONFIG_EXT_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
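
In every variant the trailing 0x01 is the INTRA_FRAME bit: assuming the usual
enum layout where INTRA_FRAME == 0, (1 << INTRA_FRAME) == 0x01, so the mask
always admits intra as the second "reference". A hedged way to make that
explicit:

    /* Equivalent, assuming INTRA_FRAME == 0 in the reference-frame enum. */
    #define SECOND_REF_INTRA_BIT (1 << INTRA_FRAME) /* == 0x01 */
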
@@ -135,10 +146,16 @@ static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
// Setting this to 1 will disable trellis optimization within the
// transform search. Trellis optimization will still be applied
// in the final encode.
+#ifndef DISABLE_TRELLISQ_SEARCH
#define DISABLE_TRELLISQ_SEARCH 0
+#endif
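
Wrapping the default in #ifndef turns the constant into a build-time knob: a
definition supplied on the compiler command line now takes precedence over the
in-file default. A sketch of the idiom (the -D invocation below is a
hypothetical example, not a documented build option):

    /* cc -DDISABLE_TRELLISQ_SEARCH=1 ... would disable trellis optimization
     * in the transform search without editing this file. */
    #ifndef DISABLE_TRELLISQ_SEARCH
    #define DISABLE_TRELLISQ_SEARCH 0 /* default: trellis search enabled */
    #endif
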
-const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671, // vert
- -7.7051, -3.2234, -3.6193, 3.4533 }; // horz
+static const double ADST_FLIP_SVM[8] = {
+ /* vertical */
+ -6.6623, -2.8062, -3.2531, 3.1671,
+ /* horizontal */
+ -7.7051, -3.2234, -3.6193, 3.4533
+};
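
The eight weights read as two 4-entry linear decision boundaries, one for the
vertical and one for the horizontal direction. A minimal sketch of how such a
boundary is typically evaluated, assuming the first three entries weight a
3-D feature vector and the fourth acts as a bias term (the feature extraction
itself is not shown in this hunk and is hypothetical here):

    /* A positive score would favor the flipped ADST for that direction. */
    static double adst_flip_score(const double w[4], const double feat[3]) {
      return w[0] * feat[0] + w[1] * feat[1] + w[2] * feat[2] + w[3];
    }
    /* e.g. adst_flip_score(&ADST_FLIP_SVM[0], f)  -- vertical boundary
     *      adst_flip_score(&ADST_FLIP_SVM[4], f)  -- horizontal boundary */
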
typedef struct {
PREDICTION_MODE mode;
@@ -166,6 +183,7 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
{ NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
{ NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
+ { NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
{ NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
@@ -177,6 +195,7 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEWMV, { LAST2_FRAME, NONE_FRAME } },
{ NEWMV, { LAST3_FRAME, NONE_FRAME } },
{ NEWMV, { BWDREF_FRAME, NONE_FRAME } },
+ { NEWMV, { ALTREF2_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEWMV, { ALTREF_FRAME, NONE_FRAME } },
{ NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
@@ -186,6 +205,7 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEARMV, { LAST2_FRAME, NONE_FRAME } },
{ NEARMV, { LAST3_FRAME, NONE_FRAME } },
{ NEARMV, { BWDREF_FRAME, NONE_FRAME } },
+ { NEARMV, { ALTREF2_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEARMV, { ALTREF_FRAME, NONE_FRAME } },
{ NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
@@ -195,14 +215,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ ZEROMV, { LAST2_FRAME, NONE_FRAME } },
{ ZEROMV, { LAST3_FRAME, NONE_FRAME } },
{ ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
+ { ZEROMV, { ALTREF2_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_REFS
{ ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
{ ZEROMV, { ALTREF_FRAME, NONE_FRAME } },
  // TODO(zoeliu): May need to reconsider the order of the modes to check
-#if CONFIG_EXT_INTER
-
#if CONFIG_COMPOUND_SINGLEREF
// Single ref comp mode
{ SR_NEAREST_NEARMV, { LAST_FRAME, NONE_FRAME } },
@@ -263,6 +282,10 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
+ { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
+ { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
+ { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
#if CONFIG_EXT_COMP_REFS
{ NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
@@ -272,40 +295,14 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
-#else // CONFIG_EXT_INTER
-
- { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
-#if CONFIG_EXT_REFS
- { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
-#endif // CONFIG_EXT_REFS
- { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-#if CONFIG_EXT_REFS
- { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-
-#if CONFIG_EXT_COMP_REFS
- { NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
- { NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
- { NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
- { NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
-#endif // CONFIG_EXT_COMP_REFS
-#endif // CONFIG_EXT_REFS
-#endif // CONFIG_EXT_INTER
-
{ TM_PRED, { INTRA_FRAME, NONE_FRAME } },
-#if CONFIG_ALT_INTRA
{ SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
#if CONFIG_SMOOTH_HV
{ SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
{ SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
-#if CONFIG_EXT_INTER
{ NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
@@ -373,6 +370,38 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
+ { NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
+ { NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
+ { NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
+ { NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
+ { ZERO_ZEROMV, { LAST_FRAME, ALTREF2_FRAME } },
+
+ { NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
+ { NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
+ { NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
+ { NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
+ { NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
+ { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
+ { ZERO_ZEROMV, { LAST2_FRAME, ALTREF2_FRAME } },
+
+ { NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
+ { NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
+ { NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
+ { NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
+ { NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
+ { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
+ { ZERO_ZEROMV, { LAST3_FRAME, ALTREF2_FRAME } },
+
+ { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
+ { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
+ { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
+ { NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
+ { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
+ { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
+ { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
+
#if CONFIG_EXT_COMP_REFS
{ NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
@@ -408,64 +437,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
#endif // CONFIG_EXT_COMP_REFS
#endif // CONFIG_EXT_REFS
-#else // !CONFIG_EXT_INTER
-
- { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
- { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
-#if CONFIG_EXT_REFS
- { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
- { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
- { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
-#endif // CONFIG_EXT_REFS
- { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
- { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-
-#if CONFIG_EXT_REFS
- { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
- { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
- { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
- { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
- { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-
-#if CONFIG_EXT_COMP_REFS
- { NEARMV, { LAST_FRAME, LAST2_FRAME } },
- { NEWMV, { LAST_FRAME, LAST2_FRAME } },
- { NEARMV, { LAST_FRAME, LAST3_FRAME } },
- { NEWMV, { LAST_FRAME, LAST3_FRAME } },
- { NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
- { NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
- { NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
- { NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
-#endif // CONFIG_EXT_COMP_REFS
-#endif // CONFIG_EXT_REFS
-
- { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
-#if CONFIG_EXT_REFS
- { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
- { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
-#endif // CONFIG_EXT_REFS
- { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
-
-#if CONFIG_EXT_REFS
- { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
- { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
- { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
- { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
-
-#if CONFIG_EXT_COMP_REFS
- { ZEROMV, { LAST_FRAME, LAST2_FRAME } },
- { ZEROMV, { LAST_FRAME, LAST3_FRAME } },
- { ZEROMV, { LAST_FRAME, GOLDEN_FRAME } },
- { ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } },
-#endif // CONFIG_EXT_COMP_REFS
-#endif // CONFIG_EXT_REFS
-
-#endif // CONFIG_EXT_INTER
-
{ H_PRED, { INTRA_FRAME, NONE_FRAME } },
{ V_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D135_PRED, { INTRA_FRAME, NONE_FRAME } },
@@ -475,7 +446,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ D117_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D45_PRED, { INTRA_FRAME, NONE_FRAME } },
-#if CONFIG_EXT_INTER
{ ZEROMV, { LAST_FRAME, INTRA_FRAME } },
{ NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
{ NEARMV, { LAST_FRAME, INTRA_FRAME } },
@@ -503,37 +473,34 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
{ NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
{ NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
+
+ { ZEROMV, { ALTREF2_FRAME, INTRA_FRAME } },
+ { NEARESTMV, { ALTREF2_FRAME, INTRA_FRAME } },
+ { NEARMV, { ALTREF2_FRAME, INTRA_FRAME } },
+ { NEWMV, { ALTREF2_FRAME, INTRA_FRAME } },
#endif // CONFIG_EXT_REFS
{ ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
{ NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
{ NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
{ NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
-#endif // CONFIG_EXT_INTER
};
static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
- DC_PRED, H_PRED, V_PRED,
-#if CONFIG_ALT_INTRA
- SMOOTH_PRED,
-#endif // CONFIG_ALT_INTRA
- TM_PRED,
-#if CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+ DC_PRED, H_PRED, V_PRED, SMOOTH_PRED, TM_PRED,
+#if CONFIG_SMOOTH_HV
SMOOTH_V_PRED, SMOOTH_H_PRED,
-#endif // CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
- D135_PRED, D207_PRED, D153_PRED, D63_PRED, D117_PRED, D45_PRED,
+#endif // CONFIG_SMOOTH_HV
+ D135_PRED, D207_PRED, D153_PRED, D63_PRED, D117_PRED, D45_PRED,
};
#if CONFIG_CFL
static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
- UV_DC_PRED, UV_H_PRED, UV_V_PRED,
-#if CONFIG_ALT_INTRA
- UV_SMOOTH_PRED,
-#endif // CONFIG_ALT_INTRA
- UV_TM_PRED,
-#if CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+ UV_DC_PRED, UV_CFL_PRED, UV_H_PRED,
+ UV_V_PRED, UV_SMOOTH_PRED, UV_TM_PRED,
+#if CONFIG_SMOOTH_HV
UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
-#endif // CONFIG_ALT_INTRA && CONFIG_SMOOTH_HV
+#endif // CONFIG_SMOOTH_HV
UV_D135_PRED, UV_D207_PRED, UV_D153_PRED,
UV_D63_PRED, UV_D117_PRED, UV_D45_PRED,
};
@@ -541,7 +508,6 @@ static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
#define uv_rd_search_mode_order intra_rd_search_mode_order
#endif // CONFIG_CFL
-#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
static INLINE int write_uniform_cost(int n, int v) {
const int l = get_unsigned_bits(n);
const int m = (1 << l) - n;
@@ -551,7 +517,6 @@ static INLINE int write_uniform_cost(int n, int v) {
else
return l * av1_cost_bit(128, 0);
}
-#endif // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
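
write_uniform_cost() charges the rate of a truncated binary code over n
symbols: with l = get_unsigned_bits(n) = floor(log2(n)) + 1 and
m = (1 << l) - n, the first m symbols cost l - 1 bits and the remaining ones
l bits. A worked example, assuming av1_cost_bit(128, 0) is the cost of one
equiprobable bit:

    /* n = 5: l = 3, m = (1 << 3) - 5 = 3, so
     *   v = 0..2 -> 2 bits: write_uniform_cost(5, 1) == 2 * av1_cost_bit(128, 0)
     *   v = 3..4 -> 3 bits: write_uniform_cost(5, 4) == 3 * av1_cost_bit(128, 0)
     */
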
// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
@@ -559,7 +524,82 @@ static INLINE int write_uniform_cost(int n, int v) {
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3
-#if CONFIG_DAALA_DIST
+static unsigned pixel_dist_visible_only(
+ const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src,
+ const int src_stride, const uint8_t *dst, const int dst_stride,
+ const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows,
+ int visible_cols) {
+ unsigned sse;
+
+ if (txb_rows == visible_rows && txb_cols == visible_cols
+#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
+ && tx_bsize < BLOCK_SIZES
+#endif
+ ) {
+ cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
+ return sse;
+ }
+#if CONFIG_HIGHBITDEPTH
+ const MACROBLOCKD *xd = &x->e_mbd;
+
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
+ visible_cols, visible_rows);
+ return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2);
+ }
+#else
+ (void)x;
+#endif // CONFIG_HIGHBITDEPTH
+ sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols,
+ visible_rows);
+ return sse;
+}
+
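
The fall-through paths above measure SSE only over the pixels of a transform
block that actually lie inside the frame. A plain-C reference for what the
odd-size SSE helpers are assumed to compute (a sketch, not the optimized
implementation):

    #include <stdint.h>

    /* Sum of squared differences over an arbitrary cols x rows window. */
    static unsigned sse_visible_ref(const uint8_t *src, int src_stride,
                                    const uint8_t *dst, int dst_stride,
                                    int cols, int rows) {
      unsigned sse = 0;
      for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < cols; ++c) {
          const int d = src[r * src_stride + c] - dst[r * dst_stride + c];
          sse += (unsigned)(d * d);
        }
      }
      return sse;
    }
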
+#if CONFIG_DIST_8X8
+static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
+ int sstride, int coeff_shift) {
+ uint64_t svar = 0;
+ uint64_t dvar = 0;
+ uint64_t sum_s = 0;
+ uint64_t sum_d = 0;
+ uint64_t sum_s2 = 0;
+ uint64_t sum_d2 = 0;
+ uint64_t sum_sd = 0;
+ uint64_t dist = 0;
+
+ int i, j;
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++) {
+ sum_s += src[i * sstride + j];
+ sum_d += dst[i * dstride + j];
+ sum_s2 += src[i * sstride + j] * src[i * sstride + j];
+ sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
+ sum_sd += src[i * sstride + j] * dst[i * dstride + j];
+ }
+ }
+ /* Compute the variance -- the calculation cannot go negative. */
+ svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
+ dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
+
+  // Tuning of jm's original dering distortion metric used in the CDEF
+  // tool, as suggested by jm.
+ const uint64_t a = 4;
+ const uint64_t b = 2;
+ const uint64_t c1 = (400 * a << 2 * coeff_shift);
+ const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift);
+
+ dist =
+ (uint64_t)floor(.5 +
+ (sum_d2 + sum_s2 - 2 * sum_sd) * .5 * (svar + dvar + c1) /
+ (sqrt(svar * (double)dvar + c2)));
+
+ // Calibrate dist to have similar rate for the same QP with MSE only
+ // distortion (as in master branch)
+ dist = (uint64_t)((float)dist * 0.75);
+
+ return dist;
+}
+
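
Two identities make the metric above easier to read; both follow directly
from the code. For an 8x8 block (64 samples) the shift term is a rounded
divide by 64, and the cross terms collapse to the plain SSE:

    /* svar = sum_s2 - round(sum_s^2 / 64)  ==  64 * Var(s)
     * SSE  = sum_d2 + sum_s2 - 2 * sum_sd  ==  sum((s - d)^2)
     * so:  dist ~= (SSE / 2) * (svar + dvar + c1) / sqrt(svar * dvar + c2)
     * i.e. an SSIM-like variance reweighting of MSE, with c1 and c2
     * absorbing the bit-depth scaling via coeff_shift. */
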
static int od_compute_var_4x4(uint16_t *x, int stride) {
int sum;
int s2;
@@ -617,7 +657,7 @@ static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
}
}
/* We use a different variance statistic depending on whether activity
- masking is used, since the harmonic mean appeared slghtly worse with
+ masking is used, since the harmonic mean appeared slightly worse with
masking off. The calibration constant just ensures that we preserve the
rate compared to activity=1. */
if (use_activity_masking) {
@@ -688,268 +728,241 @@ static double od_compute_dist_common(int activity_masking, uint16_t *x,
static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
int bsize_h, int qindex) {
- int i;
- double sum;
- sum = 0;
-
assert(bsize_w >= 8 && bsize_h >= 8);
-
#if CONFIG_PVQ
int activity_masking = 1;
#else
int activity_masking = 0;
#endif
- {
- int j;
- DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
- DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
- DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
- int mid = OD_DIST_LP_MID;
- for (i = 0; i < bsize_h; i++) {
- for (j = 0; j < bsize_w; j++) {
- e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
- }
+ int i, j;
+ DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
+ for (i = 0; i < bsize_h; i++) {
+ for (j = 0; j < bsize_w; j++) {
+ e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
}
- for (i = 0; i < bsize_h; i++) {
- tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
- tmp[i * bsize_w + bsize_w - 1] =
- mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
- for (j = 1; j < bsize_w - 1; j++) {
- tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
- e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
- }
+ }
+ int mid = OD_DIST_LP_MID;
+ for (i = 0; i < bsize_h; i++) {
+ tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
+ tmp[i * bsize_w + bsize_w - 1] =
+ mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
+ for (j = 1; j < bsize_w - 1; j++) {
+ tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
+ e[i * bsize_w + j + 1];
}
- sum = od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
- qindex, tmp, e_lp);
}
- return sum;
+ return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
+ qindex, tmp, e_lp);
}
static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
int bsize_h, int qindex) {
- int i;
- double sum;
- sum = 0;
-
assert(bsize_w >= 8 && bsize_h >= 8);
-
#if CONFIG_PVQ
int activity_masking = 1;
#else
int activity_masking = 0;
#endif
- {
- int j;
- DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]);
- DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
- DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
- int mid = OD_DIST_LP_MID;
- for (i = 0; i < bsize_h; i++) {
- for (j = 0; j < bsize_w; j++) {
- y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
- }
+ DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
+ int i, j;
+ for (i = 0; i < bsize_h; i++) {
+ for (j = 0; j < bsize_w; j++) {
+ y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
}
- for (i = 0; i < bsize_h; i++) {
- tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
- tmp[i * bsize_w + bsize_w - 1] =
- mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
- for (j = 1; j < bsize_w - 1; j++) {
- tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
- e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
- }
+ }
+ int mid = OD_DIST_LP_MID;
+ for (i = 0; i < bsize_h; i++) {
+ tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
+ tmp[i * bsize_w + bsize_w - 1] =
+ mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
+ for (j = 1; j < bsize_w - 1; j++) {
+ tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
+ e[i * bsize_w + j + 1];
}
- sum = od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
- qindex, tmp, e_lp);
}
- return sum;
+ return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
+ qindex, tmp, e_lp);
}
-#endif // CONFIG_DAALA_DIST
-#if CONFIG_DIST_8X8
-#define NEW_FUTURE_DIST 0
-int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
+int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
const uint8_t *src, int src_stride, const uint8_t *dst,
int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
int bsh, int visible_w, int visible_h, int qindex) {
int64_t d = 0;
-
-#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
int i, j;
+ const MACROBLOCKD *xd = &x->e_mbd;
DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]);
- (void)cpi;
- (void)tx_bsize;
-#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
-#if !CONFIG_HIGHBITDEPTH
- (void)xd;
-#endif
+ assert(bsw >= 8);
+ assert(bsh >= 8);
+ assert((bsw & 0x07) == 0);
+ assert((bsh & 0x07) == 0);
-#if !CONFIG_DAALA_DIST
- (void)qindex;
-#endif
-
-#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST
- (void)xd;
- (void)bsw, (void)bsh;
- (void)visible_w, (void)visible_h;
-#endif
-
-#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+ if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
+ x->tune_metric == AOM_TUNE_DAALA_DIST) {
#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
-
- if ((bsw == visible_w) && (bsh == visible_h)) {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
- } else {
- for (j = 0; j < visible_h; j++)
- for (i = 0; i < visible_w; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
+ orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
- if (visible_w < bsw) {
+ if ((bsw == visible_w) && (bsh == visible_h)) {
for (j = 0; j < bsh; j++)
- for (i = visible_w; i < bsw; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
- }
-
- if (visible_h < bsh) {
- for (j = visible_h; j < bsh; j++)
for (i = 0; i < bsw; i++)
- rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
+
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ }
+
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ }
}
- }
- } else {
+ } else {
#endif
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
-
- if ((bsw == visible_w) && (bsh == visible_h)) {
for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
- } else {
- for (j = 0; j < visible_h; j++)
- for (i = 0; i < visible_w; i++)
- rec[j * bsw + i] = dst[j * dst_stride + i];
+ for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
- if (visible_w < bsw) {
+ if ((bsw == visible_w) && (bsh == visible_h)) {
for (j = 0; j < bsh; j++)
- for (i = visible_w; i < bsw; i++)
- rec[j * bsw + i] = src[j * src_stride + i];
- }
+ for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ rec[j * bsw + i] = dst[j * dst_stride + i];
+
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++)
+ rec[j * bsw + i] = src[j * src_stride + i];
+ }
- if (visible_h < bsh) {
- for (j = visible_h; j < bsh; j++)
- for (i = 0; i < bsw; i++) rec[j * bsw + i] = src[j * src_stride + i];
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ rec[j * bsw + i] = src[j * src_stride + i];
+ }
}
- }
#if CONFIG_HIGHBITDEPTH
- }
+ }
#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+ }
-#if CONFIG_DAALA_DIST
- d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
-#elif NEW_FUTURE_DIST
- // Call new 8x8-wise distortion function here, for example
- for (i = 0; i < bsh; i += 8) {
- for (j = 0; j < bsw; j += 8) {
- d +=
- av1_compute_dist_8x8(&orig[i * bsw + j], &rec[i * bsw + j], bsw, bsh);
+ if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
+ d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
+ } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
+ int coeff_shift = AOMMAX(xd->bd - 8, 0);
+
+ for (i = 0; i < bsh; i += 8) {
+ for (j = 0; j < bsw; j += 8) {
+ d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j],
+ bsw, coeff_shift);
+ }
}
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ d = ((uint64_t)d) >> 2 * coeff_shift;
+#endif
+ } else {
+ // Otherwise, MSE by default
+ d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride,
+ tx_bsize, bsh, bsw, visible_h, visible_w);
}
-#else
- // Otherwise, MSE by default
- unsigned sse;
- // TODO(Any): Use even faster function which does not calculate variance
- cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
- d = sse;
-#endif // CONFIG_DAALA_DIST
return d;
}
-static int64_t av1_dist_8x8_diff(const MACROBLOCKD *xd, const uint8_t *src,
+static int64_t av1_dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src,
int src_stride, const int16_t *diff,
int diff_stride, int bsw, int bsh,
int visible_w, int visible_h, int qindex) {
int64_t d = 0;
-
-#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
int i, j;
+ const MACROBLOCKD *xd = &x->e_mbd;
DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]);
-#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
-#if !CONFIG_HIGHBITDEPTH
- (void)xd;
-#endif
+ assert(bsw >= 8);
+ assert(bsh >= 8);
+ assert((bsw & 0x07) == 0);
+ assert((bsh & 0x07) == 0);
-#if !CONFIG_DAALA_DIST
- (void)qindex;
-#endif
-
-#if !CONFIG_DAALA_DIST || !NEW_FUTURE_DIST
- (void)xd;
- (void)src, (void)src_stride;
- (void)bsw, (void)bsh;
- (void)visible_w, (void)visible_h;
-#endif
-
-#if CONFIG_DAALA_DIST || NEW_FUTURE_DIST
+ if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
+ x->tune_metric == AOM_TUNE_DAALA_DIST) {
#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++)
- orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
- } else {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++)
+ orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
+ } else {
#endif
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
+ for (j = 0; j < bsh; j++)
+ for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
#if CONFIG_HIGHBITDEPTH
- }
+ }
#endif // CONFIG_HIGHBITDEPTH
- if ((bsw == visible_w) && (bsh == visible_h)) {
- for (j = 0; j < bsh; j++)
- for (i = 0; i < bsw; i++) diff16[j * bsw + i] = diff[j * diff_stride + i];
- } else {
- for (j = 0; j < visible_h; j++)
- for (i = 0; i < visible_w; i++)
- diff16[j * bsw + i] = diff[j * diff_stride + i];
-
- if (visible_w < bsw) {
+ if ((bsw == visible_w) && (bsh == visible_h)) {
for (j = 0; j < bsh; j++)
- for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
- }
+ for (i = 0; i < bsw; i++)
+ diff16[j * bsw + i] = diff[j * diff_stride + i];
+ } else {
+ for (j = 0; j < visible_h; j++)
+ for (i = 0; i < visible_w; i++)
+ diff16[j * bsw + i] = diff[j * diff_stride + i];
- if (visible_h < bsh) {
- for (j = visible_h; j < bsh; j++)
- for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
+ if (visible_w < bsw) {
+ for (j = 0; j < bsh; j++)
+ for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
+ }
+
+ if (visible_h < bsh) {
+ for (j = visible_h; j < bsh; j++)
+ for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
+ }
}
}
-#endif // CONFIG_DAALA_DIST || NEW_FUTURE_DIST
-#if CONFIG_DAALA_DIST
- d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
-#elif NEW_FUTURE_DIST
- // Call new 8x8-wise distortion function (with diff inpu) here, for example
- for (i = 0; i < bsh; i += 8) {
- for (j = 0; j < bsw; j += 8) {
- d += av1_compute_dist_8x8_diff(&orig[i * bsw + j], &diff16[i * bsw + j],
- bsw, bsh);
+ if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
+ d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
+ } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
+ int coeff_shift = AOMMAX(xd->bd - 8, 0);
+ DECLARE_ALIGNED(16, uint16_t, dst16[MAX_TX_SQUARE]);
+
+ for (i = 0; i < bsh; i++) {
+ for (j = 0; j < bsw; j++) {
+ dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j];
+ }
}
+
+ for (i = 0; i < bsh; i += 8) {
+ for (j = 0; j < bsw; j += 8) {
+ d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j],
+ bsw, coeff_shift);
+ }
+ }
+  // Don't scale 'd' for HBD here, since the caller scales it for diff input.
+ } else {
+ // Otherwise, MSE by default
+ d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h);
}
-#else
- // Otherwise, MSE by default
- d = aom_sum_squares_2d_i16(diff, diff_stride, bsw, bsh);
-#endif // CONFIG_DAALA_DIST
return d;
}
@@ -1169,6 +1182,17 @@ static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
pd->dst.stride);
}
+#if CONFIG_EXT_TX
+// 1D transforms used in the inter set; this needs to be changed if
+// ext_tx_used_inter is changed (see the consistency-check sketch below).
+static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
+ { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 },
+#if CONFIG_MRC_TX
+ { 1, 0, 0, 1 },
+#endif // CONFIG_MRC_TX
+};
+#endif // CONFIG_EXT_TX
+
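
Because the 1-D table above must stay in lockstep with ext_tx_used_inter, a
debug-only consistency check is cheap insurance. A hedged sketch, assuming
the htx_tab / vtx_tab TX_TYPE -> TX_TYPE_1D lookup tables used by the pruning
code in this file (and <assert.h>):

    /* Every 2-D type enabled in ext_tx_used_inter should have both of its
     * 1-D components enabled in ext_tx_used_inter_1D. */
    static void validate_ext_tx_used_inter_1D(void) {
      for (int set = 0; set < EXT_TX_SETS_INTER; ++set) {
        for (int t = 0; t < TX_TYPES; ++t) {
          if (!ext_tx_used_inter[set][t]) continue;
          assert(ext_tx_used_inter_1D[set][htx_tab[t]]);
          assert(ext_tx_used_inter_1D[set][vtx_tab[t]]);
        }
      }
    }
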
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
const MACROBLOCKD *const xd, int tx_set) {
#if CONFIG_EXT_TX
@@ -1392,22 +1416,18 @@ static int64_t av1_block_error2_c(const tran_low_t *coeff,
const tran_low_t *ref, intptr_t block_size,
int64_t *ssz) {
int64_t error;
+ int64_t ssz_trash;
// Use the existing sse codes for calculating distortion of decoded signal:
// i.e. (orig - decoded)^2
- error = av1_block_error_fp(coeff, dqcoeff, block_size);
+ error = av1_block_error(coeff, dqcoeff, block_size, &ssz_trash);
// prediction residue^2 = (orig - ref)^2
- *ssz = av1_block_error_fp(coeff, ref, block_size);
+ *ssz = av1_block_error(coeff, ref, block_size, &ssz_trash);
return error;
}
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_PVQ
#if !CONFIG_PVQ || CONFIG_VAR_TX
-/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
- * decide whether to include cost of a trailing EOB node or not (i.e. we
- * can skip this if the last coefficient in this transform block, e.g. the
- * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
- * were non-zero). */
#if !CONFIG_LV_MAP
static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
@@ -1421,17 +1441,19 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
const uint16_t *band_count = &band_count_table[tx_size][1];
const int eob = p->eobs[block];
const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- const int tx_size_ctx = txsize_sqr_map[tx_size];
- unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
+ const TX_SIZE tx_size_ctx = txsize_sqr_map[tx_size];
uint8_t token_cache[MAX_TX_SQUARE];
int pt = combine_entropy_contexts(*a, *l);
int c, cost;
const int16_t *scan = scan_order->scan;
const int16_t *nb = scan_order->neighbors;
const int ref = is_inter_block(mbmi);
- aom_prob *blockz_probs =
- cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref];
+  int(*head_token_costs)[COEFF_CONTEXTS][HEAD_TOKENS + 1] =
+ x->token_head_costs[tx_size_ctx][type][ref];
+ int(*tail_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] =
+ x->token_tail_costs[tx_size_ctx][type][ref];
+ const int seg_eob = av1_get_tx_eob(&cm->seg, mbmi->segment_id, tx_size);
+ int eob_val;
#if CONFIG_HIGHBITDEPTH
const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
@@ -1446,8 +1468,8 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
(void)cm;
if (eob == 0) {
- // single eob token
- cost = av1_cost_bit(blockz_probs[pt], 0);
+ // block zero
+ cost = (*head_token_costs)[pt][0];
} else {
if (use_fast_coef_costing) {
int band_left = *band_count++;
@@ -1456,10 +1478,13 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
int v = qcoeff[0];
int16_t prev_t;
cost = av1_get_token_cost(v, &prev_t, cat6_bits);
- cost += (*token_costs)[!prev_t][pt][prev_t];
+ eob_val = (eob == 1) ? EARLY_EOB : NO_EOB;
+ cost += av1_get_coeff_token_cost(
+ prev_t, eob_val, 1, (*head_token_costs)[pt], (*tail_token_costs)[pt]);
token_cache[0] = av1_pt_energy_class[prev_t];
- ++token_costs;
+ ++head_token_costs;
+ ++tail_token_costs;
// ac tokens
for (c = 1; c < eob; c++) {
@@ -1468,17 +1493,18 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
v = qcoeff[rc];
cost += av1_get_token_cost(v, &t, cat6_bits);
- cost += (*token_costs)[!t][!prev_t][t];
+ eob_val =
+ (c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
+ cost += av1_get_coeff_token_cost(t, eob_val, 0,
+ (*head_token_costs)[!prev_t],
+ (*tail_token_costs)[!prev_t]);
prev_t = t;
if (!--band_left) {
band_left = *band_count++;
- ++token_costs;
+ ++head_token_costs;
+ ++tail_token_costs;
}
}
-
- // eob token
- cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
-
} else { // !use_fast_coef_costing
int band_left = *band_count++;
@@ -1486,10 +1512,13 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
int v = qcoeff[0];
int16_t tok;
cost = av1_get_token_cost(v, &tok, cat6_bits);
- cost += (*token_costs)[!tok][pt][tok];
+ eob_val = (eob == 1) ? EARLY_EOB : NO_EOB;
+ cost += av1_get_coeff_token_cost(tok, eob_val, 1, (*head_token_costs)[pt],
+ (*tail_token_costs)[pt]);
token_cache[0] = av1_pt_energy_class[tok];
- ++token_costs;
+ ++head_token_costs;
+ ++tail_token_costs;
// ac tokens
for (c = 1; c < eob; c++) {
@@ -1498,17 +1527,17 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
v = qcoeff[rc];
cost += av1_get_token_cost(v, &tok, cat6_bits);
pt = get_coef_context(nb, token_cache, c);
- cost += (*token_costs)[!tok][pt][tok];
+ eob_val =
+ (c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
+ cost += av1_get_coeff_token_cost(
+ tok, eob_val, 0, (*head_token_costs)[pt], (*tail_token_costs)[pt]);
token_cache[rc] = av1_pt_energy_class[tok];
if (!--band_left) {
band_left = *band_count++;
- ++token_costs;
+ ++head_token_costs;
+ ++tail_token_costs;
}
}
-
- // eob token
- pt = get_coef_context(nb, token_cache, c);
- cost += (*token_costs)[0][pt][EOB_TOKEN];
}
}
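
The rewritten costing folds the old standalone EOB token into per-coefficient
head/tail costs, so each coefficient is first classified by its end-of-block
status. A minimal sketch of the classification used in the AC loops above:

    /* NO_EOB:    not the last coefficient.
     * EARLY_EOB: last coefficient; end-of-block is signaled explicitly.
     * LAST_EOB:  last coefficient at the seg_eob limit; EOB is implied. */
    static int classify_eob_val(int c, int eob, int seg_eob) {
      if (c + 1 != eob) return NO_EOB;
      return (c + 1 == seg_eob) ? LAST_EOB : EARLY_EOB;
    }
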
@@ -1520,10 +1549,25 @@ int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
int blk_row, int blk_col, int block, TX_SIZE tx_size,
const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l, int use_fast_coef_costing) {
+ const AV1_COMMON *const cm = &cpi->common;
#if !CONFIG_LV_MAP
(void)blk_row;
(void)blk_col;
- const AV1_COMMON *const cm = &cpi->common;
+#if CONFIG_MRC_TX
+ const MACROBLOCKD *xd = &x->e_mbd;
+ const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ const TX_TYPE tx_type = av1_get_tx_type(xd->plane[plane].plane_type, xd,
+ blk_row, blk_col, block, tx_size);
+ const int is_inter = is_inter_block(mbmi);
+ if (tx_type == MRC_DCT && ((is_inter && SIGNAL_MRC_MASK_INTER) ||
+ (!is_inter && SIGNAL_MRC_MASK_INTRA))) {
+ const int mrc_mask_cost =
+ av1_cost_color_map(x, plane, block, mbmi->sb_type, tx_size, MRC_MAP);
+ return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
+ use_fast_coef_costing) +
+ mrc_mask_cost;
+ }
+#endif
return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
use_fast_coef_costing);
#else // !CONFIG_LV_MAP
@@ -1545,7 +1589,7 @@ int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
- return av1_cost_coeffs_txb(cpi, x, plane, blk_row, blk_col, block, tx_size,
+ return av1_cost_coeffs_txb(cm, x, plane, blk_row, blk_col, block, tx_size,
&txb_ctx);
#endif // !CONFIG_LV_MAP
}
@@ -1600,31 +1644,16 @@ static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
assert(visible_cols > 0);
#if CONFIG_DIST_8X8
- if (plane == 0 && txb_cols >= 8 && txb_rows >= 8)
- return av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, tx_bsize,
- txb_cols, txb_rows, visible_cols, visible_rows,
- x->qindex);
+ if (x->using_dist_8x8 && plane == 0 && txb_cols >= 8 && txb_rows >= 8)
+ return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
+ tx_bsize, txb_cols, txb_rows, visible_cols,
+ visible_rows, x->qindex);
#endif // CONFIG_DIST_8X8
-#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
- if ((txb_rows == visible_rows && txb_cols == visible_cols) &&
- tx_bsize < BLOCK_SIZES) {
-#else
- if (txb_rows == visible_rows && txb_cols == visible_cols) {
-#endif
- unsigned sse;
- cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
- return sse;
- }
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint64_t sse = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
- visible_cols, visible_rows);
- return (unsigned int)ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
- }
-#endif // CONFIG_HIGHBITDEPTH
- unsigned sse = aom_sse_odd_size(src, src_stride, dst, dst_stride,
- visible_cols, visible_rows);
+ unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst,
+ dst_stride, tx_bsize, txb_rows,
+ txb_cols, visible_rows, visible_cols);
+
return sse;
}
@@ -1649,8 +1678,8 @@ static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
NULL, &visible_cols, &visible_rows);
#if CONFIG_DIST_8X8
- if (plane == 0 && txb_width >= 8 && txb_height >= 8)
- return av1_dist_8x8_diff(xd, src, src_stride, diff, diff_stride, txb_width,
+ if (x->using_dist_8x8 && plane == 0 && txb_width >= 8 && txb_height >= 8)
+ return av1_dist_8x8_diff(x, src, src_stride, diff, diff_stride, txb_width,
txb_height, visible_cols, visible_rows, x->qindex);
else
#endif
@@ -1658,7 +1687,6 @@ static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
visible_rows);
}
-#if CONFIG_PALETTE || CONFIG_INTRABC
int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) {
int val_count[256];
memset(val_count, 0, sizeof(val_count));
@@ -1693,7 +1721,6 @@ int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
return n;
}
#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_PALETTE || CONFIG_INTRABC
void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
@@ -1707,7 +1734,11 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
const struct macroblockd_plane *const pd = &xd->plane[plane];
#endif // CONFIG_DIST_8X8
- if (cpi->sf.use_transform_domain_distortion && !CONFIG_DIST_8X8) {
+ if (cpi->sf.use_transform_domain_distortion
+#if CONFIG_DIST_8X8
+ && !x->using_dist_8x8
+#endif
+ ) {
// Transform domain distortion computation is more efficient as it does
// not involve an inverse transform, but it is less accurate.
const int buffer_length = tx_size_2d[tx_size];
@@ -1721,25 +1752,22 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
#if CONFIG_HIGHBITDEPTH
const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
*out_dist = av1_highbd_block_error2_c(coeff, dqcoeff, ref_coeff,
- buffer_length, &this_sse, bd) >>
- shift;
+ buffer_length, &this_sse, bd);
#else
- *out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
- &this_sse) >>
- shift;
+ *out_dist =
+ av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length, &this_sse);
#endif // CONFIG_HIGHBITDEPTH
#else // !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
*out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length,
- &this_sse, xd->bd) >>
- shift;
+ &this_sse, xd->bd);
else
#endif
- *out_dist =
- av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
+ *out_dist = av1_block_error(coeff, dqcoeff, buffer_length, &this_sse);
#endif // CONFIG_PVQ
- *out_sse = this_sse >> shift;
+ *out_dist = RIGHT_SIGNED_SHIFT(*out_dist, shift);
+ *out_sse = RIGHT_SIGNED_SHIFT(this_sse, shift);
} else {
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
#if !CONFIG_PVQ || CONFIG_DIST_8X8
@@ -1808,17 +1836,23 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
(void)dst;
#endif // !CONFIG_PVQ
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
const PLANE_TYPE plane_type = get_plane_type(plane);
TX_TYPE tx_type =
av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
av1_inverse_transform_block(xd, dqcoeff,
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
xd->mi[0]->mbmi.mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ mrc_mask,
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
tx_type, tx_size, recon, MAX_TX_SIZE, eob);
#if CONFIG_DIST_8X8
- if (plane == 0 && (bsw < 8 || bsh < 8)) {
+ if (x->using_dist_8x8 && plane == 0 && (bsw < 8 || bsh < 8)) {
// Save decoded pixels for inter block in pd->pred to avoid
// block_8x8_rd_txfm_daala_dist() need to produce them
// by calling av1_inverse_transform_block() again.
@@ -1864,12 +1898,23 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
const AV1_COMP *cpi = args->cpi;
ENTROPY_CONTEXT *a = args->t_above + blk_col;
ENTROPY_CONTEXT *l = args->t_left + blk_row;
-#if !CONFIG_TXK_SEL
const AV1_COMMON *cm = &cpi->common;
-#endif
int64_t rd1, rd2, rd;
RD_STATS this_rd_stats;
+#if CONFIG_DIST_8X8
+ // If sub8x8 tx, 8x8 or larger partition, and luma channel,
+ // dist-8x8 disables early skip, because the distortion metrics for
+ // sub8x8 tx (MSE) and reference distortion from 8x8 or larger partition
+ // (new distortion metric) are different.
+ // Exception is: dist-8x8 is enabled but still MSE is used,
+ // i.e. "--tune=" encoder option is not used.
+ int disable_early_skip =
+ x->using_dist_8x8 && plane == 0 && plane_bsize >= BLOCK_8X8 &&
+ (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
+ x->tune_metric != AOM_TUNE_PSNR;
+#endif // CONFIG_DIST_8X8
+
#if !CONFIG_SUPERTX && !CONFIG_VAR_TX
assert(tx_size == av1_get_tx_size(plane, xd));
#endif // !CONFIG_SUPERTX
@@ -1879,26 +1924,8 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
if (args->exit_early) return;
if (!is_inter_block(mbmi)) {
- av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
-#if CONFIG_DPCM_INTRA
- const int block_raster_idx =
- av1_block_index_to_raster_order(tx_size, block);
- const PREDICTION_MODE mode = (plane == AOM_PLANE_Y)
- ? get_y_mode(xd->mi[0], block_raster_idx)
- : get_uv_mode(mbmi->uv_mode);
- TX_TYPE tx_type =
- av1_get_tx_type((plane == AOM_PLANE_Y) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
- xd, blk_row, blk_col, block, tx_size);
- if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
- int8_t skip;
- av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
- plane_bsize, tx_size, tx_type, a, l, &skip);
- av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
- tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
- OUTPUT_HAS_DECODED_PIXELS);
- goto CALCULATE_RD;
- }
-#endif // CONFIG_DPCM_INTRA
+ av1_predict_intra_block_facade(cm, xd, plane, block, blk_col, blk_row,
+ tx_size);
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
}
@@ -1921,21 +1948,32 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
tmp_dist =
- av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd) >>
- shift;
+ av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd);
else
#endif
- tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp) >> shift;
+ tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp);
+ tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift);
- if (RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
+ if (
+#if CONFIG_DIST_8X8
+ disable_early_skip ||
+#endif
+ RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
- a, l);
+ a, l, 1);
} else {
args->exit_early = 1;
return;
}
#endif // DISABLE_TRELLISQ_SEARCH
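
  /* Rationale sketch for the early exit above: RDCOST(x->rdmult, 0,
   * tmp_dist) is this block's RD cost assuming zero rate, i.e. a lower
   * bound on any achievable cost. When even that bound cannot beat
   * best_rd, trellis optimization cannot help, so the search bails out
   * (unless dist-8x8 requires the full distortion, per disable_early_skip
   * above). */
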
+#if CONFIG_MRC_TX
+ if (mbmi->tx_type == MRC_DCT && !mbmi->valid_mrc_mask) {
+ args->exit_early = 1;
+ return;
+ }
+#endif // CONFIG_MRC_TX
+
if (!is_inter_block(mbmi)) {
struct macroblock_plane *const p = &x->plane[plane];
av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
@@ -1949,19 +1987,15 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
OUTPUT_HAS_PREDICTED_PIXELS);
}
#if CONFIG_CFL
- if (plane == AOM_PLANE_Y && x->cfl_store_y) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const int dst_stride = pd->dst.stride;
- uint8_t *dst =
- &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
- // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
- // intra predicted.
- cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize);
+ if (plane == AOM_PLANE_Y && xd->cfl->store_y) {
+#if CONFIG_CHROMA_SUB8X8
+ assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8);
+#else
+ assert(!is_inter_block(mbmi));
+#endif // CONFIG_CHROMA_SUB8X8
+ cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
}
-#endif
-#if CONFIG_DPCM_INTRA
-CALCULATE_RD : {}
-#endif // CONFIG_DPCM_INTRA
+#endif // CONFIG_CFL
rd = RDCOST(x->rdmult, 0, this_rd_stats.dist);
if (args->this_rd + rd > args->best_rd) {
args->exit_early = 1;
@@ -2008,16 +2042,12 @@ CALCULATE_RD : {}
args->this_rd += rd;
#if CONFIG_DIST_8X8
- if (!(plane == 0 && plane_bsize >= BLOCK_8X8 &&
- (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))) {
+ if (!disable_early_skip)
#endif
if (args->this_rd > args->best_rd) {
args->exit_early = 1;
return;
}
-#if CONFIG_DIST_8X8
- }
-#endif
}
#if CONFIG_DIST_8X8
@@ -2033,8 +2063,10 @@ static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
const uint8_t *src = &p->src.buf[0];
const uint8_t *dst = &pd->dst.buf[0];
const int16_t *pred = &pd->pred[0];
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
+ int bw = block_size_wide[bsize];
+ int bh = block_size_high[bsize];
+ int visible_w = bw;
+ int visible_h = bh;
int i, j;
int64_t rd, rd1, rd2;
@@ -2044,6 +2076,9 @@ static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
assert((bw & 0x07) == 0);
assert((bh & 0x07) == 0);
+ get_txb_dimensions(xd, 0, bsize, 0, 0, bsize, &bw, &bh, &visible_w,
+ &visible_h);
+
#if CONFIG_HIGHBITDEPTH
uint8_t *pred8;
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_TX_SQUARE]);
@@ -2064,22 +2099,30 @@ static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
} else {
#endif
for (j = 0; j < bh; j++)
- for (i = 0; i < bw; i++) pred8[j * bw + i] = pred[j * bw + i];
+ for (i = 0; i < bw; i++) pred8[j * bw + i] = (uint8_t)pred[j * bw + i];
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
- tmp1 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, bw, bsize, bw, bh, bw,
- bh, qindex);
- tmp2 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride, bsize, bw, bh,
- bw, bh, qindex);
+ tmp1 = (unsigned)av1_dist_8x8(cpi, x, src, src_stride, pred8, bw, bsize, bw,
+ bh, visible_w, visible_h, qindex);
+ tmp2 = (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, bsize,
+ bw, bh, visible_w, visible_h, qindex);
if (!is_inter_block(mbmi)) {
+ if (x->tune_metric == AOM_TUNE_PSNR) {
+ assert(args->rd_stats.sse == tmp1 * 16);
+ assert(args->rd_stats.dist == tmp2 * 16);
+ }
args->rd_stats.sse = (int64_t)tmp1 * 16;
args->rd_stats.dist = (int64_t)tmp2 * 16;
} else {
// For inter mode, the decoded pixels are provided in pd->pred,
// while the predicted pixels are in dst.
+ if (x->tune_metric == AOM_TUNE_PSNR) {
+ assert(args->rd_stats.sse == tmp2 * 16);
+ assert(args->rd_stats.dist == tmp1 * 16);
+ }
args->rd_stats.sse = (int64_t)tmp2 * 16;
args->rd_stats.dist = (int64_t)tmp1 * 16;
}
@@ -2116,7 +2159,8 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
&args);
#if CONFIG_DIST_8X8
- if (!args.exit_early && plane == 0 && bsize >= BLOCK_8X8 &&
+ if (x->using_dist_8x8 && !args.exit_early && plane == 0 &&
+ bsize >= BLOCK_8X8 &&
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args);
#endif
@@ -2174,23 +2218,14 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
const MACROBLOCKD *const xd = &x->e_mbd;
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const int tx_select = cm->tx_mode == TX_MODE_SELECT &&
-#if CONFIG_EXT_PARTITION_TYPES
- // Currently these block shapes can only use 4x4
- // transforms
- mbmi->sb_type != BLOCK_4X16 &&
- mbmi->sb_type != BLOCK_16X4 &&
-#endif
- mbmi->sb_type >= BLOCK_8X8;
-
- if (tx_select) {
+ if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type)) {
const int is_inter = is_inter_block(mbmi);
- const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
- : intra_tx_size_cat_lookup[bsize];
+ const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
+ : intra_tx_size_cat_lookup[bsize];
const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
const int depth = tx_size_to_depth(coded_tx_size);
const int tx_size_ctx = get_tx_size_context(xd);
- int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
+ int r_tx_size = x->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob,
@@ -2202,12 +2237,38 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
}
}
-// #TODO(angiebird): use this function whenever it's possible
-int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
- BLOCK_SIZE bsize, int plane, TX_SIZE tx_size,
- TX_TYPE tx_type) {
+#if CONFIG_LGT_FROM_PRED
+int av1_lgt_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
+ const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
+ TX_SIZE tx_size, int use_lgt) {
+ if (plane > 0) return 0;
+ const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ const int is_inter = is_inter_block(mbmi);
+
+ assert(is_lgt_allowed(mbmi->mode, tx_size));
+ if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ const int ext_tx_set =
+ get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
+ if (LGT_FROM_PRED_INTRA && !is_inter && ext_tx_set > 0 &&
+ ALLOW_INTRA_EXT_TX)
+ return x->intra_lgt_cost[txsize_sqr_map[tx_size]][mbmi->mode][use_lgt];
+ if (LGT_FROM_PRED_INTRA && is_inter && ext_tx_set > 0)
+ return x->inter_lgt_cost[txsize_sqr_map[tx_size]][use_lgt];
+ }
+ return 0;
+}
+#endif // CONFIG_LGT_FROM_PRED
+
+// TODO(angiebird): use this function whenever it's possible
+int av1_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
+ const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
+ TX_SIZE tx_size, TX_TYPE tx_type) {
if (plane > 0) return 0;
+#if CONFIG_LGT_FROM_PRED
+ assert(!xd->mi[0]->mbmi.use_lgt);
+#endif
#if CONFIG_VAR_TX
tx_size = get_min_tx_size(tx_size);
#endif
@@ -2215,31 +2276,31 @@ int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const int is_inter = is_inter_block(mbmi);
#if CONFIG_EXT_TX
- const AV1_COMMON *cm = &cpi->common;
if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
!xd->lossless[xd->mi[0]->mbmi.segment_id]) {
const int ext_tx_set =
get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
if (is_inter) {
if (ext_tx_set > 0)
- return cpi
+ return x
->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]][tx_type];
} else {
if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
- return cpi->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
- [mbmi->mode][tx_type];
+ return x->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
+ [mbmi->mode][tx_type];
}
}
#else
(void)bsize;
+ (void)cm;
if (tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
!FIXED_TX_TYPE) {
if (is_inter) {
- return cpi->inter_tx_type_costs[tx_size][tx_type];
+ return x->inter_tx_type_costs[tx_size][tx_type];
} else {
- return cpi->intra_tx_type_costs[tx_size]
- [intra_mode_to_tx_type_context[mbmi->mode]]
- [tx_type];
+ return x->intra_tx_type_costs[tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [tx_type];
}
}
#endif // CONFIG_EXT_TX
@@ -2247,7 +2308,7 @@ int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
}
static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
- TX_TYPE tx_type, int tx_size) {
+ TX_TYPE tx_type, TX_SIZE tx_size) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -2278,7 +2339,15 @@ static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
if (rd_stats->rate == INT_MAX) return INT64_MAX;
#if !CONFIG_TXK_SEL
int plane = 0;
- rd_stats->rate += av1_tx_type_cost(cpi, xd, bs, plane, tx_size, tx_type);
+#if CONFIG_LGT_FROM_PRED
+ if (is_lgt_allowed(mbmi->mode, tx_size))
+ rd_stats->rate +=
+ av1_lgt_cost(cm, x, xd, bs, plane, tx_size, mbmi->use_lgt);
+ if (!mbmi->use_lgt)
+ rd_stats->rate += av1_tx_type_cost(cm, x, xd, bs, plane, tx_size, tx_type);
+#else
+ rd_stats->rate += av1_tx_type_cost(cm, x, xd, bs, plane, tx_size, tx_type);
+#endif // CONFIG_LGT_FROM_PRED
#endif
if (rd_stats->skip) {
@@ -2316,8 +2385,14 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
#if CONFIG_MRC_TX
-  // MRC_DCT only implemented for TX_32X32 so only include this tx in
-  // the search for TX_32X32
+  // MRC_DCT is implemented only for TX_32X32 and is searched only when
+  // the corresponding USE_MRC_INTER / USE_MRC_INTRA mode is enabled.
- if (tx_type == MRC_DCT && tx_size != TX_32X32) return 1;
+ if (tx_type == MRC_DCT &&
+ ((is_inter && !USE_MRC_INTER) || (!is_inter && !USE_MRC_INTRA) ||
+ tx_size != TX_32X32))
+ return 1;
#endif // CONFIG_MRC_TX
+#if CONFIG_LGT_FROM_PRED
+ if (mbmi->use_lgt && mbmi->ref_mv_idx > 0) return 1;
+#endif // CONFIG_LGT_FROM_PRED
if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
return 1;
@@ -2330,10 +2405,10 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
if (max_tx_size >= TX_32X32 && tx_size == TX_4X4) return 1;
#if CONFIG_EXT_TX
const AV1_COMMON *const cm = &cpi->common;
- int ext_tx_set =
- get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
+ const TxSetType tx_set_type =
+ get_ext_tx_set_type(tx_size, bs, is_inter, cm->reduced_tx_set_used);
+ if (!av1_ext_tx_used[tx_set_type][tx_type]) return 1;
if (is_inter) {
- if (!ext_tx_used_inter[ext_tx_set][tx_type]) return 1;
if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
if (!do_tx_type_search(tx_type, prune)) return 1;
}
@@ -2341,7 +2416,6 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) return 1;
}
- if (!ext_tx_used_intra[ext_tx_set][tx_type]) return 1;
}
#else // CONFIG_EXT_TX
if (tx_size >= TX_32X32 && tx_type != DCT_DCT) return 1;
@@ -2352,8 +2426,7 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
return 0;
}
-#if CONFIG_EXT_INTER && \
- (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA)
+#if (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA)
static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
MACROBLOCK *x, int *r, int64_t *d, int *s,
int64_t *sse, int64_t ref_best_rd) {
@@ -2366,7 +2439,7 @@ static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
*sse = rd_stats.sse;
return rd;
}
-#endif // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
+#endif // (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats, int64_t ref_best_rd,
@@ -2382,9 +2455,14 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
const int is_inter = is_inter_block(mbmi);
int prune = 0;
const int plane = 0;
-#if CONFIG_EXT_TX
- int ext_tx_set;
-#endif // CONFIG_EXT_TX
+#if CONFIG_LGT_FROM_PRED
+ int is_lgt_best = 0;
+ int search_lgt = is_inter
+ ? LGT_FROM_PRED_INTER && !x->use_default_inter_tx_type &&
+                             !(cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
+ : LGT_FROM_PRED_INTRA && !x->use_default_intra_tx_type &&
+ ALLOW_INTRA_EXT_TX;
+#endif // CONFIG_LGT_FROM_PRED
av1_invalid_rd_stats(rd_stats);
mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
@@ -2392,8 +2470,10 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
#endif // CONFIG_VAR_TX
#if CONFIG_EXT_TX
- ext_tx_set =
+ int ext_tx_set =
get_ext_tx_set(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used);
+ const TxSetType tx_set_type =
+ get_ext_tx_set_type(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used);
#endif // CONFIG_EXT_TX
if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
@@ -2414,12 +2494,12 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_PVQ
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+ if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
RD_STATS this_rd_stats;
if (is_inter) {
if (x->use_default_inter_tx_type &&
tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
continue;
- if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
if (!do_tx_type_search(tx_type, prune)) continue;
}
@@ -2430,7 +2510,6 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
}
- if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
}
mbmi->tx_type = tx_type;
@@ -2441,7 +2520,7 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
od_encode_rollback(&x->daala_enc, &pre_buf);
#endif // CONFIG_PVQ
if (this_rd_stats.rate == INT_MAX) continue;
- av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type);
+ av1_tx_type_cost(cm, x, xd, bs, plane, mbmi->tx_size, tx_type);
if (this_rd_stats.skip)
this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
@@ -2464,6 +2543,33 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, &post_buf);
#endif // CONFIG_PVQ
+#if CONFIG_LGT_FROM_PRED
+  // Search LGT as an additional transform candidate.
+ if (search_lgt && is_lgt_allowed(mbmi->mode, mbmi->tx_size) &&
+ !cm->reduced_tx_set_used) {
+ RD_STATS this_rd_stats;
+ mbmi->use_lgt = 1;
+ txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
+ mbmi->tx_size, cpi->sf.use_fast_coef_costing);
+ if (this_rd_stats.rate != INT_MAX) {
+ av1_lgt_cost(cm, x, xd, bs, plane, mbmi->tx_size, 1);
+ if (this_rd_stats.skip)
+ this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
+ else
+ this_rd =
+ RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
+ if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
+ !this_rd_stats.skip)
+ this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ is_lgt_best = 1;
+ *rd_stats = this_rd_stats;
+ }
+ }
+ mbmi->use_lgt = 0;
+ }
+#endif // CONFIG_LGT_FROM_PRED
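
The LGT search above repeats the candidate-selection pattern used throughout this function: fold rate and distortion into one cost, also price the skip alternative, and accept the candidate only on strict improvement over the running best. A minimal sketch of that pattern, assuming a simplified lambda * rate + distortion cost (the in-tree RDCOST macro in av1/encoder/rd.h does the same comparison with fixed-point scaling):

#include <stdint.h>

/* Simplified stand-in for RDCOST(): Lagrangian rate-distortion cost. */
static int64_t rd_cost(int64_t rdmult, int rate, int64_t dist) {
  return rdmult * rate + dist;
}

/* Score one transform candidate the way the loop above does: take the
 * cheaper of coding the residual (rate, dist) and signalling skip
 * (skip_rate, sse); report whether it beats the running best. */
static int try_candidate(int64_t rdmult, int rate, int64_t dist,
                         int skip_rate, int64_t sse, int64_t *best_rd) {
  const int64_t coded = rd_cost(rdmult, rate, dist);
  const int64_t skipped = rd_cost(rdmult, skip_rate, sse);
  const int64_t this_rd = coded < skipped ? coded : skipped;
  if (this_rd >= *best_rd) return 0;
  *best_rd = this_rd; /* caller records tx_type / use_lgt for the winner */
  return 1;
}
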
} else {
mbmi->tx_type = DCT_DCT;
txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
@@ -2484,7 +2590,7 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
if (this_rd_stats.rate == INT_MAX) continue;
- av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type);
+ av1_tx_type_cost(cm, x, xd, bs, plane, mbmi->tx_size, tx_type);
if (is_inter) {
if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
!do_tx_type_search(tx_type, prune))
@@ -2511,6 +2617,9 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
}
#endif // CONFIG_EXT_TX
mbmi->tx_type = best_tx_type;
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = is_lgt_best;
+#endif // CONFIG_LGT_FROM_PRED
}
static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
@@ -2549,6 +2658,11 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
TX_SIZE best_tx_size = max_tx_size;
TX_TYPE best_tx_type = DCT_DCT;
+#if CONFIG_LGT_FROM_PRED
+ int breakout = 0;
+ int is_lgt_best = 0;
+ mbmi->use_lgt = 0;
+#endif // CONFIG_LGT_FROM_PRED
#if CONFIG_TXK_SEL
TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
#endif // CONFIG_TXK_SEL
@@ -2584,12 +2698,12 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
RD_STATS this_rd_stats;
- int ext_tx_set =
- get_ext_tx_set(rect_tx_size, bs, is_inter, cm->reduced_tx_set_used);
- if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
- (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
+ const TxSetType tx_set_type = get_ext_tx_set_type(
+ rect_tx_size, bs, is_inter, cm->reduced_tx_set_used);
+ if (av1_ext_tx_used[tx_set_type][tx_type]) {
rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type,
rect_tx_size);
+ ref_best_rd = AOMMIN(rd, ref_best_rd);
if (rd < best_rd) {
#if CONFIG_TXK_SEL
memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256);
@@ -2605,6 +2719,21 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
#endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
}
+#if CONFIG_LGT_FROM_PRED
+ const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
+ if (is_lgt_allowed(mbmi->mode, rect_tx_size) && !cm->reduced_tx_set_used) {
+ RD_STATS this_rd_stats;
+ mbmi->use_lgt = 1;
+ rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, 0, rect_tx_size);
+ if (rd < best_rd) {
+ is_lgt_best = 1;
+ best_tx_size = rect_tx_size;
+ best_rd = rd;
+ *rd_stats = this_rd_stats;
+ }
+ mbmi->use_lgt = 0;
+ }
+#endif // CONFIG_LGT_FROM_PRED
}
#if CONFIG_RECT_TX_EXT
@@ -2632,10 +2761,9 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
const TX_SIZE tx_size = quarter_txsize_lookup[bs];
RD_STATS this_rd_stats;
- int ext_tx_set =
- get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
- if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
- (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
+ const TxSetType tx_set_type =
+ get_ext_tx_set_type(tx_size, bs, is_inter, cm->reduced_tx_set_used);
+ if (av1_ext_tx_used[tx_set_type][tx_type]) {
rd =
txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, tx_size);
if (rd < best_rd) {
@@ -2644,6 +2772,9 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
sizeof(best_txk_type[0]) * num_blk);
#endif
best_tx_type = tx_type;
+#if CONFIG_LGT_FROM_PRED
+ is_lgt_best = 0;
+#endif
best_tx_size = tx_size;
best_rd = rd;
*rd_stats = this_rd_stats;
@@ -2654,6 +2785,21 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
#endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
}
+#if CONFIG_LGT_FROM_PRED
+ if (is_lgt_allowed(mbmi->mode, tx_size) && !cm->reduced_tx_set_used) {
+ const TX_SIZE tx_size = quarter_txsize_lookup[bs];
+ RD_STATS this_rd_stats;
+ mbmi->use_lgt = 1;
+ rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, 0, tx_size);
+ if (rd < best_rd) {
+ is_lgt_best = 1;
+ best_tx_size = tx_size;
+ best_rd = rd;
+ *rd_stats = this_rd_stats;
+ }
+ mbmi->use_lgt = 0;
+ }
+#endif // CONFIG_LGT_FROM_PRED
}
#endif // CONFIG_RECT_TX_EXT
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -2692,15 +2838,23 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
if (cpi->sf.tx_size_search_breakout &&
(rd == INT64_MAX ||
(this_rd_stats.skip == 1 && tx_type != DCT_DCT && n < start_tx) ||
- (n < (int)max_tx_size && rd > last_rd)))
+ (n < (int)max_tx_size && rd > last_rd))) {
+#if CONFIG_LGT_FROM_PRED
+ breakout = 1;
+#endif
break;
+ }
last_rd = rd;
+ ref_best_rd = AOMMIN(rd, ref_best_rd);
if (rd < best_rd) {
#if CONFIG_TXK_SEL
memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256);
#endif
best_tx_type = tx_type;
+#if CONFIG_LGT_FROM_PRED
+ is_lgt_best = 0;
+#endif
best_tx_size = n;
best_rd = rd;
*rd_stats = this_rd_stats;
@@ -2710,9 +2864,28 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
#endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
}
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = 1;
+ if (is_lgt_allowed(mbmi->mode, n) && !skip_txfm_search(cpi, x, bs, 0, n) &&
+ !breakout) {
+ RD_STATS this_rd_stats;
+ rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, 0, n);
+ if (rd < best_rd) {
+ is_lgt_best = 1;
+ best_tx_size = n;
+ best_rd = rd;
+ *rd_stats = this_rd_stats;
+ }
+ }
+ mbmi->use_lgt = 0;
+#endif // CONFIG_LGT_FROM_PRED
}
mbmi->tx_size = best_tx_size;
mbmi->tx_type = best_tx_type;
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = is_lgt_best;
+ assert(!is_lgt_best || is_lgt_allowed(mbmi->mode, mbmi->tx_size));
+#endif // CONFIG_LGT_FROM_PRED
#if CONFIG_TXK_SEL
memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * 256);
#endif
@@ -2768,6 +2941,7 @@ static int conditional_skipintra(PREDICTION_MODE mode,
// Model-based RD estimation for luma intra blocks.
static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
BLOCK_SIZE bsize, int mode_cost) {
+ const AV1_COMMON *cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
assert(!is_inter_block(mbmi));
@@ -2785,7 +2959,7 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
int block = 0;
for (row = 0; row < max_blocks_high; row += stepr) {
for (col = 0; col < max_blocks_wide; col += stepc) {
- av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
+ av1_predict_intra_block_facade(cm, xd, 0, block, col, row, tx_size);
block += step;
}
}
@@ -2816,7 +2990,6 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
return this_rd;
}
-#if CONFIG_PALETTE
// Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
// new_height'. Extra rows and columns are filled in by copying the last
// valid row/column.
@@ -2875,6 +3048,7 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
MODE_INFO *const mic = xd->mi[0];
MB_MODE_INFO *const mbmi = &mic->mbmi;
assert(!is_inter_block(mbmi));
+ assert(bsize >= BLOCK_8X8);
int this_rate, colors, n;
const int src_stride = x->plane[0].src.stride;
const uint8_t *const src = x->plane[0].src.buf;
@@ -2897,9 +3071,8 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_FILTER_INTRA
if (colors > 1 && colors <= 64) {
- int r, c, i, j, k, palette_mode_cost;
+ int r, c, i, k, palette_mode_cost;
const int max_itr = 50;
- uint8_t color_order[PALETTE_MAX_SIZE];
float *const data = x->palette_buffer->kmeans_data_buf;
float centroids[PALETTE_MAX_SIZE];
float lb, ub, val;
@@ -2950,11 +3123,8 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return 0;
#if CONFIG_PALETTE_DELTA_ENCODING
- const MODE_INFO *above_mi = xd->above_mi;
- const MODE_INFO *left_mi = xd->left_mi;
uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- const int n_cache =
- av1_get_palette_cache(above_mi, left_mi, 0, color_cache);
+ const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
#endif // CONFIG_PALETTE_DELTA_ENCODING
for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
@@ -2998,7 +3168,7 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
block_height);
palette_mode_cost =
dc_mode_cost +
- cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - PALETTE_MIN_SIZE] +
+ x->palette_y_size_cost[bsize - BLOCK_8X8][k - PALETTE_MIN_SIZE] +
write_uniform_cost(k, color_map[0]) +
av1_cost_bit(
av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx],
@@ -3008,16 +3178,8 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
color_cache, n_cache,
#endif // CONFIG_PALETTE_DELTA_ENCODING
cpi->common.bit_depth);
- for (i = 0; i < rows; ++i) {
- for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
- int color_idx;
- const int color_ctx = av1_get_palette_color_index_context(
- color_map, block_width, i, j, k, color_order, &color_idx);
- assert(color_idx >= 0 && color_idx < k);
- palette_mode_cost += cpi->palette_y_color_cost[k - PALETTE_MIN_SIZE]
- [color_ctx][color_idx];
- }
- }
+ palette_mode_cost +=
+ av1_cost_color_map(x, 0, 0, bsize, mbmi->tx_size, PALETTE_MAP);
this_model_rd = intra_model_yrd(cpi, x, bsize, palette_mode_cost);
if (*best_model_rd != INT64_MAX &&
this_model_rd > *best_model_rd + (*best_model_rd >> 1))
@@ -3027,7 +3189,8 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
if (tokenonly_rd_stats.rate == INT_MAX) continue;
this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
- if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
+ if (!xd->lossless[mbmi->segment_id] &&
+ block_signals_txsize(mbmi->sb_type)) {
tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
}
if (this_rd < *best_rd) {
@@ -3046,12 +3209,11 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
memcpy(color_map, best_palette_color_map,
- rows * cols * sizeof(best_palette_color_map[0]));
+ block_width * block_height * sizeof(best_palette_color_map[0]));
}
*mbmi = *best_mbmi;
return rate_overhead;
}
-#endif // CONFIG_PALETTE
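
The copy-back above uses block_width * block_height rather than rows * cols because the color map is stored at the padded block dimensions: extend_palette_color_map() (see the comment above it) grows the map by replicating the last valid row and column. A minimal sketch of that replication for an in-place, row-major uint8_t map; the function name here is illustrative, not the in-tree one:

#include <stdint.h>
#include <string.h>

/* Grow an orig_w x orig_h map (packed at stride orig_w) to new_w x new_h
 * (stride new_w) in place, replicating the last valid row/column. */
static void extend_color_map_sketch(uint8_t *map, int orig_w, int orig_h,
                                    int new_w, int new_h) {
  /* Work bottom-up so the in-place row moves never clobber unread data. */
  for (int r = orig_h - 1; r >= 0; --r) {
    memmove(map + r * new_w, map + r * orig_w, orig_w);
    /* Fill the new columns with the last valid pixel of this row. */
    memset(map + r * new_w + orig_w, map[r * new_w + orig_w - 1],
           new_w - orig_w);
  }
  /* Fill the new rows with a copy of the last valid row. */
  for (int r = orig_h; r < new_h; ++r)
    memcpy(map + r * new_w, map + (orig_h - 1) * new_w, new_w);
}
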
static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
const AV1_COMP *const cpi, MACROBLOCK *x, int row, int col,
@@ -3124,9 +3286,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
xd->mi[0]->mbmi.tx_size = tx_size;
-#if CONFIG_PALETTE
xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
-#endif // CONFIG_PALETTE
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -3172,8 +3332,8 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
block == 0 || block == 2));
xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
av1_predict_intra_block(
- xd, pd->width, pd->height, txsize_to_bsize[tx_size], mode, dst,
- dst_stride, dst, dst_stride, col + idx, row + idy, 0);
+ cm, xd, pd->width, pd->height, txsize_to_bsize[tx_size], mode,
+ dst, dst_stride, dst, dst_stride, col + idx, row + idy, 0);
#if !CONFIG_PVQ
aom_highbd_subtract_block(tx_height, tx_width, src_diff, 8, src,
src_stride, dst, dst_stride, xd->bd);
@@ -3220,9 +3380,12 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
if (!skip)
#endif
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ BLOCK_OFFSET(xd->mrc_mask, block),
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
DCT_DCT, tx_size, dst, dst_stride,
p->eobs[block]);
} else {
@@ -3242,7 +3405,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size,
- tempa + idx, templ + idy);
+ tempa + idx, templ + idy, 1);
#endif // DISABLE_TRELLISQ_SEARCH
ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size,
scan_order, tempa + idx, templ + idy,
@@ -3273,9 +3436,12 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
if (!skip)
#endif
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ BLOCK_OFFSET(xd->mrc_mask, block),
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
tx_type, tx_size, dst, dst_stride,
p->eobs[block]);
cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
@@ -3374,7 +3540,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
block == 0 || block == 2));
xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
- av1_predict_intra_block(xd, pd->width, pd->height,
+ av1_predict_intra_block(cm, xd, pd->width, pd->height,
txsize_to_bsize[tx_size], mode, dst, dst_stride,
dst, dst_stride,
#if CONFIG_CB4X4
@@ -3416,7 +3582,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
BLOCK_8X8, tx_size, coeff_ctx, xform_quant);
av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size, tempa + idx,
- templ + idy);
+ templ + idy, 1);
#endif // DISABLE_TRELLISQ_SEARCH
ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, scan_order,
tempa + idx, templ + idy,
@@ -3459,9 +3625,12 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
if (!skip)
#endif // CONFIG_PVQ
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ BLOCK_OFFSET(xd->mrc_mask, block),
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
tx_type, tx_size, dst, dst_stride,
p->eobs[block]);
unsigned int tmp;
@@ -3477,9 +3646,12 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
if (!skip)
#endif // CONFIG_PVQ
av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
-#if CONFIG_LGT
+#if CONFIG_LGT_FROM_PRED
mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ BLOCK_OFFSET(xd->mrc_mask, block),
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
DCT_DCT, tx_size, dst, dst_stride,
p->eobs[block]);
}
@@ -3544,7 +3716,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
int64_t total_distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
- const int *bmode_costs = cpi->mbmode_cost[0];
+ const int *bmode_costs = mb->mbmode_cost[0];
const int is_lossless = xd->lossless[mbmi->segment_id];
#if CONFIG_EXT_TX && CONFIG_RECT_TX
const TX_SIZE tx_size = is_lossless ? TX_4X4 : max_txsize_rect_lookup[bsize];
@@ -3565,6 +3737,9 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
// expense of speed.
mbmi->tx_type = DCT_DCT;
mbmi->tx_size = tx_size;
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = 0;
+#endif
if (y_skip) *y_skip = 1;
@@ -3583,15 +3758,23 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
const PREDICTION_MODE L =
av1_left_block_mode(mic, left_mi, pred_block_idx);
- bmode_costs = cpi->y_mode_costs[A][L];
+#if CONFIG_KF_CTX
+ const int above_ctx = intra_mode_context[A];
+ const int left_ctx = intra_mode_context[L];
+ bmode_costs = mb->y_mode_costs[above_ctx][left_ctx];
+#else
+ bmode_costs = mb->y_mode_costs[A][L];
+#endif
}
this_rd = rd_pick_intra_sub_8x8_y_subblock_mode(
cpi, mb, idy, idx, &best_mode, bmode_costs,
xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
&ry, &d, bsize, tx_size, y_skip, best_rd - total_rd);
-#if !CONFIG_DIST_8X8
- if (this_rd >= best_rd - total_rd) return INT64_MAX;
-#endif // !CONFIG_DIST_8X8
+#if CONFIG_DIST_8X8
+ if (!cpi->oxcf.using_dist_8x8)
+#endif
+ if (this_rd >= best_rd - total_rd) return INT64_MAX;
+
total_rd += this_rd;
cost += r;
total_distortion += d;
@@ -3609,7 +3792,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
mbmi->mode = mic->bmi[3].as_mode;
#if CONFIG_DIST_8X8
- {
+ if (cpi->oxcf.using_dist_8x8) {
const struct macroblock_plane *p = &mb->plane[0];
const struct macroblockd_plane *pd = &xd->plane[0];
const int src_stride = p->src.stride;
@@ -3617,11 +3800,8 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
uint8_t *src = p->src.buf;
uint8_t *dst = pd->dst.buf;
-#if CONFIG_PVQ
- use_activity_masking = mb->daala_enc.use_activity_masking;
-#endif // CONFIG_PVQ
// Daala-defined distortion computed for the block of 8x8 pixels
- total_distortion = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride,
+ total_distortion = av1_dist_8x8(cpi, mb, src, src_stride, dst, dst_stride,
BLOCK_8X8, 8, 8, 8, 8, mb->qindex)
<< 4;
}
@@ -3634,14 +3814,20 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
1) {
const int eset =
get_ext_tx_set(tx_size, bsize, 0, cpi->common.reduced_tx_set_used);
- rate_tx_type = cpi->intra_tx_type_costs[eset][txsize_sqr_map[tx_size]]
- [mbmi->mode][mbmi->tx_type];
+#if CONFIG_LGT_FROM_PRED
+ if (LGT_FROM_PRED_INTRA && is_lgt_allowed(mbmi->mode, tx_size))
+ rate_tx_type += mb->intra_lgt_cost[txsize_sqr_map[tx_size]][mbmi->mode]
+ [mbmi->use_lgt];
+ if (!LGT_FROM_PRED_INTRA || !mbmi->use_lgt)
+#endif // CONFIG_LGT_FROM_PRED
+ rate_tx_type += mb->intra_tx_type_costs[eset][txsize_sqr_map[tx_size]]
+ [mbmi->mode][mbmi->tx_type];
}
#else
rate_tx_type =
- cpi->intra_tx_type_costs[txsize_sqr_map[tx_size]]
- [intra_mode_to_tx_type_context[mbmi->mode]]
- [mbmi->tx_type];
+ mb->intra_tx_type_costs[txsize_sqr_map[tx_size]]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
#endif // CONFIG_EXT_TX
assert(mbmi->tx_size == tx_size);
cost += rate_tx_type;
@@ -3671,13 +3857,14 @@ static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
TX_SIZE best_tx_size = TX_4X4;
FILTER_INTRA_MODE_INFO filter_intra_mode_info;
TX_TYPE best_tx_type;
+#if CONFIG_LGT_FROM_PRED
+ int use_lgt_when_selected;
+#endif
av1_zero(filter_intra_mode_info);
mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
mbmi->mode = DC_PRED;
-#if CONFIG_PALETTE
mbmi->palette_mode_info.palette_size[0] = 0;
-#endif // CONFIG_PALETTE
for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
int this_rate;
@@ -3702,6 +3889,9 @@ static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
best_tx_size = mic->mbmi.tx_size;
filter_intra_mode_info = mbmi->filter_intra_mode_info;
best_tx_type = mic->mbmi.tx_type;
+#if CONFIG_LGT_FROM_PRED
+ use_lgt_when_selected = mic->mbmi.use_lgt;
+#endif
*rate = this_rate;
*rate_tokenonly = tokenonly_rd_stats.rate;
*distortion = tokenonly_rd_stats.dist;
@@ -3713,6 +3903,9 @@ static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
if (filter_intra_selected_flag) {
mbmi->mode = DC_PRED;
mbmi->tx_size = best_tx_size;
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = use_lgt_when_selected;
+#endif
mbmi->filter_intra_mode_info.use_filter_intra_mode[0] =
filter_intra_mode_info.use_filter_intra_mode[0];
mbmi->filter_intra_mode_info.filter_intra_mode[0] =
@@ -3733,6 +3926,9 @@ static int64_t calc_rd_given_intra_angle(
int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
TX_TYPE *best_tx_type,
+#if CONFIG_LGT_FROM_PRED
+ int *use_lgt_when_selected,
+#endif
#if CONFIG_INTRA_INTERP
INTRA_FILTER *best_filter,
#endif // CONFIG_INTRA_INTERP
@@ -3765,6 +3961,9 @@ static int64_t calc_rd_given_intra_angle(
*best_filter = mbmi->intra_filter;
#endif // CONFIG_INTRA_INTERP
*best_tx_type = mbmi->tx_type;
+#if CONFIG_LGT_FROM_PRED
+ *use_lgt_when_selected = mbmi->use_lgt;
+#endif
*rate = this_rate;
rd_stats->rate = tokenonly_rd_stats.rate;
rd_stats->dist = tokenonly_rd_stats.dist;
@@ -3794,6 +3993,9 @@ static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
TX_SIZE best_tx_size = mic->mbmi.tx_size;
TX_TYPE best_tx_type = mbmi->tx_type;
+#if CONFIG_LGT_FROM_PRED
+ int use_lgt_when_selected = mbmi->use_lgt;
+#endif
for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
@@ -3810,12 +4012,15 @@ static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
this_rd = calc_rd_given_intra_angle(
cpi, x, bsize,
#if CONFIG_INTRA_INTERP
- mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
+ mode_cost + x->intra_filter_cost[intra_filter_ctx][filter],
#else
mode_cost,
#endif // CONFIG_INTRA_INTERP
best_rd_in, (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate,
rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
+#if CONFIG_LGT_FROM_PRED
+ &use_lgt_when_selected,
+#endif
#if CONFIG_INTRA_INTERP
&best_filter,
#endif // CONFIG_INTRA_INTERP
@@ -3851,12 +4056,15 @@ static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
calc_rd_given_intra_angle(
cpi, x, bsize,
#if CONFIG_INTRA_INTERP
- mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
+ mode_cost + x->intra_filter_cost[intra_filter_ctx][filter],
#else
mode_cost,
#endif // CONFIG_INTRA_INTERP
best_rd, (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate,
rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
+#if CONFIG_LGT_FROM_PRED
+ &use_lgt_when_selected,
+#endif
#if CONFIG_INTRA_INTERP
&best_filter,
#endif // CONFIG_INTRA_INTERP
@@ -3876,10 +4084,13 @@ static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
mic->mbmi.intra_filter = filter;
this_rd = calc_rd_given_intra_angle(
cpi, x, bsize,
- mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
- best_rd, best_angle_delta, MAX_ANGLE_DELTA, rate, rd_stats,
- &best_angle_delta, &best_tx_size, &best_tx_type, &best_filter,
- &best_rd, best_model_rd);
+ mode_cost + x->intra_filter_cost[intra_filter_ctx][filter], best_rd,
+ best_angle_delta, MAX_ANGLE_DELTA, rate, rd_stats,
+ &best_angle_delta, &best_tx_size, &best_tx_type,
+#if CONFIG_LGT_FROM_PRED
+ &use_lgt_when_selected,
+#endif
+ &best_filter, &best_rd, best_model_rd);
}
}
}
@@ -3891,6 +4102,9 @@ static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
mic->mbmi.intra_filter = best_filter;
#endif // CONFIG_INTRA_INTERP
mbmi->tx_type = best_tx_type;
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = use_lgt_when_selected;
+#endif
return best_rd;
}
@@ -3919,9 +4133,7 @@ static const uint8_t gradient_to_angle_bin[2][7][16] = {
/* clang-format off */
static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
-#if CONFIG_ALT_INTRA
0,
-#endif // CONFIG_ALT_INTRA
};
/* clang-format on */
@@ -4064,16 +4276,12 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
uint16_t filter_intra_mode_skip_mask = (1 << FILTER_INTRA_MODES) - 1;
#endif // CONFIG_FILTER_INTRA
const int *bmode_costs;
-#if CONFIG_PALETTE
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- uint8_t *best_palette_color_map =
- cpi->common.allow_screen_content_tools
- ? x->palette_buffer->best_palette_color_map
- : NULL;
int palette_y_mode_ctx = 0;
const int try_palette =
- cpi->common.allow_screen_content_tools && bsize >= BLOCK_8X8;
-#endif // CONFIG_PALETTE
+ av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
+ uint8_t *best_palette_color_map =
+ try_palette ? x->palette_buffer->best_palette_color_map : NULL;
const MODE_INFO *above_mi = xd->above_mi;
const MODE_INFO *left_mi = xd->left_mi;
const PREDICTION_MODE A = av1_above_block_mode(mic, above_mi, 0);
@@ -4085,7 +4293,14 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
od_encode_checkpoint(&x->daala_enc, &pre_buf);
od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif // CONFIG_PVQ
- bmode_costs = cpi->y_mode_costs[A][L];
+
+#if CONFIG_KF_CTX
+ const int above_ctx = intra_mode_context[A];
+ const int left_ctx = intra_mode_context[L];
+ bmode_costs = x->y_mode_costs[above_ctx][left_ctx];
+#else
+ bmode_costs = x->y_mode_costs[A][L];
+#endif
#if CONFIG_EXT_INTRA
mbmi->angle_delta[0] = 0;
@@ -4101,14 +4316,17 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_FILTER_INTRA
mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif // CONFIG_FILTER_INTRA
-#if CONFIG_PALETTE
pmi->palette_size[0] = 0;
- if (above_mi)
- palette_y_mode_ctx +=
- (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
- if (left_mi)
- palette_y_mode_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
-#endif // CONFIG_PALETTE
+ if (try_palette) {
+ if (above_mi) {
+ palette_y_mode_ctx +=
+ (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ }
+ if (left_mi) {
+ palette_y_mode_ctx +=
+ (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ }
+ }
if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
x->use_default_intra_tx_type = 1;
@@ -4160,21 +4378,20 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
this_rate = this_rate_tokenonly + bmode_costs[mbmi->mode];
- if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
+ if (!xd->lossless[mbmi->segment_id] &&
+ block_signals_txsize(mbmi->sb_type)) {
// super_block_yrd above includes the cost of the tx_size in the
// tokenonly rate, but for intra blocks, tx_size is always coded
// (prediction granularity), so we account for it in the full rate,
// not the tokenonly rate.
this_rate_tokenonly -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
}
-#if CONFIG_PALETTE
if (try_palette && mbmi->mode == DC_PRED) {
this_rate +=
av1_cost_bit(av1_default_palette_y_mode_prob[bsize - BLOCK_8X8]
[palette_y_mode_ctx],
0);
}
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
if (mbmi->mode == DC_PRED)
this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 0);
@@ -4185,8 +4402,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
const int p_angle =
mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
if (av1_is_intra_filter_switchable(p_angle))
- this_rate +=
- cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
+ this_rate += x->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
#endif // CONFIG_INTRA_INTERP
if (av1_use_angle_delta(bsize)) {
this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
@@ -4194,6 +4410,10 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
}
}
#endif // CONFIG_EXT_INTRA
+#if CONFIG_INTRABC
+ if (bsize >= BLOCK_8X8 && cpi->common.allow_screen_content_tools)
+ this_rate += x->intrabc_cost[0];
+#endif // CONFIG_INTRABC
this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
#if CONFIG_FILTER_INTRA
if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) {
@@ -4221,14 +4441,12 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
od_encode_rollback(&x->daala_enc, &post_buf);
#endif // CONFIG_PVQ
-#if CONFIG_PALETTE
if (try_palette) {
rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
bmode_costs[DC_PRED], &best_mbmi,
best_palette_color_map, &best_rd, &best_model_rd,
rate, rate_tokenonly, distortion, skippable);
}
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
if (beat_best_rd) {
@@ -4317,6 +4535,9 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int64_t tmp;
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
PLANE_TYPE plane_type = get_plane_type(plane);
TX_TYPE tx_type =
av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
@@ -4346,6 +4567,22 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int coeff_ctx = get_entropy_context(tx_size, a, l);
+ tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
+ plane_bsize, txm_bsize);
+
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
+#endif // CONFIG_HIGHBITDEPTH
+ rd_stats->sse += tmp << 4;
+
+ if (rd_stats->invalid_rate) {
+ rd_stats->dist += tmp << 4;
+ rd_stats->rate += rd_stats->zero_rate;
+ rd_stats->skip = 1;
+ return;
+ }
+
// TODO(any): Use av1_dist_block to compute distortion
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -4373,43 +4610,59 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
const int buffer_length = tx_size_2d[tx_size];
- int64_t tmp_dist;
+ int64_t tmp_dist, tmp_sse;
+#if CONFIG_DIST_8X8
+ int disable_early_skip =
+ x->using_dist_8x8 && plane == 0 && plane_bsize >= BLOCK_8X8 &&
+ (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
+ x->tune_metric != AOM_TUNE_PSNR;
+#endif // CONFIG_DIST_8X8
+
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
tmp_dist =
- av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd) >>
- shift;
+ av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp_sse, xd->bd);
else
#endif
- tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp) >> shift;
+ tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp_sse);
+
+ tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift);
- if (RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
+#if CONFIG_MRC_TX
+ if (tx_type == MRC_DCT && !xd->mi[0]->mbmi.valid_mrc_mask) {
+ av1_invalid_rd_stats(rd_stats);
+ return;
+ }
+#endif // CONFIG_MRC_TX
+ if (
+#if CONFIG_DIST_8X8
+ disable_early_skip ||
+#endif
+ RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
- a, l);
+ a, l, 1);
+ } else {
+ rd_stats->rate += rd_stats->zero_rate;
+ rd_stats->dist += tmp << 4;
+ rd_stats->skip = 1;
+ rd_stats->invalid_rate = 1;
+ return;
}
#endif // DISABLE_TRELLISQ_SEARCH
- tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
- plane_bsize, txm_bsize);
-
-#if CONFIG_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
-#endif // CONFIG_HIGHBITDEPTH
- rd_stats->sse += tmp * 16;
const int eob = p->eobs[block];
-#if CONFIG_LGT
- PREDICTION_MODE mode = get_prediction_mode(xd->mi[0], plane, tx_size, block);
- av1_inverse_transform_block(xd, dqcoeff, mode, tx_type, tx_size, rec_buffer,
- MAX_TX_SIZE, eob);
-#else
- av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, rec_buffer,
- MAX_TX_SIZE, eob);
+ av1_inverse_transform_block(xd, dqcoeff,
+#if CONFIG_LGT_FROM_PRED
+ xd->mi[0]->mbmi.mode,
#endif
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ mrc_mask,
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ tx_type, tx_size, rec_buffer, MAX_TX_SIZE, eob);
if (eob > 0) {
#if CONFIG_DIST_8X8
- if (plane == 0 && (bw < 8 && bh < 8)) {
+ if (x->using_dist_8x8 && plane == 0 && (bw < 8 && bh < 8)) {
// Save sub8x8 luma decoded pixels
// since 8x8 luma decoded pixels are not available for daala-dist
// after recursive split of BLOCK_8x8 is done.
@@ -4451,12 +4704,12 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
}
static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
- int blk_col, int plane, int block, int block32,
- TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize,
+ int blk_col, int plane, int block, TX_SIZE tx_size,
+ int depth, BLOCK_SIZE plane_bsize,
ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
RD_STATS *rd_stats, int64_t ref_best_rd,
- int *is_cost_valid, RD_STATS *rd_stats_stack) {
+ int *is_cost_valid) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
struct macroblock_plane *const p = &x->plane[plane];
@@ -4519,32 +4772,28 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
TX_SIZE txs_ctx = get_txsize_context(tx_size);
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, pta, ptl, &txb_ctx);
+
+#if LV_MAP_PROB
+ zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(plane)]
+ .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
+#else
zero_blk_rate =
av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_ctx.txb_skip_ctx], 1);
+#endif // LV_MAP_PROB
#else
- int tx_size_ctx = txsize_sqr_map[tx_size];
+ TX_SIZE tx_size_ctx = txsize_sqr_map[tx_size];
int coeff_ctx = get_entropy_context(tx_size, pta, ptl);
- zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
- [coeff_ctx][EOB_TOKEN];
+ zero_blk_rate =
+ x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
#endif
rd_stats->ref_rdcost = ref_best_rd;
rd_stats->zero_rate = zero_blk_rate;
if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
inter_tx_size[0][0] = tx_size;
-
- if (tx_size == TX_32X32 && mbmi->tx_type != DCT_DCT &&
- rd_stats_stack[block32].rate != INT_MAX) {
- *rd_stats = rd_stats_stack[block32];
- p->eobs[block] = !rd_stats->skip;
- x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
- } else {
- av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
- plane_bsize, pta, ptl, rd_stats);
- if (tx_size == TX_32X32) {
- rd_stats_stack[block32] = *rd_stats;
- }
- }
+ av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
+ plane_bsize, pta, ptl, rd_stats);
+ if (rd_stats->rate == INT_MAX) return;
if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
@@ -4599,11 +4848,12 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
av1_tx_block_rd_b(cpi, x, quarter_txsize, 0, 0, plane, 0, plane_bsize,
pta, ptl, &rd_stats_qttx);
+      if (rd_stats_qttx.rate == INT_MAX) return;
tx_size_ctx = txsize_sqr_map[quarter_txsize];
coeff_ctx = get_entropy_context(quarter_txsize, pta, ptl);
- zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
- [coeff_ctx][EOB_TOKEN];
+ zero_blk_rate =
+ x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
if ((RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist) >=
RDCOST(x->rdmult, zero_blk_rate, rd_stats_qttx.sse) ||
rd_stats_qttx.skip == 1) &&
@@ -4629,11 +4879,15 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
plane, block_offset_qttx, plane_bsize, pta, ptl,
&rd_stats_tmp);
+      if (rd_stats_tmp.rate == INT_MAX) return;
+
+#if !CONFIG_PVQ
av1_set_txb_context(x, plane, 0, quarter_txsize, pta, ptl);
+#endif // !CONFIG_PVQ
coeff_ctx = get_entropy_context(quarter_txsize, pta + blk_col_offset,
ptl + blk_row_offset);
- zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
- [coeff_ctx][EOB_TOKEN];
+ zero_blk_rate =
+ x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
if ((RDCOST(x->rdmult, rd_stats_tmp.rate, rd_stats_tmp.dist) >=
RDCOST(x->rdmult, zero_blk_rate, rd_stats_tmp.sse) ||
rd_stats_tmp.skip == 1) &&
@@ -4684,13 +4938,13 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
#endif
}
+ if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH
#if CONFIG_MRC_TX
- // If the tx type we are trying is MRC_DCT, we cannot partition the transform
- // into anything smaller than TX_32X32
- if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && mbmi->tx_type != MRC_DCT) {
-#else
- if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
-#endif
+ // If the tx type we are trying is MRC_DCT, we cannot partition the
+ // transform into anything smaller than TX_32X32
+ && mbmi->tx_type != MRC_DCT
+#endif // CONFIG_MRC_TX
+ ) {
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bsl = tx_size_wide_unit[sub_txs];
int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
@@ -4713,25 +4967,26 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
- select_tx_block(cpi, x, offsetr, offsetc, plane, block, block32, sub_txs,
+ select_tx_block(cpi, x, offsetr, offsetc, plane, block, sub_txs,
depth + 1, plane_bsize, ta, tl, tx_above, tx_left,
- &this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid,
- rd_stats_stack);
+ &this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid);
#if CONFIG_DIST_8X8
- if (plane == 0 && tx_size == TX_8X8) {
+ if (x->using_dist_8x8 && plane == 0 && tx_size == TX_8X8) {
sub8x8_eob[i] = p->eobs[block];
}
#endif // CONFIG_DIST_8X8
av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);
tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
-#if !CONFIG_DIST_8X8
- if (this_rd < tmp_rd) break;
+#if CONFIG_DIST_8X8
+ if (!x->using_dist_8x8)
#endif
+ if (this_rd < tmp_rd) break;
block += sub_step;
}
#if CONFIG_DIST_8X8
- if (this_cost_valid && plane == 0 && tx_size == TX_8X8) {
+ if (x->using_dist_8x8 && this_cost_valid && plane == 0 &&
+ tx_size == TX_8X8) {
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
@@ -4757,7 +5012,7 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
#endif // CONFIG_HIGHBITDEPTH
- dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, dst, dst_stride,
+ dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
BLOCK_8X8, 8, 8, 8, 8, qindex) *
16;
sum_rd_stats.sse = dist_8x8;
@@ -4802,7 +5057,7 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
for (j = 0; j < 4; j++)
for (i = 0; i < 4; i++)
pred8[(row * 4 + j) * 8 + 4 * col + i] =
- pred[(row * 4 + j) * pred_stride + 4 * col + i];
+ (uint8_t)pred[(row * 4 + j) * pred_stride + 4 * col + i];
} else {
for (j = 0; j < 4; j++)
for (i = 0; i < 4; i++)
@@ -4814,7 +5069,7 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
- dist_8x8 = av1_dist_8x8(cpi, xd, src, src_stride, pred8, 8, BLOCK_8X8, 8,
+ dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, pred8, 8, BLOCK_8X8, 8,
8, 8, 8, qindex) *
16;
sum_rd_stats.dist = dist_8x8;
@@ -4853,12 +5108,14 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
}
#endif
+#if !CONFIG_PVQ
av1_set_txb_context(x, plane, block, tx_size_selected, pta, ptl);
#if CONFIG_RECT_TX_EXT
if (is_qttx_picked)
av1_set_txb_context(x, plane, block_offset_qttx, tx_size_selected,
pta + blk_col_offset, ptl + blk_row_offset);
-#endif
+#endif // CONFIG_RECT_TX_EXT
+#endif // !CONFIG_PVQ
txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
tx_size);
@@ -4889,7 +5146,7 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int64_t ref_best_rd, RD_STATS *rd_stats_stack) {
+ int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
int is_cost_valid = 1;
int64_t this_rd = 0;
@@ -4908,7 +5165,8 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
const int bw = tx_size_wide_unit[max_tx_size];
int idx, idy;
int block = 0;
- int block32 = 0;
+ int init_depth =
+ (mi_height != mi_width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
@@ -4924,15 +5182,17 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bw) {
- select_tx_block(cpi, x, idy, idx, 0, block, block32, max_tx_size,
- mi_height != mi_width, plane_bsize, ctxa, ctxl,
- tx_above, tx_left, &pn_rd_stats, ref_best_rd - this_rd,
- &is_cost_valid, rd_stats_stack);
+ select_tx_block(cpi, x, idy, idx, 0, block, max_tx_size, init_depth,
+ plane_bsize, ctxa, ctxl, tx_above, tx_left,
+ &pn_rd_stats, ref_best_rd - this_rd, &is_cost_valid);
+ if (pn_rd_stats.rate == INT_MAX) {
+ av1_invalid_rd_stats(rd_stats);
+ return;
+ }
av1_merge_rd_stats(rd_stats, &pn_rd_stats);
this_rd += AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
RDCOST(x->rdmult, 0, pn_rd_stats.sse));
block += step;
- ++block32;
}
}
}
@@ -4949,8 +5209,7 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int64_t ref_best_rd, TX_TYPE tx_type,
- RD_STATS *rd_stats_stack) {
+ int64_t ref_best_rd, TX_TYPE tx_type) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -4964,7 +5223,7 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
const int max_blocks_wide = max_block_wide(xd, bsize, 0);
mbmi->tx_type = tx_type;
- inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, rd_stats_stack);
+ inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd);
mbmi->min_tx_size = get_min_tx_size(mbmi->inter_tx_size[0][0]);
if (rd_stats->rate == INT_MAX) return INT64_MAX;
@@ -4981,23 +5240,37 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
!xd->lossless[xd->mi[0]->mbmi.segment_id]) {
const int ext_tx_set = get_ext_tx_set(mbmi->min_tx_size, bsize, is_inter,
cm->reduced_tx_set_used);
- if (is_inter) {
- if (ext_tx_set > 0)
+#if CONFIG_LGT_FROM_PRED
+ if (is_lgt_allowed(mbmi->mode, mbmi->min_tx_size)) {
+ if (LGT_FROM_PRED_INTRA && !is_inter && ext_tx_set > 0 &&
+ ALLOW_INTRA_EXT_TX)
+ rd_stats->rate += x->intra_lgt_cost[txsize_sqr_map[mbmi->min_tx_size]]
+ [mbmi->mode][mbmi->use_lgt];
+ if (LGT_FROM_PRED_INTER && is_inter && ext_tx_set > 0)
rd_stats->rate +=
- cpi->inter_tx_type_costs[ext_tx_set]
+ x->inter_lgt_cost[txsize_sqr_map[mbmi->min_tx_size]][mbmi->use_lgt];
+ }
+ if (!mbmi->use_lgt) {
+#endif // CONFIG_LGT_FROM_PRED
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ rd_stats->rate +=
+ x->inter_tx_type_costs[ext_tx_set]
[txsize_sqr_map[mbmi->min_tx_size]]
[mbmi->tx_type];
- } else {
- if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
- rd_stats->rate +=
- cpi->intra_tx_type_costs[ext_tx_set][mbmi->min_tx_size][mbmi->mode]
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ rd_stats->rate +=
+ x->intra_tx_type_costs[ext_tx_set][mbmi->min_tx_size][mbmi->mode]
[mbmi->tx_type];
+ }
}
+#if CONFIG_LGT_FROM_PRED
}
+#endif
#else
if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id])
- rd_stats->rate +=
- cpi->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
+ rd_stats->rate += x->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
#endif // CONFIG_EXT_TX
#endif // CONFIG_TXK_SEL
@@ -5013,6 +5286,162 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
return rd;
}
+static uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
+ const int rows = block_size_high[bsize];
+ const int cols = block_size_wide[bsize];
+ const int diff_stride = cols;
+ const struct macroblock_plane *const p = &x->plane[0];
+ const int16_t *diff = &p->src_diff[0];
+ uint8_t hash_data[MAX_SB_SQUARE];
+ for (int r = 0; r < rows; ++r) {
+ for (int c = 0; c < cols; ++c) {
+ hash_data[cols * r + c] = clip_pixel(diff[c] + 128);
+ }
+ diff += diff_stride;
+ }
+ return (av1_get_crc_value(&x->tx_rd_record.crc_calculator, hash_data,
+ rows * cols)
+ << 7) +
+ bsize;
+}
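
The return value above packs the block size into the low bits of the CRC: shifting the CRC left by 7 leaves room for any BLOCK_SIZE below 128 (comfortably more than the enum needs), so two cache records whose residues produce the same CRC but belong to different block sizes still hash differently. A small sketch of the packing arithmetic; pack_hash and unpack_bsize are illustrative names, not in-tree functions:

#include <assert.h>
#include <stdint.h>

/* hash = (crc << 7) + bsize, as in get_block_residue_hash() above. */
static uint32_t pack_hash(uint32_t crc, int bsize) {
  assert(bsize >= 0 && bsize < (1 << 7));
  return (crc << 7) + (uint32_t)bsize;
}

/* The block size rides in the low 7 bits and can be recovered exactly. */
static int unpack_bsize(uint32_t hash) { return (int)(hash & 0x7f); }
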
+
+static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
+ const RD_STATS *const rd_stats,
+ TX_RD_INFO *const tx_rd_info) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ tx_rd_info->hash_value = hash;
+ tx_rd_info->tx_type = mbmi->tx_type;
+ tx_rd_info->tx_size = mbmi->tx_size;
+#if CONFIG_VAR_TX
+ tx_rd_info->min_tx_size = mbmi->min_tx_size;
+ memcpy(tx_rd_info->blk_skip, x->blk_skip[0],
+ sizeof(tx_rd_info->blk_skip[0]) * n4);
+ for (int idy = 0; idy < xd->n8_h; ++idy)
+ for (int idx = 0; idx < xd->n8_w; ++idx)
+ tx_rd_info->inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
+#endif // CONFIG_VAR_TX
+#if CONFIG_TXK_SEL
+ av1_copy(tx_rd_info->txk_type, mbmi->txk_type);
+#endif // CONFIG_TXK_SEL
+ tx_rd_info->rd_stats = *rd_stats;
+}
+
+static void fetch_tx_rd_info(int n4, const TX_RD_INFO *const tx_rd_info,
+ RD_STATS *const rd_stats, MACROBLOCK *const x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ mbmi->tx_type = tx_rd_info->tx_type;
+ mbmi->tx_size = tx_rd_info->tx_size;
+#if CONFIG_VAR_TX
+ mbmi->min_tx_size = tx_rd_info->min_tx_size;
+ memcpy(x->blk_skip[0], tx_rd_info->blk_skip,
+ sizeof(tx_rd_info->blk_skip[0]) * n4);
+ for (int idy = 0; idy < xd->n8_h; ++idy)
+ for (int idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy][idx] = tx_rd_info->inter_tx_size[idy][idx];
+#endif // CONFIG_VAR_TX
+#if CONFIG_TXK_SEL
+ av1_copy(mbmi->txk_type, tx_rd_info->txk_type);
+#endif // CONFIG_TXK_SEL
+ *rd_stats = tx_rd_info->rd_stats;
+}
+
+// Uses simple features on top of DCT coefficients to quickly predict
+// whether the optimal RD decision is to skip encoding the residual.
+static int predict_skip_flag_8bit(const MACROBLOCK *x, BLOCK_SIZE bsize) {
+ if (bsize > BLOCK_16X16) return 0;
+  // Tuned for a target false-positive rate of 5% for all block sizes:
+ const uint32_t threshold_table[] = { 50, 50, 50, 55, 47, 47, 53, 22, 22, 37 };
+ const struct macroblock_plane *const p = &x->plane[0];
+ const int bw = block_size_wide[bsize];
+ const int bh = block_size_high[bsize];
+ tran_low_t DCT_coefs[32 * 32];
+ TxfmParam param;
+ param.tx_type = DCT_DCT;
+#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
+ param.tx_size = max_txsize_rect_lookup[bsize];
+#else
+ param.tx_size = max_txsize_lookup[bsize];
+#endif
+ param.bd = 8;
+ param.lossless = 0;
+ av1_fwd_txfm(p->src_diff, DCT_coefs, bw, &param);
+
+ uint32_t dc = (uint32_t)av1_dc_quant(x->qindex, 0, AOM_BITS_8);
+ uint32_t ac = (uint32_t)av1_ac_quant(x->qindex, 0, AOM_BITS_8);
+ uint32_t max_quantized_coef = (100 * (uint32_t)abs(DCT_coefs[0])) / dc;
+ for (int i = 1; i < bw * bh; i++) {
+ uint32_t cur_quantized_coef = (100 * (uint32_t)abs(DCT_coefs[i])) / ac;
+ if (cur_quantized_coef > max_quantized_coef)
+ max_quantized_coef = cur_quantized_coef;
+ }
+
+ return max_quantized_coef < threshold_table[AOMMAX(bsize - BLOCK_4X4, 0)];
+}
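
The decision above is a fixed-point percentage test: each DCT coefficient is scaled by 100 and divided by its quantizer step (DC or AC), so a threshold_table entry of 50 means "predict skip unless some coefficient reaches half a quantization step". A worked sketch of the per-coefficient test with hypothetical numbers:

#include <stdint.h>
#include <stdlib.h>

/* Same percentage-of-a-step arithmetic as above. For example, a
 * coefficient of 12 against a step of 32 gives 100 * 12 / 32 = 37,
 * which stays under a threshold of 50, i.e. the coefficient sits below
 * half a quantization step and cannot veto the skip prediction. */
static int coef_below_threshold(int32_t coef, uint32_t step,
                                uint32_t threshold) {
  const uint32_t pct_of_step = (100u * (uint32_t)abs(coef)) / step;
  return pct_of_step < threshold;
}
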
+
+// Used to set the proper context for early termination with skip = 1.
+static void set_skip_flag(const AV1_COMP *cpi, MACROBLOCK *x,
+ RD_STATS *rd_stats, int bsize) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int n4 = bsize_to_num_blk(bsize);
+#if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
+ const TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
+#else
+ const TX_SIZE tx_size = max_txsize_lookup[bsize];
+#endif
+ mbmi->tx_type = DCT_DCT;
+ for (int idy = 0; idy < xd->n8_h; ++idy)
+ for (int idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy][idx] = tx_size;
+ mbmi->tx_size = tx_size;
+ mbmi->min_tx_size = get_min_tx_size(tx_size);
+ memset(x->blk_skip[0], 1, sizeof(uint8_t) * n4);
+ rd_stats->skip = 1;
+
+ // Rate.
+ const int tx_size_ctx = txsize_sqr_map[tx_size];
+ ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
+ ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
+ av1_get_entropy_contexts(bsize, 0, &xd->plane[0], ctxa, ctxl);
+ int coeff_ctx = get_entropy_context(tx_size, ctxa, ctxl);
+ int rate = x->token_head_costs[tx_size_ctx][PLANE_TYPE_Y][1][0][coeff_ctx][0];
+ if (tx_size > TX_4X4) {
+ int ctx = txfm_partition_context(
+ xd->above_txfm_context, xd->left_txfm_context, mbmi->sb_type, tx_size);
+ rate += av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
+ }
+#if !CONFIG_TXK_SEL
+#if CONFIG_EXT_TX
+ const AV1_COMMON *cm = &cpi->common;
+ const int ext_tx_set = get_ext_tx_set(max_txsize_lookup[bsize], bsize, 1,
+ cm->reduced_tx_set_used);
+ if (get_ext_tx_types(mbmi->min_tx_size, bsize, 1, cm->reduced_tx_set_used) >
+ 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ if (ext_tx_set > 0)
+ rate +=
+ x->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->min_tx_size]]
+ [mbmi->tx_type];
+ }
+#else
+ if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id])
+    rate += x->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
+#endif // CONFIG_EXT_TX
+#endif // CONFIG_TXK_SEL
+ rd_stats->rate = rate;
+
+ // Distortion.
+ int64_t tmp = pixel_diff_dist(x, 0, x->plane[0].src_diff,
+ block_size_wide[bsize], 0, 0, bsize, bsize);
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
+#endif // CONFIG_HIGHBITDEPTH
+ rd_stats->dist = rd_stats->sse = (tmp << 4);
+}
+
static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
RD_STATS *rd_stats, BLOCK_SIZE bsize,
int64_t ref_best_rd) {
@@ -5037,18 +5466,52 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
const int n4 = bsize_to_num_blk(bsize);
int idx, idy;
int prune = 0;
- const int count32 =
- 1 << (2 * (cm->mib_size_log2 - mi_width_log2_lookup[BLOCK_32X32]));
-#if CONFIG_EXT_PARTITION
- RD_STATS rd_stats_stack[16];
-#else
- RD_STATS rd_stats_stack[4];
-#endif // CONFIG_EXT_PARTITION
#if CONFIG_EXT_TX
+ const TxSetType tx_set_type = get_ext_tx_set_type(
+ max_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
const int ext_tx_set =
get_ext_tx_set(max_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
#endif // CONFIG_EXT_TX
+ av1_invalid_rd_stats(rd_stats);
+
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = 0;
+ int search_lgt = is_inter
+ ? LGT_FROM_PRED_INTER &&
+                             !(cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
+ : LGT_FROM_PRED_INTRA && ALLOW_INTRA_EXT_TX;
+#endif // CONFIG_LGT_FROM_PRED
+
+ const uint32_t hash = get_block_residue_hash(x, bsize);
+ TX_RD_RECORD *tx_rd_record = &x->tx_rd_record;
+
+ if (ref_best_rd != INT64_MAX) {
+ for (int i = 0; i < tx_rd_record->num; ++i) {
+ const int index = (tx_rd_record->index_start + i) % RD_RECORD_BUFFER_LEN;
+ // If there is a match in the tx_rd_record, fetch the RD decision and
+ // terminate early.
+ if (tx_rd_record->tx_rd_info[index].hash_value == hash) {
+ TX_RD_INFO *tx_rd_info = &tx_rd_record->tx_rd_info[index];
+ fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x);
+ return;
+ }
+ }
+ }
+
+// If we predict that skip is the optimal RD decision, set the respective
+// context and terminate early.
+#if CONFIG_HIGHBITDEPTH
+ if (!(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH))
+#endif // CONFIG_HIGHBITDEPTH
+ {
+ if (is_inter && cpi->sf.tx_type_search.use_skip_flag_prediction &&
+ predict_skip_flag_8bit(x, bsize)) {
+ set_skip_flag(cpi, x, rd_stats, bsize);
+ return;
+ }
+ }
+
if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
#if CONFIG_EXT_TX
prune = prune_tx_types(cpi, bsize, x, xd, ext_tx_set);
@@ -5056,10 +5519,7 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
prune = prune_tx_types(cpi, bsize, x, xd, 0);
#endif // CONFIG_EXT_TX
- av1_invalid_rd_stats(rd_stats);
-
- for (idx = 0; idx < count32; ++idx)
- av1_invalid_rd_stats(&rd_stats_stack[idx]);
+ int found = 0;
for (tx_type = txk_start; tx_type < txk_end; ++tx_type) {
RD_STATS this_rd_stats;
@@ -5067,11 +5527,14 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
#if CONFIG_MRC_TX
    // MRC_DCT is only implemented for TX_32X32, so only include this tx type
    // in the search for TX_32X32.
- if (tx_type == MRC_DCT && max_tx_size != TX_32X32) continue;
+ if (tx_type == MRC_DCT &&
+ (max_tx_size != TX_32X32 || (is_inter && !USE_MRC_INTER) ||
+ (!is_inter && !USE_MRC_INTRA)))
+ continue;
#endif // CONFIG_MRC_TX
#if CONFIG_EXT_TX
+ if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
if (is_inter) {
- if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
if (!do_tx_type_search(tx_type, prune)) continue;
}
@@ -5079,7 +5542,6 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
}
- if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
}
#else // CONFIG_EXT_TX
if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
@@ -5094,8 +5556,8 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
if (tx_type != DCT_DCT) continue;
rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd,
- tx_type, rd_stats_stack);
-
+ tx_type);
+ ref_best_rd = AOMMIN(rd, ref_best_rd);
if (rd < best_rd) {
best_rd = rd;
*rd_stats = this_rd_stats;
@@ -5103,12 +5565,41 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
best_tx = mbmi->tx_size;
best_min_tx_size = mbmi->min_tx_size;
memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
+ found = 1;
for (idy = 0; idy < xd->n8_h; ++idy)
for (idx = 0; idx < xd->n8_w; ++idx)
best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
}
}
+ // We should always find at least one candidate unless ref_best_rd is less
+ // than INT64_MAX (in which case, all the calls to select_tx_size_fix_type
+ // might have failed to find something better)
+ assert(IMPLIES(!found, ref_best_rd != INT64_MAX));
+ if (!found) return;
+
+#if CONFIG_LGT_FROM_PRED
+ if (search_lgt && is_lgt_allowed(mbmi->mode, max_tx_size) &&
+ !cm->reduced_tx_set_used) {
+ RD_STATS this_rd_stats;
+ mbmi->use_lgt = 1;
+ rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd, 0);
+ if (rd < best_rd) {
+ best_rd = rd;
+ *rd_stats = this_rd_stats;
+ best_tx = mbmi->tx_size;
+ best_min_tx_size = mbmi->min_tx_size;
+ memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
+ } else {
+ mbmi->use_lgt = 0;
+ }
+ }
+#endif // CONFIG_LGT_FROM_PRED
+ // We found a candidate transform to use. Copy our results from the "best"
+ // array into mbmi.
mbmi->tx_type = best_tx_type;
for (idy = 0; idy < xd->n8_h; ++idy)
for (idx = 0; idx < xd->n8_w; ++idx)
@@ -5116,6 +5607,19 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
mbmi->tx_size = best_tx;
mbmi->min_tx_size = best_min_tx_size;
memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
+
+ // Save the RD search results into tx_rd_record.
+ int index;
+ if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) {
+ index =
+ (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN;
+ ++tx_rd_record->num;
+ } else {
+ index = tx_rd_record->index_start;
+ tx_rd_record->index_start =
+ (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN;
+ }
+ save_tx_rd_info(n4, hash, x, rd_stats, &tx_rd_record->tx_rd_info[index]);
}
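
The tx_rd_record bookkeeping at the tail of select_tx_type_yrd() is a fixed-capacity circular buffer: while there is room, a new entry goes at index_start + num; once full, the oldest entry at index_start is overwritten and the start index advances. A self-contained sketch of the same insert policy; RING_LEN and Record stand in for RD_RECORD_BUFFER_LEN and TX_RD_INFO:

#include <stdint.h>

#define RING_LEN 8 /* stand-in for RD_RECORD_BUFFER_LEN */

typedef struct {
  uint32_t hash_value; /* stand-in for the full TX_RD_INFO payload */
} Record;

typedef struct {
  Record info[RING_LEN];
  int index_start; /* oldest live entry */
  int num;         /* live entries, <= RING_LEN */
} RingRecord;

/* Append while there is room; otherwise evict the oldest entry. */
static void ring_push(RingRecord *rec, Record r) {
  int index;
  if (rec->num < RING_LEN) {
    index = (rec->index_start + rec->num) % RING_LEN;
    ++rec->num;
  } else {
    index = rec->index_start;
    rec->index_start = (rec->index_start + 1) % RING_LEN;
  }
  rec->info[index] = r;
}
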
static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
@@ -5145,7 +5649,9 @@ static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
ENTROPY_CONTEXT *tl = left_ctx + blk_row;
av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
plane_bsize, ta, tl, rd_stats);
+#if !CONFIG_PVQ
av1_set_txb_context(x, plane, block, tx_size, ta, tl);
+#endif // !CONFIG_PVQ
} else {
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bsl = tx_size_wide_unit[sub_txs];
@@ -5250,7 +5756,6 @@ static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
}
#endif // CONFIG_VAR_TX
-#if CONFIG_PALETTE
static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
int dc_mode_cost,
uint8_t *best_palette_color_map,
@@ -5263,6 +5768,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
assert(!is_inter_block(mbmi));
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
const BLOCK_SIZE bsize = mbmi->sb_type;
+ assert(bsize >= BLOCK_8X8);
int this_rate;
int64_t this_rd;
int colors_u, colors_v, colors;
@@ -5296,17 +5802,14 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_PALETTE_DELTA_ENCODING
- const MODE_INFO *above_mi = xd->above_mi;
- const MODE_INFO *left_mi = xd->left_mi;
uint16_t color_cache[2 * PALETTE_MAX_SIZE];
- const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache);
+ const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
#endif // CONFIG_PALETTE_DELTA_ENCODING
colors = colors_u > colors_v ? colors_u : colors_v;
if (colors > 1 && colors <= 64) {
int r, c, n, i, j;
const int max_itr = 50;
- uint8_t color_order[PALETTE_MAX_SIZE];
float lb_u, ub_u, val_u;
float lb_v, ub_v, val_v;
float *const data = x->palette_buffer->kmeans_data_buf;
@@ -5402,7 +5905,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
if (tokenonly_rd_stats.rate == INT_MAX) continue;
this_rate =
tokenonly_rd_stats.rate + dc_mode_cost +
- cpi->palette_uv_size_cost[bsize - BLOCK_8X8][n - PALETTE_MIN_SIZE] +
+ x->palette_uv_size_cost[bsize - BLOCK_8X8][n - PALETTE_MIN_SIZE] +
write_uniform_cost(n, color_map[0]) +
av1_cost_bit(
av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 1);
@@ -5411,17 +5914,8 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
color_cache, n_cache,
#endif // CONFIG_PALETTE_DELTA_ENCODING
cpi->common.bit_depth);
- for (i = 0; i < rows; ++i) {
- for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
- int color_idx;
- const int color_ctx = av1_get_palette_color_index_context(
- color_map, plane_block_width, i, j, n, color_order, &color_idx);
- assert(color_idx >= 0 && color_idx < n);
- this_rate += cpi->palette_uv_color_cost[n - PALETTE_MIN_SIZE]
- [color_ctx][color_idx];
- }
- }
-
+ this_rate +=
+ av1_cost_color_map(x, 1, 0, bsize, mbmi->tx_size, PALETTE_MAP);
this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
*best_rd = this_rd;
@@ -5438,10 +5932,10 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
memcpy(color_map, best_palette_color_map,
- rows * cols * sizeof(best_palette_color_map[0]));
+ plane_block_width * plane_block_height *
+ sizeof(best_palette_color_map[0]));
}
}
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
// Return 1 if a filter intra mode is selected; return 0 otherwise.
@@ -5461,9 +5955,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
av1_zero(filter_intra_mode_info);
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 1;
mbmi->uv_mode = UV_DC_PRED;
-#if CONFIG_PALETTE
mbmi->palette_mode_info.palette_size[1] = 0;
-#endif // CONFIG_PALETTE
for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
mbmi->filter_intra_mode_info.filter_intra_mode[1] = mode;
@@ -5472,7 +5964,7 @@ static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
this_rate = tokenonly_rd_stats.rate +
av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 1) +
- cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
+ x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
write_uniform_cost(FILTER_INTRA_MODES, mode);
this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < *best_rd) {
@@ -5586,11 +6078,10 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_EXT_INTRA
#if CONFIG_CFL
-static int64_t cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
- const int y_averages_q3[MAX_NUM_TXB],
- const uint8_t *src, int src_stride, int width,
- int height, TX_SIZE tx_size, int dc_pred,
- int alpha_q3, int64_t *dist_neg_out) {
+static int64_t cfl_alpha_dist_lbd(const int16_t *pred_buf_q3,
+ const uint8_t *src, int src_stride, int width,
+ int height, int dc_pred, int alpha_q3,
+ int64_t *dist_neg_out) {
int64_t dist = 0;
int diff;
@@ -5609,63 +6100,87 @@ static int64_t cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
}
int64_t dist_neg = 0;
- const int tx_height = tx_size_high[tx_size];
- const int tx_width = tx_size_wide[tx_size];
- const int y_block_row_off = y_stride * tx_height;
- const int src_block_row_off = src_stride * tx_height;
- const uint8_t *t_y_pix;
- const uint8_t *t_src;
- int a = 0;
- for (int b_j = 0; b_j < height; b_j += tx_height) {
- const int h = b_j + tx_height;
- for (int b_i = 0; b_i < width; b_i += tx_width) {
- const int w = b_i + tx_width;
- const int tx_avg_q3 = y_averages_q3[a++];
- t_y_pix = y_pix;
- t_src = src;
- for (int t_j = b_j; t_j < h; t_j++) {
- for (int t_i = b_i; t_i < w; t_i++) {
- const int uv = t_src[t_i];
-
- const int scaled_luma =
- get_scaled_luma_q0(alpha_q3, t_y_pix[t_i], tx_avg_q3);
-
- // TODO(ltrudeau) add support for HBD.
- diff = uv - clamp(scaled_luma + dc_pred, 0, 255);
- dist += diff * diff;
-
- // TODO(ltrudeau) add support for HBD.
- diff = uv - clamp(-scaled_luma + dc_pred, 0, 255);
- dist_neg += diff * diff;
- }
- t_y_pix += y_stride;
- t_src += src_stride;
- }
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ const int uv = src[i];
+ const int scaled_luma = get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]);
+
+ diff = uv - clip_pixel(scaled_luma + dc_pred);
+ dist += diff * diff;
+
+ diff = uv - clip_pixel(-scaled_luma + dc_pred);
+ dist_neg += diff * diff;
}
- y_pix += y_block_row_off;
- src += src_block_row_off;
+ pred_buf_q3 += MAX_SB_SIZE;
+ src += src_stride;
}
if (dist_neg_out) *dist_neg_out = dist_neg;
return dist;
}
+#if CONFIG_HIGHBITDEPTH
+static int64_t cfl_alpha_dist_hbd(const int16_t *pred_buf_q3,
+ const uint16_t *src, int src_stride,
+ int width, int height, int dc_pred,
+ int alpha_q3, int bit_depth,
+ int64_t *dist_neg_out) {
+ const int shift = 2 * (bit_depth - 8);
+ const int rounding = shift > 0 ? (1 << shift) >> 1 : 0;
+ int64_t dist = 0;
+ int diff;
-static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) {
- assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] ==
- AOM_ICDF(CDF_PROB_TOP));
+ if (alpha_q3 == 0) {
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ diff = src[i] - dc_pred;
+ dist += diff * diff;
+ }
+ src += src_stride;
+ }
+ dist = (dist + rounding) >> shift;
- aom_cdf_prob prev_cdf = 0;
+ if (dist_neg_out) *dist_neg_out = dist;
- for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
- const int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) +
- (cfl_alpha_codes[c][CFL_PRED_V] != 0);
+ return dist;
+ }
+
+ int64_t dist_neg = 0;
+ for (int j = 0; j < height; j++) {
+ for (int i = 0; i < width; i++) {
+ const int uv = src[i];
+ const int scaled_luma = get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]);
- aom_cdf_prob prob = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - prev_cdf;
- prev_cdf = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]);
+ diff = uv - clip_pixel_highbd(scaled_luma + dc_pred, bit_depth);
+ dist += diff * diff;
- cfl->costs[c] = av1_cost_symbol(prob) + av1_cost_literal(sign_bit_cost);
+ diff = uv - clip_pixel_highbd(-scaled_luma + dc_pred, bit_depth);
+ dist_neg += diff * diff;
+ }
+ pred_buf_q3 += MAX_SB_SIZE;
+ src += src_stride;
+ }
+
+ if (dist_neg_out) *dist_neg_out = (dist_neg + rounding) >> shift;
+
+ return (dist + rounding) >> shift;
+}
+#endif // CONFIG_HIGHBITDEPTH
+static int64_t cfl_alpha_dist(const int16_t *pred_buf_q3, const uint8_t *src,
+ int src_stride, int width, int height,
+ int dc_pred, int alpha_q3, int use_hbd,
+ int bit_depth, int64_t *dist_neg_out) {
+#if CONFIG_HIGHBITDEPTH
+ if (use_hbd) {
+ const uint16_t *src_16 = CONVERT_TO_SHORTPTR(src);
+ return cfl_alpha_dist_hbd(pred_buf_q3, src_16, src_stride, width, height,
+ dc_pred, alpha_q3, bit_depth, dist_neg_out);
}
+#endif // CONFIG_HIGHBITDEPTH
+ (void)use_hbd;
+ (void)bit_depth;
+ return cfl_alpha_dist_lbd(pred_buf_q3, src, src_stride, width, height,
+ dc_pred, alpha_q3, dist_neg_out);
}
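
In cfl_alpha_dist_hbd() above, squared errors computed at bit_depth bits are rescaled so the high-bit-depth and 8-bit paths return comparable distortions: an error magnified by (bit_depth - 8) bits squares to 2 * (bit_depth - 8) extra bits, which the final shift removes with round-to-nearest. The normalization step in isolation (a sketch, assuming the same shift and rounding convention as above):

/* Normalize a high-bit-depth SSE to 8-bit units, rounding to nearest.
 * For bit_depth == 8 the shift is zero and the value passes through. */
static long long normalize_hbd_sse(long long sse, int bit_depth) {
  const int shift = 2 * (bit_depth - 8);
  const long long rounding = shift > 0 ? (1LL << shift) >> 1 : 0;
  return (sse + rounding) >> shift;
}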
static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
@@ -5677,7 +6192,6 @@ static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
const int src_stride_v = p_v->src.stride;
MACROBLOCKD *const xd = &x->e_mbd;
- FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
CFL_CTX *const cfl = xd->cfl;
@@ -5686,74 +6200,71 @@ static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
const int height = cfl->uv_height;
const int dc_pred_u = cfl->dc_pred[CFL_PRED_U];
const int dc_pred_v = cfl->dc_pred[CFL_PRED_V];
- const int *y_averages_q3 = cfl->y_averages_q3;
- const uint8_t *y_pix = cfl->y_down_pix;
-
- CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;
-
- cfl_update_costs(cfl, ec_ctx);
+ const int16_t *pred_buf_q3 = cfl->pred_buf_q3;
+ const int use_hbd = get_bitdepth_data_path_index(xd);
int64_t sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
sse[CFL_PRED_U][0] =
- cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u,
- width, height, tx_size, dc_pred_u, 0, NULL);
+ cfl_alpha_dist(pred_buf_q3, src_u, src_stride_u, width, height, dc_pred_u,
+ 0, use_hbd, xd->bd, NULL);
sse[CFL_PRED_V][0] =
- cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v,
- width, height, tx_size, dc_pred_v, 0, NULL);
+ cfl_alpha_dist(pred_buf_q3, src_v, src_stride_v, width, height, dc_pred_v,
+ 0, use_hbd, xd->bd, NULL);
- for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
- assert(cfl_alpha_mags_q3[m + 1] == -cfl_alpha_mags_q3[m]);
+ for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
+ const int m = c * 2 + 1;
+ const int abs_alpha_q3 = c + 1;
sse[CFL_PRED_U][m] = cfl_alpha_dist(
- y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, width, height,
- tx_size, dc_pred_u, cfl_alpha_mags_q3[m], &sse[CFL_PRED_U][m + 1]);
+ pred_buf_q3, src_u, src_stride_u, width, height, dc_pred_u,
+ abs_alpha_q3, use_hbd, xd->bd, &sse[CFL_PRED_U][m + 1]);
sse[CFL_PRED_V][m] = cfl_alpha_dist(
- y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, width, height,
- tx_size, dc_pred_v, cfl_alpha_mags_q3[m], &sse[CFL_PRED_V][m + 1]);
+ pred_buf_q3, src_v, src_stride_v, width, height, dc_pred_v,
+ abs_alpha_q3, use_hbd, xd->bd, &sse[CFL_PRED_V][m + 1]);
}
int64_t dist;
int64_t cost;
- int64_t best_cost;
+ int64_t best_cost = INT64_MAX;
+ int best_rate = 0;
// Compute least squares parameter of the entire block
- // IMPORTANT: We assume that the first code is 0,0
int ind = 0;
- signs[CFL_PRED_U] = CFL_SIGN_POS;
- signs[CFL_PRED_V] = CFL_SIGN_POS;
-
- dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0];
- dist *= 16;
- best_cost = RDCOST(x->rdmult, cfl->costs[0], dist);
-
- for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
- const int idx_u = cfl_alpha_codes[c][CFL_PRED_U];
- const int idx_v = cfl_alpha_codes[c][CFL_PRED_V];
- for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) {
- for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) {
+ int signs = 0;
+
+ for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
+ const int sign_u = CFL_SIGN_U(joint_sign);
+ const int sign_v = CFL_SIGN_V(joint_sign);
+ const int size_u = (sign_u == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
+ const int size_v = (sign_v == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
+ for (int u = 0; u < size_u; u++) {
+ const int idx_u = (sign_u == CFL_SIGN_ZERO) ? 0 : u * 2 + 1;
+ for (int v = 0; v < size_v; v++) {
+ const int idx_v = (sign_v == CFL_SIGN_ZERO) ? 0 : v * 2 + 1;
dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
dist *= 16;
- cost = RDCOST(x->rdmult, cfl->costs[c], dist);
+ const int rate = x->cfl_cost[joint_sign][CFL_PRED_U][u] +
+ x->cfl_cost[joint_sign][CFL_PRED_V][v];
+ cost = RDCOST(x->rdmult, rate, dist);
if (cost < best_cost) {
best_cost = cost;
- ind = c;
- signs[CFL_PRED_U] = sign_u;
- signs[CFL_PRED_V] = sign_v;
+ best_rate = rate;
+ ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;
+ signs = joint_sign;
}
}
}
}
mbmi->cfl_alpha_idx = ind;
- return cfl->costs[ind];
+ mbmi->cfl_alpha_signs = signs;
+ return best_rate;
}
#endif // CONFIG_CFL
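
The rewritten cfl_rd_pick_alpha() is an exhaustive argmin over CfL joint signs and per-plane alpha magnitudes: a plane whose sign is zero contributes exactly one candidate (alpha == 0), a nonzero sign contributes the whole alphabet, and each candidate is scored from the precomputed sse[][] table. A runnable toy of the loop shape; toy_rate()/toy_dist() are hypothetical stand-ins for x->cfl_cost[][][] and the SSE sums, the sign decoding mirrors what the CFL_SIGN_U/V macros are assumed to compute, and 16/4 stand in for CFL_ALPHABET_SIZE and CFL_ALPHABET_SIZE_LOG2:

#include <limits.h>

#define SIGNS 3        /* zero, negative, positive (CFL_SIGNS assumed == 3) */
#define JOINT_SIGNS 8  /* SIGNS * SIGNS - 1: (zero, zero) is never coded */
#define ALPHABET 16    /* stand-in for CFL_ALPHABET_SIZE */

static int toy_rate(int js, int plane, int m) { return js + plane + m; }
static long long toy_dist(int plane, int sign, int m) {
  return 100 - m - sign - plane; /* placeholder for the sse[][] sums */
}

static long long pick_cfl_alpha(int *best_js, int *best_ind, long long rdmult) {
  long long best_cost = LLONG_MAX;
  for (int js = 0; js < JOINT_SIGNS; ++js) {
    const int sign_u = (js + 1) / SIGNS; /* assumed CFL_SIGN_U() behavior */
    const int sign_v = (js + 1) % SIGNS; /* assumed CFL_SIGN_V() behavior */
    const int nu = sign_u == 0 ? 1 : ALPHABET; /* zero sign => alpha is 0 */
    const int nv = sign_v == 0 ? 1 : ALPHABET;
    for (int u = 0; u < nu; ++u) {
      for (int v = 0; v < nv; ++v) {
        const int rate = toy_rate(js, 0, u) + toy_rate(js, 1, v);
        const long long dist = toy_dist(0, sign_u, u) + toy_dist(1, sign_v, v);
        /* Same shape as the RDCOST() use above, including the dist * 16
         * scaling; the exact fixed-point constants are hypothetical. */
        const long long cost = rate * rdmult + 16 * dist;
        if (cost < best_cost) {
          best_cost = cost;
          *best_js = js;
          *best_ind = (u << 4) | v;
        }
      }
    }
  }
  return best_cost;
}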
static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
mbmi->uv_mode = UV_DC_PRED;
-#if CONFIG_PALETTE
mbmi->palette_mode_info.palette_size[1] = 0;
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif // CONFIG_FILTER_INTRA
@@ -5772,9 +6283,9 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
od_rollback_buffer buf;
od_encode_checkpoint(&x->daala_enc, &buf);
#endif // CONFIG_PVQ
-#if CONFIG_PALETTE
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-#endif // CONFIG_PALETTE
+ const int try_palette =
+ av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
int this_rate;
@@ -5782,7 +6293,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
#if CONFIG_EXT_INTRA
const int is_directional_mode =
- av1_is_directional_mode(mode, mbmi->sb_type);
+ av1_is_directional_mode(get_uv_mode(mode), mbmi->sb_type);
#endif // CONFIG_EXT_INTRA
if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
(1 << mode)))
@@ -5791,7 +6302,8 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
mbmi->uv_mode = mode;
#if CONFIG_CFL
int cfl_alpha_rate = 0;
- if (mode == UV_DC_PRED) {
+ if (mode == UV_CFL_PRED) {
+ assert(!is_directional_mode);
const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size);
}
@@ -5799,7 +6311,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_EXT_INTRA
mbmi->angle_delta[1] = 0;
if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
- const int rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
+ const int rate_overhead = x->intra_uv_mode_cost[mbmi->mode][mode] +
write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
&this_rate, &tokenonly_rd_stats))
@@ -5816,10 +6328,10 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
}
#endif // CONFIG_EXT_INTRA
this_rate =
- tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode];
+ tokenonly_rd_stats.rate + x->intra_uv_mode_cost[mbmi->mode][mode];
#if CONFIG_CFL
- if (mode == UV_DC_PRED) {
+ if (mode == UV_CFL_PRED) {
this_rate += cfl_alpha_rate;
}
#endif
@@ -5830,15 +6342,12 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
}
#endif // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
- if (mbmi->sb_type >= BLOCK_8X8 && mode == DC_PRED)
+ if (mbmi->sb_type >= BLOCK_8X8 && mode == UV_DC_PRED)
this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 0);
#endif // CONFIG_FILTER_INTRA
-#if CONFIG_PALETTE
- if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8 &&
- mode == UV_DC_PRED)
+ if (try_palette && mode == UV_DC_PRED)
this_rate += av1_cost_bit(
av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
-#endif // CONFIG_PALETTE
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, &buf);
@@ -5855,15 +6364,13 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
}
}
-#if CONFIG_PALETTE
- if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8) {
+ if (try_palette) {
uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
rd_pick_palette_intra_sbuv(cpi, x,
- cpi->intra_uv_mode_cost[mbmi->mode][UV_DC_PRED],
+ x->intra_uv_mode_cost[mbmi->mode][UV_DC_PRED],
best_palette_color_map, &best_mbmi, &best_rd,
rate, rate_tokenonly, distortion, skippable);
}
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
if (mbmi->sb_type >= BLOCK_8X8) {
@@ -5880,19 +6387,17 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
}
static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
- PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
- TX_SIZE max_tx_size, int *rate_uv,
- int *rate_uv_tokenonly, int64_t *dist_uv,
- int *skip_uv, UV_PREDICTION_MODE *mode_uv) {
+ BLOCK_SIZE bsize, TX_SIZE max_tx_size,
+ int *rate_uv, int *rate_uv_tokenonly,
+ int64_t *dist_uv, int *skip_uv,
+ UV_PREDICTION_MODE *mode_uv) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
// Use an estimated rd for uv_intra based on DC_PRED if the
// appropriate speed flag is set.
- (void)ctx;
- init_sbuv_mode(&x->e_mbd.mi[0]->mbmi);
+ init_sbuv_mode(mbmi);
#if CONFIG_CB4X4
-#if CONFIG_CHROMA_2X2
- rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
- bsize, max_tx_size);
-#else
+#if !CONFIG_CHROMA_2X2
if (x->skip_chroma_rd) {
*rate_uv = 0;
*rate_uv_tokenonly = 0;
@@ -5901,32 +6406,47 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
*mode_uv = UV_DC_PRED;
return;
}
- BLOCK_SIZE bs = scale_chroma_bsize(bsize, x->e_mbd.plane[1].subsampling_x,
- x->e_mbd.plane[1].subsampling_y);
- rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
- bs, max_tx_size);
-#endif // CONFIG_CHROMA_2X2
+ bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
+ xd->plane[AOM_PLANE_U].subsampling_y);
+#endif // !CONFIG_CHROMA_2X2
+#if CONFIG_CFL
+ // Only store reconstructed luma when there's chroma RDO. When there's no
+ // chroma RDO, the reconstructed luma will be stored in encode_superblock().
+ xd->cfl->store_y = !x->skip_chroma_rd;
+#endif // CONFIG_CFL
#else
- rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
- bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
+ bsize = bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize;
+#if CONFIG_CFL
+ xd->cfl->store_y = 1;
+#endif // CONFIG_CFL
#endif // CONFIG_CB4X4
- *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
+#if CONFIG_CFL
+ if (xd->cfl->store_y) {
+ // Perform one extra call to txfm_rd_in_plane(), with the values chosen
+ // during luma RDO, so we can store reconstructed luma values
+ RD_STATS this_rd_stats;
+ txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
+ mbmi->sb_type, mbmi->tx_size,
+ cpi->sf.use_fast_coef_costing);
+ xd->cfl->store_y = 0;
+ }
+#endif // CONFIG_CFL
+ rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
+ bsize, max_tx_size);
+ *mode_uv = mbmi->uv_mode;
}
-static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
+static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode,
int16_t mode_context) {
-#if CONFIG_EXT_INTER
if (is_inter_compound_mode(mode)) {
- return cpi
+ return x
->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
#if CONFIG_COMPOUND_SINGLEREF
} else if (is_inter_singleref_comp_mode(mode)) {
- return cpi
- ->inter_singleref_comp_mode_cost[mode_context]
- [INTER_SINGLEREF_COMP_OFFSET(mode)];
+ return x->inter_singleref_comp_mode_cost[mode_context]
+ [INTER_SINGLEREF_COMP_OFFSET(mode)];
#endif // CONFIG_COMPOUND_SINGLEREF
}
-#endif
int mode_cost = 0;
int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
@@ -5935,32 +6455,32 @@ static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
assert(is_inter_mode(mode));
if (mode == NEWMV) {
- mode_cost = cpi->newmv_mode_cost[mode_ctx][0];
+ mode_cost = x->newmv_mode_cost[mode_ctx][0];
return mode_cost;
} else {
- mode_cost = cpi->newmv_mode_cost[mode_ctx][1];
+ mode_cost = x->newmv_mode_cost[mode_ctx][1];
mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
if (is_all_zero_mv) return mode_cost;
if (mode == ZEROMV) {
- mode_cost += cpi->zeromv_mode_cost[mode_ctx][0];
+ mode_cost += x->zeromv_mode_cost[mode_ctx][0];
return mode_cost;
} else {
- mode_cost += cpi->zeromv_mode_cost[mode_ctx][1];
+ mode_cost += x->zeromv_mode_cost[mode_ctx][1];
mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;
if (mode_context & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;
if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;
- mode_cost += cpi->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
+ mode_cost += x->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
return mode_cost;
}
}
}
-#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
+#if (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
COMPOUND_TYPE comp_type) {
(void)bsize;
@@ -5975,7 +6495,7 @@ static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
default: assert(0); return 0;
}
}
-#endif // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
+#endif // (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
typedef struct {
int eobs;
@@ -5986,9 +6506,7 @@ typedef struct {
int64_t brdcost;
int_mv mvs[2];
int_mv pred_mv[2];
-#if CONFIG_EXT_INTER
int_mv ref_mv[2];
-#endif // CONFIG_EXT_INTER
#if CONFIG_CHROMA_2X2
ENTROPY_CONTEXT ta[4];
@@ -6009,16 +6527,12 @@ typedef struct {
int64_t sse;
int segment_yrate;
PREDICTION_MODE modes[4];
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
SEG_RDSTAT rdstat[4][INTER_MODES + INTER_SINGLEREF_COMP_MODES +
INTER_COMPOUND_MODES];
#else // !CONFIG_COMPOUND_SINGLEREF
SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
#endif // CONFIG_COMPOUND_SINGLEREF
-#else // !CONFIG_EXT_INTER
- SEG_RDSTAT rdstat[4][INTER_MODES];
-#endif // CONFIG_EXT_INTER
int mvthresh;
} BEST_SEG_INFO;
@@ -6032,10 +6546,9 @@ static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive, then clean up or remove
static int check_best_zero_mv(
- const AV1_COMP *const cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
-#if CONFIG_EXT_INTER
+ const AV1_COMP *const cpi, const MACROBLOCK *const x,
+ const int16_t mode_context[TOTAL_REFS_PER_FRAME],
const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
-#endif // CONFIG_EXT_INTER
int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
int mi_row, int mi_col) {
@@ -6045,34 +6558,33 @@ static int check_best_zero_mv(
#endif
(void)mi_row;
(void)mi_col;
+ (void)cpi;
#if CONFIG_GLOBAL_MOTION
- if (this_mode == ZEROMV
-#if CONFIG_EXT_INTER
- || this_mode == ZERO_ZEROMV
-#endif // CONFIG_EXT_INTER
- ) {
+ if (this_mode == ZEROMV || this_mode == ZERO_ZEROMV) {
for (int cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
zeromv[cur_frm].as_int =
gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
cpi->common.allow_high_precision_mv, bsize,
- mi_col, mi_row, block)
+ mi_col, mi_row, block
+#if CONFIG_AMVR
+ ,
+ cpi->common.cur_frame_mv_precision_level
+#endif
+ )
.as_int;
}
}
#endif // CONFIG_GLOBAL_MOTION
-#if !CONFIG_EXT_INTER
- assert(ref_frames[1] != INTRA_FRAME); // Just sanity check
-#endif // !CONFIG_EXT_INTER
if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
(ref_frames[1] <= INTRA_FRAME ||
frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) {
int16_t rfc =
av1_mode_context_analyzer(mode_context, ref_frames, bsize, block);
- int c1 = cost_mv_ref(cpi, NEARMV, rfc);
- int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
- int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
+ int c1 = cost_mv_ref(x, NEARMV, rfc);
+ int c2 = cost_mv_ref(x, NEARESTMV, rfc);
+ int c3 = cost_mv_ref(x, ZEROMV, rfc);
if (this_mode == NEARMV) {
if (c1 > c3) return 0;
@@ -6092,16 +6604,14 @@ static int check_best_zero_mv(
return 0;
}
}
- }
-#if CONFIG_EXT_INTER
- else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
- this_mode == ZERO_ZEROMV) &&
- frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
- frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
+ } else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
+ this_mode == ZERO_ZEROMV) &&
+ frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
+ frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
int16_t rfc = compound_mode_context[ref_frames[0]];
- int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, rfc);
- int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, rfc);
- int c5 = cost_mv_ref(cpi, NEAR_NEARMV, rfc);
+ int c2 = cost_mv_ref(x, NEAREST_NEARESTMV, rfc);
+ int c3 = cost_mv_ref(x, ZERO_ZEROMV, rfc);
+ int c5 = cost_mv_ref(x, NEAR_NEARMV, rfc);
if (this_mode == NEAREST_NEARESTMV) {
if (c2 > c3) return 0;
@@ -6116,45 +6626,42 @@ static int check_best_zero_mv(
return 0;
}
}
-#endif // CONFIG_EXT_INTER
return 1;
}
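
check_best_zero_mv() above prunes a candidate mode whose motion collapses to the (global-motion) zero MV whenever a cheaper mode codes the identical motion, so only the cheapest representation survives into the mode loop. A compact sketch of the single-reference decision; c_near/c_nearest/c_zero stand in for the cost_mv_ref() results, and the real check additionally verifies that the rival modes really do produce zero motion:

/* Return 0 when `this_mode` should be skipped because a rival zero-motion
 * mode is cheaper (mirrors the c1/c2/c3 comparisons above). */
static int keep_zero_motion_mode(int is_nearmv, int is_nearestmv,
                                 int c_near, int c_nearest, int c_zero) {
  if (is_nearmv) return c_near <= c_zero;       /* NEARMV must beat ZEROMV */
  if (is_nearestmv) return c_nearest <= c_zero; /* same test for NEARESTMV */
  return c_zero < c_near && c_zero < c_nearest; /* ZEROMV must beat both */
}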
static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int_mv *frame_mv,
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
int_mv *frame_comp_mv,
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
int mi_row, int mi_col,
-#if CONFIG_EXT_INTER
int_mv *ref_mv_sub8x8[2], const uint8_t *mask,
- int mask_stride,
-#endif // CONFIG_EXT_INTER
- int *rate_mv, const int block) {
+ int mask_stride, int *rate_mv,
+ const int block) {
const AV1_COMMON *const cm = &cpi->common;
const int pw = block_size_wide[bsize];
const int ph = block_size_high[bsize];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
// This function should only ever be called for compound modes
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi)) {
assert(is_inter_singleref_comp_mode(mbmi->mode));
assert(frame_comp_mv);
}
assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode));
- const int refs[2] = { mbmi->ref_frame[0], has_second_ref(mbmi)
- ? mbmi->ref_frame[1]
- : mbmi->ref_frame[0] };
+ const int refs[2] = { mbmi->ref_frame[0],
+ has_second_ref(mbmi) ? mbmi->ref_frame[1]
+ : mbmi->ref_frame[0] };
#else
assert(has_second_ref(mbmi));
const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
int_mv ref_mv[2];
int ite, ref;
struct scale_factors sf;
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
- // ic and ir are the 4x4 coordiantes of the sub8x8 at index "block"
+ // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
const int ic = block & 1;
const int ir = (block - ic) >> 1;
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -6162,18 +6669,19 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
#if CONFIG_GLOBAL_MOTION
int is_global[2];
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref)
#else
- for (ref = 0; ref < 2; ++ref) {
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 2; ++ref)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
WarpedMotionParams *const wm =
&xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi)) is_global[1] = is_global[0];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
#endif // CONFIG_GLOBAL_MOTION
#else // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
(void)block;
@@ -6195,20 +6703,21 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif // CONFIG_HIGHBITDEPTH
-#if CONFIG_EXT_INTER && CONFIG_CB4X4
+#if CONFIG_CB4X4
(void)ref_mv_sub8x8;
-#endif // CONFIG_EXT_INTER && CONFIG_CB4X4
+#endif // CONFIG_CB4X4
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref)
#else
- for (ref = 0; ref < 2; ++ref) {
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_EXT_INTER && !CONFIG_CB4X4
+ for (ref = 0; ref < 2; ++ref)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
+#if !CONFIG_CB4X4
if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
else
-#endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
+#endif // !CONFIG_CB4X4
ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
if (scaled_ref_frame[ref]) {
@@ -6223,7 +6732,7 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
}
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi)) {
assert(is_inter_singleref_comp_mode(mbmi->mode));
// NOTE: For single ref comp mode, set up the 2nd set of ref_mv/pre_planes
@@ -6239,7 +6748,7 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
av1_setup_pre_planes(xd, 1, scaled_ref_frame[0], mi_row, mi_col, NULL);
}
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
// Since we have scaled the reference frames to match the size of the current
// frame we must use a unit scaling factor during mode selection.
@@ -6253,14 +6762,15 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
// Run the joint search iteratively, alternating between the reference frames,
// and break out of the search loop once an iteration fails to find a better mv.
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
const int num_ites =
(has_second_ref(mbmi) || mbmi->mode == SR_NEW_NEWMV) ? 4 : 1;
const int start_ite = has_second_ref(mbmi) ? 0 : 1;
- for (ite = start_ite; ite < (start_ite + num_ites); ite++) {
+ for (ite = start_ite; ite < (start_ite + num_ites); ite++)
#else
- for (ite = 0; ite < 4; ite++) {
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ite = 0; ite < 4; ite++)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
struct buf_2d ref_yv12[2];
int bestsme = INT_MAX;
int sadpb = x->sadperbit16;
@@ -6288,23 +6798,23 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
ref_yv12[1] = xd->plane[plane].pre[1];
// Get the prediction block from the 'other' reference frame.
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
MV *const the_other_mv = (has_second_ref(mbmi) || id)
? &frame_mv[refs[!id]].as_mv
: &frame_comp_mv[refs[0]].as_mv;
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
av1_highbd_build_inter_predictor(
ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
the_other_mv,
-#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
+#else // !(CONFIG_COMPOUND_SINGLEREF)
&frame_mv[refs[!id]].as_mv,
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- &sf, pw, ph, 0, mbmi->interp_filter,
+#endif // CONFIG_COMPOUND_SINGLEREF
+ &sf, pw, ph, 0, mbmi->interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -6314,12 +6824,12 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
#endif // CONFIG_HIGHBITDEPTH
av1_build_inter_predictor(
ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
the_other_mv,
-#else // !(CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF)
+#else // !(CONFIG_COMPOUND_SINGLEREF)
&frame_mv[refs[!id]].as_mv,
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- &sf, pw, ph, &conv_params, mbmi->interp_filter,
+#endif // CONFIG_COMPOUND_SINGLEREF
+ &sf, pw, ph, &conv_params, mbmi->interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row, plane, !id,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -6334,74 +6844,75 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
// Use the mv result from the single mode as mv predictor.
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi) && id)
*best_mv = frame_comp_mv[refs[0]].as_mv;
else
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
*best_mv = frame_mv[refs[id]].as_mv;
best_mv->col >>= 3;
best_mv->row >>= 3;
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi))
av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
else
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
// Small-range full-pixel motion search.
- bestsme =
- av1_refining_search_8p_c(x, sadpb, search_range, &cpi->fn_ptr[bsize],
-#if CONFIG_EXT_INTER
- mask, mask_stride, id,
-#endif
- &ref_mv[id].as_mv, second_pred);
+ bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
+ &cpi->fn_ptr[bsize], mask, mask_stride,
+ id, &ref_mv[id].as_mv, second_pred);
if (bestsme < INT_MAX) {
-#if CONFIG_EXT_INTER
if (mask)
bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
second_pred, mask, mask_stride, id,
&cpi->fn_ptr[bsize], 1);
else
-#endif
bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
second_pred, &cpi->fn_ptr[bsize], 1);
}
x->mv_limits = tmp_mv_limits;
- if (bestsme < INT_MAX) {
+#if CONFIG_AMVR
+ if (cpi->common.cur_frame_mv_precision_level) {
+ x->best_mv.as_mv.row *= 8;
+ x->best_mv.as_mv.col *= 8;
+ }
+ if (bestsme < INT_MAX && cpi->common.cur_frame_mv_precision_level == 0)
+#else
+ if (bestsme < INT_MAX)
+#endif
+ {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
bestsme = cpi->find_fractional_mv_step(
x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], 0,
cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred,
-#if CONFIG_EXT_INTER
- mask, mask_stride, id,
-#endif
- pw, ph, cpi->sf.use_upsampled_references);
+ &dis, &sse, second_pred, mask, mask_stride, id, pw, ph,
+ cpi->sf.use_upsampled_references);
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
if (id) xd->plane[plane].pre[0] = ref_yv12[0];
if (bestsme < last_besterr[id]) {
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// NOTE: For single ref comp mode, frame_mv stores the first mv and
// frame_comp_mv stores the second mv.
if (!has_second_ref(mbmi) && id)
frame_comp_mv[refs[0]].as_mv = *best_mv;
else
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
frame_mv[refs[id]].as_mv = *best_mv;
last_besterr[id] = bestsme;
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi)) last_besterr[!id] = last_besterr[id];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
} else {
break;
}
@@ -6409,11 +6920,12 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
*rate_mv = 0;
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+#if CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref)
#else
- for (ref = 0; ref < 2; ++ref) {
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+ for (ref = 0; ref < 2; ++ref)
+#endif // CONFIG_COMPOUND_SINGLEREF
+ {
if (scaled_ref_frame[ref]) {
// Restore the prediction frame pointers to their unscaled versions.
int i;
@@ -6421,14 +6933,14 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
xd->plane[i].pre[ref] = backup_yv12[ref][i];
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi))
av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
else
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi)) {
// NOTE: For single ref comp mode, i.e. !has_second_ref(mbmi) is true, the
// first mv is stored in frame_mv[] and the second mv is stored in
@@ -6442,25 +6954,25 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
&x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
} else {
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_EXT_INTER && !CONFIG_CB4X4
+#endif // CONFIG_COMPOUND_SINGLEREF
+#if !CONFIG_CB4X4
if (bsize >= BLOCK_8X8)
-#endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
+#endif // !CONFIG_CB4X4
*rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
&x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
-#if CONFIG_EXT_INTER && !CONFIG_CB4X4
+#if !CONFIG_CB4X4
else
*rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
&ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
x->mvcost, MV_COST_WEIGHT);
-#endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // !CONFIG_CB4X4
+#if CONFIG_COMPOUND_SINGLEREF
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!has_second_ref(mbmi)) {
if (scaled_ref_frame[0]) {
// Restore the prediction frame pointers to their unscaled versions.
@@ -6469,7 +6981,7 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
xd->plane[i].pre[1] = backup_yv12[1][i];
}
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
}
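
joint_motion_search() above performs coordinate descent over the two motion vectors: each iteration holds one reference's MV fixed, builds the compound prediction from it (second_pred), refines the other MV against that prediction, and stops as soon as an iteration fails to improve. The control flow reduced to a runnable toy, where refine() is a hypothetical stand-in for the full-pel refining search plus the sub-pel step:

#include <limits.h>

typedef struct { int row, col; } ToyMV;

/* Hypothetical stand-in for the compound motion search: refines mv[id]
 * with mv[!id] held fixed and returns the resulting error. */
static int refine(ToyMV mv[2], int id) {
  mv[id].row -= (mv[id].row > 0); /* pretend we step toward zero */
  mv[id].col -= (mv[id].col > 0);
  return mv[0].row * mv[0].row + mv[1].col * mv[1].col;
}

static void alternate_search(ToyMV mv[2]) {
  int last_besterr[2] = { INT_MAX, INT_MAX };
  for (int ite = 0; ite < 4; ++ite) {
    const int id = ite & 1;      /* alternate between the two references */
    const int err = refine(mv, id);
    if (err < last_besterr[id])
      last_besterr[id] = err;    /* improvement: keep the refined vector */
    else
      break;                     /* no gain: stop iterating */
  }
}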
static void estimate_ref_frame_costs(
@@ -6516,6 +7028,7 @@ static void estimate_ref_frame_costs(
aom_prob ref_single_p3 = av1_get_pred_prob_single_ref_p3(cm, xd);
aom_prob ref_single_p4 = av1_get_pred_prob_single_ref_p4(cm, xd);
aom_prob ref_single_p5 = av1_get_pred_prob_single_ref_p5(cm, xd);
+ aom_prob ref_single_p6 = av1_get_pred_prob_single_ref_p6(cm, xd);
#endif // CONFIG_EXT_REFS
unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);
@@ -6523,7 +7036,7 @@ static void estimate_ref_frame_costs(
ref_costs_single[LAST_FRAME] =
#if CONFIG_EXT_REFS
ref_costs_single[LAST2_FRAME] = ref_costs_single[LAST3_FRAME] =
- ref_costs_single[BWDREF_FRAME] =
+ ref_costs_single[BWDREF_FRAME] = ref_costs_single[ALTREF2_FRAME] =
#endif // CONFIG_EXT_REFS
ref_costs_single[GOLDEN_FRAME] =
ref_costs_single[ALTREF_FRAME] = base_cost;
@@ -6534,6 +7047,7 @@ static void estimate_ref_frame_costs(
ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p1, 0);
ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 0);
ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
+ ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p1, 1);
ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p3, 0);
@@ -6542,6 +7056,7 @@ static void estimate_ref_frame_costs(
ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p3, 1);
ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p2, 0);
+ ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p2, 0);
ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p4, 0);
@@ -6549,6 +7064,9 @@ static void estimate_ref_frame_costs(
ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);
+
+ ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p6, 0);
+ ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p6, 1);
#else // !CONFIG_EXT_REFS
ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1);
@@ -6563,6 +7081,7 @@ static void estimate_ref_frame_costs(
ref_costs_single[LAST2_FRAME] = 512;
ref_costs_single[LAST3_FRAME] = 512;
ref_costs_single[BWDREF_FRAME] = 512;
+ ref_costs_single[ALTREF2_FRAME] = 512;
#endif // CONFIG_EXT_REFS
ref_costs_single[GOLDEN_FRAME] = 512;
ref_costs_single[ALTREF_FRAME] = 512;
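
The single-reference costing above walks a binary decision tree: every reference starts from the shared inter base cost and accumulates av1_cost_bit() for each yes/no branch on its path, and the new ALTREF2_FRAME entries simply add one more ref_single_p6 decision under the BWDREF side of the tree. A two-level sketch; cost_bit() is a hypothetical stand-in for av1_cost_bit() and the probabilities and units are toy values:

/* Toy bit cost: coding `bit` against a probability `p` out of 256. */
static int cost_bit(int p, int bit) { return bit ? 256 - p : p; }

static void single_ref_costs(int base_cost, int p1, int p2, int costs[3]) {
  /* p1 splits {LAST} from {BWDREF, ALTREF}; p2 splits the latter pair. */
  costs[0] = base_cost + cost_bit(p1, 0);                   /* LAST   */
  costs[1] = base_cost + cost_bit(p1, 1) + cost_bit(p2, 0); /* BWDREF */
  costs[2] = base_cost + cost_bit(p1, 1) + cost_bit(p2, 1); /* ALTREF */
}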
@@ -6574,6 +7093,7 @@ static void estimate_ref_frame_costs(
aom_prob ref_comp_p1 = av1_get_pred_prob_comp_ref_p1(cm, xd);
aom_prob ref_comp_p2 = av1_get_pred_prob_comp_ref_p2(cm, xd);
aom_prob bwdref_comp_p = av1_get_pred_prob_comp_bwdref_p(cm, xd);
+ aom_prob bwdref_comp_p1 = av1_get_pred_prob_comp_bwdref_p1(cm, xd);
#endif // CONFIG_EXT_REFS
unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);
@@ -6589,7 +7109,8 @@ static void estimate_ref_frame_costs(
#else
base_cost;
#endif // USE_UNI_COMP_REFS
- ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF_FRAME] = 0;
+ ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
+ ref_bicomp_costs[ALTREF_FRAME] = 0;
ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
@@ -6603,14 +7124,18 @@ static void estimate_ref_frame_costs(
ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);
ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
+ ref_bicomp_costs[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
ref_bicomp_costs[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
- int ref0;
+ ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p1, 0);
+ ref_bicomp_costs[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p1, 1);
+
+ int ref0, ref1;
for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
- ref_costs_comp[ref0][BWDREF_FRAME] =
- ref_bicomp_costs[ref0] + ref_bicomp_costs[BWDREF_FRAME];
- ref_costs_comp[ref0][ALTREF_FRAME] =
- ref_bicomp_costs[ref0] + ref_bicomp_costs[ALTREF_FRAME];
+ for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
+ ref_costs_comp[ref0][ref1] =
+ ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
+ }
}
aom_prob uni_comp_ref_p = av1_get_pred_prob_uni_comp_ref_p(cm, xd);
@@ -6642,7 +7167,8 @@ static void estimate_ref_frame_costs(
ref_costs_comp[GOLDEN_FRAME] = base_cost;
#if CONFIG_EXT_REFS
- ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF_FRAME] = 0;
+ ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF2_FRAME] =
+ ref_costs_comp[ALTREF_FRAME] = 0;
#endif // CONFIG_EXT_REFS
#if CONFIG_EXT_REFS
@@ -6660,7 +7186,11 @@ static void estimate_ref_frame_costs(
// NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
// more bit.
ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
+ ref_costs_comp[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
+
+ ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p1, 0);
+ ref_costs_comp[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p1, 1);
#else // !CONFIG_EXT_REFS
ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
@@ -6668,10 +7198,10 @@ static void estimate_ref_frame_costs(
#endif // CONFIG_EXT_COMP_REFS
} else {
#if CONFIG_EXT_COMP_REFS
- int ref0;
+ int ref0, ref1;
for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
- ref_costs_comp[ref0][BWDREF_FRAME] = 512;
- ref_costs_comp[ref0][ALTREF_FRAME] = 512;
+ for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
+ ref_costs_comp[ref0][ref1] = 512;
}
ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
@@ -6683,6 +7213,7 @@ static void estimate_ref_frame_costs(
ref_costs_comp[LAST2_FRAME] = 512;
ref_costs_comp[LAST3_FRAME] = 512;
ref_costs_comp[BWDREF_FRAME] = 512;
+ ref_costs_comp[ALTREF2_FRAME] = 512;
ref_costs_comp[ALTREF_FRAME] = 512;
#endif // CONFIG_EXT_REFS
ref_costs_comp[GOLDEN_FRAME] = 512;
@@ -6732,17 +7263,19 @@ static void setup_buffer_inter(
// Gets an initial list of candidate vectors from neighbours and orders them
av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
mbmi_ext->ref_mv_stack[ref_frame],
-#if CONFIG_EXT_INTER
- mbmi_ext->compound_mode_context,
-#endif // CONFIG_EXT_INTER
- candidates, mi_row, mi_col, NULL, NULL,
- mbmi_ext->mode_context);
+ mbmi_ext->compound_mode_context, candidates, mi_row, mi_col,
+ NULL, NULL, mbmi_ext->mode_context);
- // Candidate refinement carried out at encoder and decoder
+// Candidate refinement carried out at encoder and decoder
+#if CONFIG_AMVR
+ av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
+ &frame_nearest_mv[ref_frame], &frame_near_mv[ref_frame],
+ cm->cur_frame_mv_precision_level);
+#else
av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
&frame_nearest_mv[ref_frame],
&frame_near_mv[ref_frame]);
-
+#endif
// Further refinement that is encoder-side only, to test the top few candidates
// in full and choose the best as the centre point for subsequent searches.
// The current implementation doesn't support scaling.
@@ -6758,10 +7291,7 @@ static void setup_buffer_inter(
static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
-#if CONFIG_EXT_INTER
- int ref_idx,
-#endif // CONFIG_EXT_INTER
- int *rate_mv) {
+ int ref_idx, int *rate_mv) {
MACROBLOCKD *xd = &x->e_mbd;
const AV1_COMMON *cm = &cpi->common;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -6770,17 +7300,12 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
int step_param;
int sadpb = x->sadperbit16;
MV mvp_full;
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
int ref =
has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0];
#else // !CONFIG_COMPOUND_SINGLEREF
int ref = mbmi->ref_frame[ref_idx];
#endif // CONFIG_COMPOUND_SINGLEREF
-#else // !CONFIG_EXT_INTER
- int ref = mbmi->ref_frame[0];
- int ref_idx = 0;
-#endif // CONFIG_EXT_INTER
MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
MvLimits tmp_mv_limits = x->mv_limits;
@@ -6812,7 +7337,7 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
// Work out the size of the first step in the mv step search.
// 0 here means the maximum-length first step; 1 is AOMMAX >> 1, etc.
if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
- // Take wtd average of the step_params based on the last frame's
+ // Take the weighted average of the step_params based on the last frame's
// max mv magnitude and that based on the best ref mvs of the current
// block for the given reference.
step_param =
@@ -6834,10 +7359,13 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
int bhl = b_height_log2_lookup[bsize];
int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
- if (tlevel < 5) step_param += 2;
+ if (tlevel < 5) {
+ step_param += 2;
+ step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 1);
+ }
// prev_mv_sad is not setup for dynamically scaled frames.
- if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
+ if (cpi->oxcf.resize_mode != RESIZE_RANDOM) {
int i;
for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
@@ -6874,9 +7402,16 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
switch (mbmi->motion_mode) {
case SIMPLE_TRANSLATION:
#endif // CONFIG_MOTION_VAR
+#if CONFIG_HASH_ME
bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
sadpb, cond_cost_list(cpi, cost_list),
- &ref_mv, INT_MAX, 1);
+ &ref_mv, INT_MAX, 1, (MI_SIZE * mi_col),
+ (MI_SIZE * mi_row), 0);
+#else
+ bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
+ cond_cost_list(cpi, cost_list), &ref_mv,
+ INT_MAX, 1);
+#endif
#if CONFIG_MOTION_VAR
break;
case OBMC_CAUSAL:
@@ -6891,7 +7426,15 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x->mv_limits = tmp_mv_limits;
+#if CONFIG_AMVR
+ if (cpi->common.cur_frame_mv_precision_level) {
+ x->best_mv.as_mv.row *= 8;
+ x->best_mv.as_mv.col *= 8;
+ }
+ if (bestsme < INT_MAX && cpi->common.cur_frame_mv_precision_level == 0) {
+#else
if (bestsme < INT_MAX) {
+#endif
int dis; /* TODO: use dis in distortion calculation later. */
#if CONFIG_MOTION_VAR
switch (mbmi->motion_mode) {
@@ -6908,11 +7451,8 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
-#if CONFIG_EXT_INTER
- NULL, 0, 0,
-#endif
- pw, ph, 1);
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
+ 0, 0, pw, ph, 1);
if (try_second) {
const int minc =
@@ -6936,11 +7476,7 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
- &dis, &x->pred_sse[ref], NULL,
-#if CONFIG_EXT_INTER
- NULL, 0, 0,
-#endif
- pw, ph, 1);
+ &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, 1);
if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
x->best_mv.as_mv = best_mv;
}
@@ -6950,11 +7486,8 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
-#if CONFIG_EXT_INTER
- NULL, 0, 0,
-#endif
- 0, 0, 0);
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
+ 0, 0, 0, 0, 0);
}
#if CONFIG_MOTION_VAR
break;
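
The CONFIG_AMVR branches above depend on motion vectors being stored in 1/8-pel units: when cur_frame_mv_precision_level pins the frame to integer-pel motion, the full-pel search result is promoted by a factor of 8 and the fractional refinement is skipped entirely. The unit conversion in isolation:

typedef struct { short row, col; } EighthPelMV;

/* One full pel is 8 eighth-pel units, so a full-pel search result is
 * promoted by scaling both components by 8. */
static void full_pel_to_eighth_pel(EighthPelMV *mv) {
  mv->row *= 8;
  mv->col *= 8;
}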
@@ -6994,7 +7527,6 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst) {
}
}
-#if CONFIG_EXT_INTER
static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, const MV *other_mv,
int mi_row, int mi_col, const int block,
@@ -7013,7 +7545,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
struct scale_factors sf;
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
struct macroblockd_plane *const pd = &xd->plane[0];
- // ic and ir are the 4x4 coordiantes of the sub8x8 at index "block"
+ // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
const int ic = block & 1;
const int ir = (block - ic) >> 1;
const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
@@ -7079,7 +7611,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
av1_highbd_build_inter_predictor(
ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
- 0, mbmi->interp_filter,
+ 0, mbmi->interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -7088,7 +7620,7 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
#endif // CONFIG_HIGHBITDEPTH
av1_build_inter_predictor(
ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
- &conv_params, mbmi->interp_filter,
+ &conv_params, mbmi->interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, p_col, p_row, plane, !ref_idx,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -7197,7 +7729,15 @@ static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
x->mv_limits = tmp_mv_limits;
+#if CONFIG_AMVR
+ if (cpi->common.cur_frame_mv_precision_level) {
+ x->best_mv.as_mv.row *= 8;
+ x->best_mv.as_mv.col *= 8;
+ }
+ if (bestsme < INT_MAX && cpi->common.cur_frame_mv_precision_level == 0) {
+#else
if (bestsme < INT_MAX) {
+#endif
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
bestsme = cpi->find_fractional_mv_step(
@@ -7339,9 +7879,8 @@ static void do_masked_motion_search_indexed(
tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
}
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
-#endif // CONFIG_EXT_INTER
-// In some situations we want to discount tha pparent cost of a new motion
+// In some situations we want to discount the apparent cost of a new motion
// vector. Where there is a subtle motion field and especially where there is
// low spatial complexity then it can be hard to cover the cost of a new motion
// vector in a single block, even if that motion vector reduces distortion.
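
One plausible realization of that discount (an assumption; the actual rule lives outside this hunk) is to cap the NEWMV mode cost at the cheapest established mode's cost, so a genuinely useful vector is not priced out of the single block that introduces it:

/* Hypothetical sketch: when the discount heuristic fires, charge NEWMV no
 * more than the cheaper rival mode. */
static int maybe_discounted_cost(int discount, int newmv_cost,
                                 int rival_cost) {
  if (!discount) return newmv_cost;
  return newmv_cost < rival_cost ? newmv_cost : rival_cost;
}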
@@ -7371,7 +7910,6 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
-#if CONFIG_EXT_INTER
#if CONFIG_WEDGE
static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
const BLOCK_SIZE bsize, const uint8_t *pred0,
@@ -7416,7 +7954,6 @@ static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
return (tl + br > 0);
}
#endif // CONFIG_WEDGE
-#endif // CONFIG_EXT_INTER
#if !CONFIG_DUAL_FILTER
static InterpFilter predict_interp_filter(
@@ -7440,19 +7977,17 @@ static InterpFilter predict_interp_filter(
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
if (pred_filter_search) {
InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
- if (xd->up_available) af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
- if (xd->left_available) lf = xd->mi[-1]->mbmi.interp_filter;
+ if (xd->up_available)
+ af = av1_extract_interp_filter(
+ xd->mi[-xd->mi_stride]->mbmi.interp_filters, 0);
+ if (xd->left_available)
+ lf = av1_extract_interp_filter(xd->mi[-1]->mbmi.interp_filters, 0);
-#if CONFIG_EXT_INTER
if ((this_mode != NEWMV && this_mode != NEW_NEWMV) || (af == lf))
-#else
- if ((this_mode != NEWMV) || (af == lf))
-#endif // CONFIG_EXT_INTER
best_filter = af;
}
if (is_comp_pred) {
if (cpi->sf.adaptive_mode_search) {
-#if CONFIG_EXT_INTER
switch (this_mode) {
case NEAREST_NEARESTMV:
if (single_filter[NEARESTMV][refs[0]] ==
@@ -7495,11 +8030,6 @@ static InterpFilter predict_interp_filter(
best_filter = single_filter[this_mode][refs[0]];
break;
}
-#else
- if (single_filter[this_mode][refs[0]] ==
- single_filter[this_mode][refs[1]])
- best_filter = single_filter[this_mode][refs[0]];
-#endif // CONFIG_EXT_INTER
}
}
if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
@@ -7509,7 +8039,6 @@ static InterpFilter predict_interp_filter(
}
#endif // !CONFIG_DUAL_FILTER
-#if CONFIG_EXT_INTER
// Choose the best wedge index and sign
#if CONFIG_WEDGE
static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
@@ -7924,7 +8453,6 @@ static int64_t build_and_cost_compound_type(
return best_rd_cur;
}
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
-#endif // CONFIG_EXT_INTER
typedef struct {
#if CONFIG_MOTION_VAR
@@ -7935,23 +8463,21 @@ typedef struct {
int left_pred_stride[MAX_MB_PLANE];
#endif // CONFIG_MOTION_VAR
int_mv *single_newmv;
-#if CONFIG_EXT_INTER
// Pointer to array of motion vectors to use for each ref and their rates
// Should point to first of 2 arrays in 2D array
int *single_newmv_rate;
// Pointer to array of predicted rate-distortion
// Should point to first of 2 arrays in 2D array
int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME];
-#endif // CONFIG_EXT_INTER
InterpFilter single_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
} HandleInterModeArgs;
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
const BLOCK_SIZE bsize,
int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
int_mv (*const mode_comp_mv)[TOTAL_REFS_PER_FRAME],
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
const int mi_row, const int mi_col,
int *const rate_mv, int_mv *const single_newmv,
HandleInterModeArgs *const args) {
@@ -7960,13 +8486,11 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const int is_comp_pred = has_second_ref(mbmi);
const PREDICTION_MODE this_mode = mbmi->mode;
-#if CONFIG_EXT_INTER
const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
-#endif // CONFIG_EXT_INTER
int_mv *const frame_mv = mode_mv[this_mode];
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
int_mv *const frame_comp_mv = mode_comp_mv[this_mode];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
const int refs[2] = { mbmi->ref_frame[0],
mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
int i;
@@ -7974,7 +8498,6 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
(void)args;
if (is_comp_pred) {
-#if CONFIG_EXT_INTER
for (i = 0; i < 2; ++i) {
single_newmv[refs[i]].as_int = args->single_newmv[refs[i]].as_int;
}
@@ -7985,9 +8508,9 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
joint_motion_search(cpi, x, bsize, frame_mv,
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
NULL, // int_mv *frame_comp_mv
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
mi_row, mi_col, NULL, NULL, 0, rate_mv, 0);
} else {
*rate_mv = 0;
@@ -8034,24 +8557,7 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
}
}
-#else // !CONFIG_EXT_INTER
- // Initialize mv using single prediction mode result.
- frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
- frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
-
- if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
- joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, rate_mv, 0);
- } else {
- *rate_mv = 0;
- for (i = 0; i < 2; ++i) {
- av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
- *rate_mv += av1_mv_bit_cost(&frame_mv[refs[i]].as_mv,
- &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
- }
- }
-#endif // CONFIG_EXT_INTER
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
} else if (is_inter_singleref_comp_mode(this_mode)) {
// Single ref comp mode
const int mode0 = compound_ref0_mode(this_mode);
@@ -8085,9 +8591,8 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
&mbmi_ext->ref_mvs[refs[0]][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
} else {
-#if CONFIG_EXT_INTER
if (is_comp_interintra_pred) {
x->best_mv = args->single_newmv[refs[0]];
*rate_mv = args->single_newmv_rate[refs[0]];
@@ -8096,10 +8601,6 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
args->single_newmv[refs[0]] = x->best_mv;
args->single_newmv_rate[refs[0]] = *rate_mv;
}
-#else
- single_motion_search(cpi, x, bsize, mi_row, mi_col, rate_mv);
- single_newmv[refs[0]] = x->best_mv;
-#endif // CONFIG_EXT_INTER
if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
@@ -8149,7 +8650,7 @@ int64_t interpolation_filter_search(
set_default_interp_filters(mbmi, assign_filter);
- *switchable_rate = av1_get_switchable_rate(cpi, xd);
+ *switchable_rate = av1_get_switchable_rate(cm, x, xd);
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
skip_txfm_sb, skip_sse_sb);
@@ -8164,12 +8665,7 @@ int64_t interpolation_filter_search(
const int filter_set_size = SWITCHABLE_FILTERS;
#endif // CONFIG_DUAL_FILTER
int best_in_temp = 0;
-#if CONFIG_DUAL_FILTER
- InterpFilter best_filter[4];
- av1_copy(best_filter, mbmi->interp_filter);
-#else
- InterpFilter best_filter = mbmi->interp_filter;
-#endif // CONFIG_DUAL_FILTER
+ InterpFilters best_filters = mbmi->interp_filters;
restore_dst_buf(xd, *tmp_dst);
// EIGHTTAP_REGULAR mode is calculated beforehand
for (i = 1; i < filter_set_size; ++i) {
@@ -8178,14 +8674,12 @@ int64_t interpolation_filter_search(
int tmp_rs;
int64_t tmp_rd;
#if CONFIG_DUAL_FILTER
- mbmi->interp_filter[0] = filter_sets[i][0];
- mbmi->interp_filter[1] = filter_sets[i][1];
- mbmi->interp_filter[2] = filter_sets[i][0];
- mbmi->interp_filter[3] = filter_sets[i][1];
+ mbmi->interp_filters =
+ av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);
#else
- mbmi->interp_filter = (InterpFilter)i;
+ mbmi->interp_filters = av1_broadcast_interp_filter((InterpFilter)i);
#endif // CONFIG_DUAL_FILTER
- tmp_rs = av1_get_switchable_rate(cpi, xd);
+ tmp_rs = av1_get_switchable_rate(cm, x, xd);
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
&tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
@@ -8193,12 +8687,8 @@ int64_t interpolation_filter_search(
if (tmp_rd < *rd) {
*rd = tmp_rd;
- *switchable_rate = av1_get_switchable_rate(cpi, xd);
-#if CONFIG_DUAL_FILTER
- av1_copy(best_filter, mbmi->interp_filter);
-#else
- best_filter = mbmi->interp_filter;
-#endif // CONFIG_DUAL_FILTER
+ *switchable_rate = av1_get_switchable_rate(cm, x, xd);
+ best_filters = mbmi->interp_filters;
*skip_txfm_sb = tmp_skip_sb;
*skip_sse_sb = tmp_skip_sse;
best_in_temp = !best_in_temp;
@@ -8214,24 +8704,29 @@ int64_t interpolation_filter_search(
} else {
restore_dst_buf(xd, *orig_dst);
}
-#if CONFIG_DUAL_FILTER
- av1_copy(mbmi->interp_filter, best_filter);
-#else
- mbmi->interp_filter = best_filter;
-#endif // CONFIG_DUAL_FILTER
+ mbmi->interp_filters = best_filters;
} else {
-#if CONFIG_DUAL_FILTER
- for (i = 0; i < 4; ++i)
- assert(mbmi->interp_filter[i] == EIGHTTAP_REGULAR);
-#else
- assert(mbmi->interp_filter == EIGHTTAP_REGULAR);
-#endif // CONFIG_DUAL_FILTER
+ assert(mbmi->interp_filters ==
+ av1_broadcast_interp_filter(EIGHTTAP_REGULAR));
}
}
return 0;
}
+#if CONFIG_DUAL_FILTER
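+// Returns the given filter pair with each direction's filter kept only when
+// the mv has a subpel component in that direction; full-pel directions fall
+// back to EIGHTTAP_REGULAR, where the filter choice is irrelevant.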
+static InterpFilters condition_interp_filters_on_mv(
+ InterpFilters interp_filters, const MACROBLOCKD *xd) {
+ InterpFilter filters[2];
+ for (int i = 0; i < 2; ++i)
+ filters[i] = (has_subpel_mv_component(xd->mi[0], xd, i))
+ ? av1_extract_interp_filter(interp_filters, i)
+ : EIGHTTAP_REGULAR;
+
+ return av1_make_interp_filters(filters[0], filters[1]);
+}
+#endif  // CONFIG_DUAL_FILTER
+
// TODO(afergs): Refactor the MBMI references in here - there are four
// TODO(afergs): Refactor optional args - add them to a struct or remove
static int64_t motion_mode_rd(
@@ -8242,10 +8737,8 @@ static int64_t motion_mode_rd(
const int *refs, int rate_mv,
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
// only used when WARPED_MOTION is on?
- int_mv *const single_newmv,
-#if CONFIG_EXT_INTER
- int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi, int rate_mv_bmc,
-#endif // CONFIG_EXT_INTER
+ int_mv *const single_newmv, int rate2_bmc_nocoeff,
+ MB_MODE_INFO *best_bmc_mbmi, int rate_mv_bmc,
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
int rs, int *skip_txfm_sb, int64_t *skip_sse_sb, BUFFER_SET *orig_dst) {
const AV1_COMMON *const cm = &cpi->common;
@@ -8263,6 +8756,9 @@ static int64_t motion_mode_rd(
(void)rate_mv;
(void)is_comp_pred;
(void)this_mode;
+#if !CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
+ (void)single_newmv;
+#endif
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
MOTION_MODE motion_mode, last_motion_mode_allowed;
@@ -8298,23 +8794,10 @@ static int64_t motion_mode_rd(
#else
mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
#endif // WARPED_MOTION_SORT_SAMPLES
-#if CONFIG_EXT_INTER
best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
-#endif // CONFIG_EXT_INTER
#endif // CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
rate2_nocoeff = rd_stats->rate;
-#if CONFIG_NCOBMC_ADAPT_WEIGHT
- // We cannot estimate the rd cost for the motion mode NCOBMC_ADAPT_WEIGHT
- // right now since it requires mvs from all neighboring blocks. We will
- // check if this mode is beneficial after all the mv's in the current
- // superblock are selected.
- last_motion_mode_allowed = motion_mode_allowed_wrapper(1,
-#if CONFIG_GLOBAL_MOTION
- 0, xd->global_motion,
-#endif // CONFIG_GLOBAL_MOTION
- mi);
-#else
last_motion_mode_allowed = motion_mode_allowed(
#if CONFIG_GLOBAL_MOTION
0, xd->global_motion,
@@ -8323,7 +8806,6 @@ static int64_t motion_mode_rd(
xd,
#endif
mi);
-#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
base_mbmi = *mbmi;
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
@@ -8334,54 +8816,44 @@ static int64_t motion_mode_rd(
int64_t tmp_rd = INT64_MAX;
int tmp_rate;
int64_t tmp_dist;
-#if CONFIG_EXT_INTER
int tmp_rate2 =
motion_mode != SIMPLE_TRANSLATION ? rate2_bmc_nocoeff : rate2_nocoeff;
-#else
- int tmp_rate2 = rate2_nocoeff;
-#endif // CONFIG_EXT_INTER
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ // We cannot estimate the rd cost for the motion mode NCOBMC_ADAPT_WEIGHT
+ // right now since it requires mvs from all neighboring blocks. We will
+ // check if this mode is beneficial after all the mvs in the current
+ // superblock are selected.
+ if (motion_mode == NCOBMC_ADAPT_WEIGHT) continue;
+#endif  // CONFIG_NCOBMC_ADAPT_WEIGHT
*mbmi = base_mbmi;
mbmi->motion_mode = motion_mode;
#if CONFIG_MOTION_VAR
if (mbmi->motion_mode == OBMC_CAUSAL) {
-#if CONFIG_EXT_INTER
*mbmi = *best_bmc_mbmi;
mbmi->motion_mode = OBMC_CAUSAL;
-#endif // CONFIG_EXT_INTER
if (!is_comp_pred &&
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
!is_inter_singleref_comp_mode(this_mode) &&
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
have_newmv_in_inter_mode(this_mode)) {
int tmp_rate_mv = 0;
- single_motion_search(cpi, x, bsize, mi_row, mi_col,
-#if CONFIG_EXT_INTER
- 0,
-#endif // CONFIG_EXT_INTER
- &tmp_rate_mv);
+ single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, &tmp_rate_mv);
mbmi->mv[0].as_int = x->best_mv.as_int;
if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
refs[0])) {
tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
}
-#if CONFIG_EXT_INTER
tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
-#else
- tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
-#endif // CONFIG_EXT_INTER
#if CONFIG_DUAL_FILTER
- if (!has_subpel_mv_component(xd->mi[0], xd, 0))
- mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
- if (!has_subpel_mv_component(xd->mi[0], xd, 1))
- mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
+ mbmi->interp_filters =
+ condition_interp_filters_on_mv(mbmi->interp_filters, xd);
#endif // CONFIG_DUAL_FILTER
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
-#if CONFIG_EXT_INTER
} else {
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
-#endif // CONFIG_EXT_INTER
}
av1_build_obmc_inter_prediction(
cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride,
@@ -8396,20 +8868,11 @@ static int64_t motion_mode_rd(
#if WARPED_MOTION_SORT_SAMPLES
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
#endif // WARPED_MOTION_SORT_SAMPLES
-#if CONFIG_EXT_INTER
*mbmi = *best_bmc_mbmi;
mbmi->motion_mode = WARPED_CAUSAL;
-#endif // CONFIG_EXT_INTER
mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
-#if CONFIG_DUAL_FILTER
- for (int dir = 0; dir < 4; ++dir)
- mbmi->interp_filter[dir] = cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter;
-#else
- mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
- : cm->interp_filter;
-#endif // CONFIG_DUAL_FILTER
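+ // av1_unswitchable_filter() maps SWITCHABLE to EIGHTTAP_REGULAR and
+ // returns any fixed frame-level filter unchanged.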
+ mbmi->interp_filters = av1_broadcast_interp_filter(
+ av1_unswitchable_filter(cm->interp_filter));
#if WARPED_MOTION_SORT_SAMPLES
memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
@@ -8418,9 +8881,7 @@ static int64_t motion_mode_rd(
if (mbmi->num_proj_ref[0] > 1) {
mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts,
pts_inref, mbmi->num_proj_ref[0]);
-#if CONFIG_EXT_INTER
best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
-#endif // CONFIG_EXT_INTER
}
#endif // WARPED_MOTION_SORT_SAMPLES
@@ -8461,19 +8922,13 @@ static int64_t motion_mode_rd(
refs[0])) {
tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
}
-#if CONFIG_EXT_INTER
#if WARPED_MOTION_SORT_SAMPLES
best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
#endif // WARPED_MOTION_SORT_SAMPLES
tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
-#else
- tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
-#endif // CONFIG_EXT_INTER
#if CONFIG_DUAL_FILTER
- if (!has_subpel_mv_component(xd->mi[0], xd, 0))
- mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
- if (!has_subpel_mv_component(xd->mi[0], xd, 1))
- mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
+ mbmi->interp_filters =
+ condition_interp_filters_on_mv(mbmi->interp_filters, xd);
#endif // CONFIG_DUAL_FILTER
} else {
// Restore the old MV and WM parameters.
@@ -8503,10 +8958,10 @@ static int64_t motion_mode_rd(
#if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
if (last_motion_mode_allowed == WARPED_CAUSAL)
#endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
- rd_stats->rate += cpi->motion_mode_cost[bsize][mbmi->motion_mode];
+ rd_stats->rate += x->motion_mode_cost[bsize][mbmi->motion_mode];
#if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
else
- rd_stats->rate += cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
+ rd_stats->rate += x->motion_mode_cost1[bsize][mbmi->motion_mode];
#endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
}
#if CONFIG_WARPED_MOTION
@@ -8629,25 +9084,11 @@ static int64_t motion_mode_rd(
}
#if CONFIG_GLOBAL_MOTION
- if (this_mode == ZEROMV
-#if CONFIG_EXT_INTER
- || this_mode == ZERO_ZEROMV
-#endif // CONFIG_EXT_INTER
- ) {
+ if (this_mode == ZEROMV || this_mode == ZERO_ZEROMV) {
if (is_nontrans_global_motion(xd)) {
rd_stats->rate -= rs;
-#if CONFIG_DUAL_FILTER
- mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter;
- mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter;
-#else
- mbmi->interp_filter = cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter;
-#endif // CONFIG_DUAL_FILTER
+ mbmi->interp_filters = av1_broadcast_interp_filter(
+ av1_unswitchable_filter(cm->interp_filter));
}
}
#endif // CONFIG_GLOBAL_MOTION
@@ -8697,48 +9138,43 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
int *disable_skip,
int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
int_mv (*mode_comp_mv)[TOTAL_REFS_PER_FRAME],
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
int mi_row, int mi_col,
HandleInterModeArgs *args,
const int64_t ref_best_rd) {
const AV1_COMMON *cm = &cpi->common;
- (void)cm;
MACROBLOCKD *xd = &x->e_mbd;
MODE_INFO *mi = xd->mi[0];
MB_MODE_INFO *mbmi = &mi->mbmi;
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const int is_comp_pred = has_second_ref(mbmi);
const int this_mode = mbmi->mode;
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
const int is_singleref_comp_mode = is_inter_singleref_comp_mode(this_mode);
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
int_mv *frame_mv = mode_mv[this_mode];
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// The comp mv for compound modes that use a single ref frame
int_mv *frame_comp_mv = mode_comp_mv[this_mode];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
int i;
int refs[2] = { mbmi->ref_frame[0],
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
int_mv cur_mv[2];
int rate_mv = 0;
-#if CONFIG_EXT_INTER
int pred_exists = 1;
#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA
const int bw = block_size_wide[bsize];
-#endif // ONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
+#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA
int_mv single_newmv[TOTAL_REFS_PER_FRAME];
#if CONFIG_INTERINTRA
- const unsigned int *const interintra_mode_cost =
- cpi->interintra_mode_cost[size_group_lookup[bsize]];
+ const int *const interintra_mode_cost =
+ x->interintra_mode_cost[size_group_lookup[bsize]];
#endif // CONFIG_INTERINTRA
const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-#else
- int_mv *const single_newmv = args->single_newmv;
-#endif // CONFIG_EXT_INTER
#if CONFIG_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
#else
@@ -8747,11 +9183,9 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
uint8_t *tmp_buf;
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-#if CONFIG_EXT_INTER
int rate2_bmc_nocoeff;
MB_MODE_INFO best_bmc_mbmi;
int rate_mv_bmc;
-#endif // CONFIG_EXT_INTER
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
int64_t rd = INT64_MAX;
BUFFER_SET orig_dst, tmp_dst;
@@ -8766,7 +9200,6 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
mbmi->ncobmc_mode[1] = NO_OVERLAP;
#endif
-#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
int compmode_interintra_cost = 0;
mbmi->use_wedge_interintra = 0;
@@ -8775,6 +9208,9 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
int compmode_interinter_cost = 0;
mbmi->interinter_compound_type = COMPOUND_AVERAGE;
#endif
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = 0;
+#endif
#if CONFIG_INTERINTRA
if (!cm->allow_interintra_compound && is_comp_interintra_pred)
@@ -8785,9 +9221,7 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
assert(!is_comp_interintra_pred || (!is_comp_pred));
// is_comp_interintra_pred implies is_interintra_allowed(mbmi->sb_type)
assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi));
-#endif // CONFIG_EXT_INTER
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
if (is_comp_pred || is_singleref_comp_mode)
#else // !CONFIG_COMPOUND_SINGLEREF
@@ -8795,7 +9229,6 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_COMPOUND_SINGLEREF
mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
else
-#endif // CONFIG_EXT_INTER
mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
mbmi->ref_frame, bsize, -1);
@@ -8818,21 +9251,21 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
if (frame_mv[refs[0]].as_int == INVALID_MV ||
frame_mv[refs[1]].as_int == INVALID_MV)
return INT64_MAX;
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
} else if (is_singleref_comp_mode) {
if (frame_mv[refs[0]].as_int == INVALID_MV ||
frame_comp_mv[refs[0]].as_int == INVALID_MV)
return INT64_MAX;
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
}
mbmi->motion_mode = SIMPLE_TRANSLATION;
if (have_newmv_in_inter_mode(this_mode)) {
const int64_t ret_val =
handle_newmv(cpi, x, bsize, mode_mv,
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
mode_comp_mv,
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
mi_row, mi_col, &rate_mv, single_newmv, args);
if (ret_val != 0)
return ret_val;
@@ -8847,7 +9280,7 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
mbmi->mv[i].as_int = cur_mv[i].as_int;
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
if (!is_comp_pred && is_singleref_comp_mode) {
cur_mv[1] = frame_comp_mv[refs[0]];
// Clip "next_nearest" so that it does not extend to far out of image
@@ -8855,17 +9288,9 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
mbmi->mv[1].as_int = cur_mv[1].as_int;
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_EXT_INTER
- if (this_mode == NEAREST_NEARESTMV)
-#else
- if (this_mode == NEARESTMV && is_comp_pred)
-#endif // CONFIG_EXT_INTER
- {
-#if !CONFIG_EXT_INTER
- uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-#endif // !CONFIG_EXT_INTER
+ if (this_mode == NEAREST_NEARESTMV) {
if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
@@ -8878,7 +9303,6 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
}
}
-#if CONFIG_EXT_INTER
if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
#if CONFIG_COMPOUND_SINGLEREF
if (this_mode == NEAREST_NEWMV || // this_mode == SR_NEAREST_NEWMV ||
@@ -8889,7 +9313,12 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
{
cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
+#if CONFIG_AMVR
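+ // With AMVR, the frame-level mv precision is passed down as well so the
+ // mv can be lowered to the precision in use for the current frame.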
+ lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv,
+ cm->cur_frame_mv_precision_level);
+#else
lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
+#endif
clamp_mv2(&cur_mv[0].as_mv, xd);
if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
mbmi->mv[0].as_int = cur_mv[0].as_int;
@@ -8898,7 +9327,12 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
if (this_mode == NEW_NEARESTMV) {
cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
+#if CONFIG_AMVR
+ lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv,
+ cm->cur_frame_mv_precision_level);
+#else
lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
+#endif
clamp_mv2(&cur_mv[1].as_mv, xd);
if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
mbmi->mv[1].as_int = cur_mv[1].as_int;
@@ -8914,7 +9348,12 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
this_mode == NEAR_NEARMV) {
cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+#if CONFIG_AMVR
+ lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv,
+ cm->cur_frame_mv_precision_level);
+#else
lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
+#endif
clamp_mv2(&cur_mv[0].as_mv, xd);
if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
mbmi->mv[0].as_int = cur_mv[0].as_int;
@@ -8932,28 +9371,17 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_COMPOUND_SINGLEREF
cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
+#if CONFIG_AMVR
+ lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv,
+ cm->cur_frame_mv_precision_level);
+#else
lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
+#endif
clamp_mv2(&cur_mv[1].as_mv, xd);
if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
mbmi->mv[1].as_int = cur_mv[1].as_int;
}
}
-#else // !CONFIG_EXT_INTER
- if (this_mode == NEARMV && is_comp_pred) {
- uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
- int ref_mv_idx = mbmi->ref_mv_idx + 1;
- cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
- cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
-
- for (i = 0; i < 2; ++i) {
- clamp_mv2(&cur_mv[i].as_mv, xd);
- if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
- mbmi->mv[i].as_int = cur_mv[i].as_int;
- }
- }
- }
-#endif // CONFIG_EXT_INTER
// Do the first prediction into the destination buffer. Do the next
// prediction into a temporary buffer. Then keep track of which one
@@ -8978,26 +9406,15 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
// initiation of a motion field.
if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
refs[0])) {
-#if CONFIG_EXT_INTER
- rd_stats->rate +=
- AOMMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
- cost_mv_ref(cpi, is_comp_pred ? NEAREST_NEARESTMV : NEARESTMV,
- mode_ctx));
-#else
- rd_stats->rate += AOMMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
- cost_mv_ref(cpi, NEARESTMV, mode_ctx));
-#endif // CONFIG_EXT_INTER
+ rd_stats->rate += AOMMIN(
+ cost_mv_ref(x, this_mode, mode_ctx),
+ cost_mv_ref(x, is_comp_pred ? NEAREST_NEARESTMV : NEARESTMV, mode_ctx));
} else {
- rd_stats->rate += cost_mv_ref(cpi, this_mode, mode_ctx);
+ rd_stats->rate += cost_mv_ref(x, this_mode, mode_ctx);
}
if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
-#if CONFIG_EXT_INTER
- mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV
-#else
- mbmi->mode != NEARESTMV
-#endif // CONFIG_EXT_INTER
- )
+ mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV)
return INT64_MAX;
int64_t ret_val = interpolation_filter_search(
@@ -9005,7 +9422,6 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
&rd, &rs, &skip_txfm_sb, &skip_sse_sb);
if (ret_val != 0) return ret_val;
-#if CONFIG_EXT_INTER
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
best_bmc_mbmi = *mbmi;
rate2_bmc_nocoeff = rd_stats->rate;
@@ -9028,7 +9444,6 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
int best_tmp_rate_mv = rate_mv;
int tmp_skip_txfm_sb;
int64_t tmp_skip_sse_sb;
- int compound_type_cost[COMPOUND_TYPES];
DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]);
uint8_t *preds0[1] = { pred0 };
@@ -9040,6 +9455,7 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
masked_compound_used = masked_compound_used && cm->allow_masked_compound;
#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
COMPOUND_TYPE cur_type;
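+ // Caches the signalling cost (rs2) of the best compound type found in the
+ // search below, so it does not have to be recomputed afterwards.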
+ int best_compmode_interinter_cost = 0;
best_mv[0].as_int = cur_mv[0].as_int;
best_mv[1].as_int = cur_mv[1].as_int;
@@ -9049,7 +9465,7 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
best_compound_data.seg_mask = tmp_mask_buf;
#endif // CONFIG_COMPOUND_SEGMENT
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// TODO(zoeliu): Further check whether the following setups are needed.
// Single ref compound mode: Prepare the 2nd ref frame predictor the same as
// the 1st one.
@@ -9058,11 +9474,9 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[1] = xd->plane[i].pre[0];
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
if (masked_compound_used) {
- av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize],
- av1_compound_type_tree);
// get inter predictors to use for masked compound modes
av1_build_inter_predictors_for_planes_single_buf(
xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
@@ -9076,11 +9490,19 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
tmp_rate_mv = rate_mv;
best_rd_cur = INT64_MAX;
mbmi->interinter_compound_type = cur_type;
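+ // Cost of signalling the masked compound type: a single literal bit when
+ // COMPOUND_WEDGE is unavailable at this block size, otherwise the
+ // per-block-size compound type cost.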
+ int masked_type_cost = 0;
+ if (masked_compound_used) {
+#if CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
+ if (!is_interinter_compound_used(COMPOUND_WEDGE, bsize))
+ masked_type_cost += av1_cost_literal(1);
+ else
+#endif // CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
+ masked_type_cost +=
+ x->compound_type_cost[bsize][mbmi->interinter_compound_type];
+ }
rs2 = av1_cost_literal(get_interinter_compound_type_bits(
bsize, mbmi->interinter_compound_type)) +
- (masked_compound_used
- ? compound_type_cost[mbmi->interinter_compound_type]
- : 0);
+ masked_type_cost;
switch (cur_type) {
case COMPOUND_AVERAGE:
@@ -9130,6 +9552,7 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_COMPOUND_SEGMENT
best_compound_data.interinter_compound_type =
mbmi->interinter_compound_type;
+ best_compmode_interinter_cost = rs2;
if (have_newmv_in_inter_mode(this_mode)) {
if (use_masked_motion_search(cur_type)) {
best_tmp_rate_mv = tmp_rate_mv;
@@ -9174,12 +9597,7 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
pred_exists = 0;
- compmode_interinter_cost =
- av1_cost_literal(get_interinter_compound_type_bits(
- bsize, mbmi->interinter_compound_type)) +
- (masked_compound_used
- ? compound_type_cost[mbmi->interinter_compound_type]
- : 0);
+ compmode_interinter_cost = best_compmode_interinter_cost;
}
#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
@@ -9216,7 +9634,7 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
for (j = 0; j < INTERINTRA_MODES; ++j) {
mbmi->interintra_mode = (INTERINTRA_MODE)j;
rmode = interintra_mode_cost[mbmi->interintra_mode];
- av1_build_intra_predictors_for_interintra(xd, bsize, 0, &orig_dst,
+ av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, &orig_dst,
intrapred, bw);
av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
@@ -9229,7 +9647,7 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
}
mbmi->interintra_mode = best_interintra_mode;
rmode = interintra_mode_cost[mbmi->interintra_mode];
- av1_build_intra_predictors_for_interintra(xd, bsize, 0, &orig_dst,
+ av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, &orig_dst,
intrapred, bw);
av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
av1_subtract_plane(x, bsize, 0);
@@ -9340,16 +9758,11 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
&tmp_dist, &skip_txfm_sb, &skip_sse_sb);
rd = RDCOST(x->rdmult, rs + tmp_rate, tmp_dist);
}
-#endif // CONFIG_EXT_INTER
if (!is_comp_pred)
-#if CONFIG_DUAL_FILTER
- args->single_filter[this_mode][refs[0]] = mbmi->interp_filter[0];
-#else
- args->single_filter[this_mode][refs[0]] = mbmi->interp_filter;
-#endif // CONFIG_DUAL_FILTER
+ args->single_filter[this_mode][refs[0]] =
+ av1_extract_interp_filter(mbmi->interp_filters, 0);
-#if CONFIG_EXT_INTER
if (args->modelled_rd != NULL) {
if (is_comp_pred) {
const int mode0 = compound_ref0_mode(this_mode);
@@ -9364,7 +9777,6 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
args->modelled_rd[this_mode][refs[0]] = rd;
}
}
-#endif // CONFIG_EXT_INTER
if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
// if current pred_error modeled rd is substantially more than the best
@@ -9375,7 +9787,6 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
}
}
-#if CONFIG_EXT_INTER
#if CONFIG_INTERINTRA
rd_stats->rate += compmode_interintra_cost;
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
@@ -9385,18 +9796,14 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
rd_stats->rate += compmode_interinter_cost;
#endif
-#endif
- ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
- disable_skip, mode_mv, mi_row, mi_col, args,
- ref_best_rd, refs, rate_mv,
+ ret_val = motion_mode_rd(
+ cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, disable_skip, mode_mv,
+ mi_row, mi_col, args, ref_best_rd, refs, rate_mv,
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- single_newmv,
-#if CONFIG_EXT_INTER
- rate2_bmc_nocoeff, &best_bmc_mbmi, rate_mv_bmc,
-#endif // CONFIG_EXT_INTER
+ single_newmv, rate2_bmc_nocoeff, &best_bmc_mbmi, rate_mv_bmc,
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
- rs, &skip_txfm_sb, &skip_sse_sb, &orig_dst);
+ rs, &skip_txfm_sb, &skip_sse_sb, &orig_dst);
if (ret_val != 0) return ret_val;
return 0; // The rate-distortion cost will be re-calculated by caller.
@@ -9407,11 +9814,10 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
RD_STATS *rd_cost, BLOCK_SIZE bsize,
int64_t best_rd) {
const AV1_COMMON *const cm = &cpi->common;
- if (bsize < BLOCK_8X8 || !cm->allow_screen_content_tools) return INT64_MAX;
+ if (!av1_allow_intrabc(bsize, cm)) return INT64_MAX;
MACROBLOCKD *const xd = &x->e_mbd;
const TileInfo *tile = &xd->tile;
- FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
MODE_INFO *const mi = xd->mi[0];
const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
@@ -9425,11 +9831,8 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
mbmi_ext->ref_mv_stack[ref_frame],
-#if CONFIG_EXT_INTER
- mbmi_ext->compound_mode_context,
-#endif // CONFIG_EXT_INTER
- candidates, mi_row, mi_col, NULL, NULL,
- mbmi_ext->mode_context);
+ mbmi_ext->compound_mode_context, candidates, mi_row, mi_col,
+ NULL, NULL, mbmi_ext->mode_context);
int_mv nearestmv, nearmv;
av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv);
@@ -9495,9 +9898,16 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
mvp_full.row >>= 3;
int sadpb = x->sadperbit16;
int cost_list[5];
+#if CONFIG_HASH_ME
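+ // The hash-ME variant of the full-pixel search additionally takes the
+ // block position in pixels (MI_SIZE * mi_col, MI_SIZE * mi_row).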
+ int bestsme = av1_full_pixel_search(
+ cpi, x, bsize, &mvp_full, step_param, sadpb,
+ cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1,
+ (MI_SIZE * mi_col), (MI_SIZE * mi_row), 1);
+#else
int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
sadpb, cond_cost_list(cpi, cost_list),
&dv_ref.as_mv, INT_MAX, 1);
+#endif  // CONFIG_HASH_ME
x->mv_limits = tmp_mv_limits;
if (bestsme == INT_MAX) continue;
@@ -9506,18 +9916,12 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
if (mv_check_bounds(&x->mv_limits, &dv)) continue;
if (!is_dv_valid(dv, tile, mi_row, mi_col, bsize)) continue;
-#if CONFIG_PALETTE
memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
-#endif
mbmi->use_intrabc = 1;
mbmi->mode = DC_PRED;
mbmi->uv_mode = UV_DC_PRED;
mbmi->mv[0].as_mv = dv;
-#if CONFIG_DUAL_FILTER
- for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR;
-#else
- mbmi->interp_filter = BILINEAR;
-#endif
+ mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
mbmi->skip = 0;
x->skip = 0;
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
@@ -9527,8 +9931,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
// in MV_COST_WEIGHT is too large. Explore other values.
int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost,
x->mvcost, MV_COST_WEIGHT_SUB);
- const int rate_mode = av1_cost_bit(ec_ctx->intrabc_prob, 1);
-
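+ // Cost of signalling use_intrabc = 1, taken from the precomputed table.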
+ const int rate_mode = x->intrabc_cost[1];
RD_STATS rd_stats, rd_stats_uv;
av1_subtract_plane(x, bsize, 0);
super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
@@ -9605,6 +10008,9 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
mbmi->use_intrabc = 0;
mbmi->mv[0].as_int = 0;
#endif // CONFIG_INTRABC
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = 0;
+#endif
const int64_t intra_yrd =
(bsize >= BLOCK_8X8 || unify_bsize)
@@ -9615,25 +10021,23 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
if (intra_yrd < best_rd) {
#if CONFIG_CFL
- // Perform one extra txfm_rd_in_plane() call, this time with the best value
- // so we can store reconstructed luma values
- RD_STATS this_rd_stats;
-
#if CONFIG_CB4X4
- // Don't store the luma value if no chroma is associated.
- // Don't worry, we will store this reconstructed luma in the following
- // encode dry-run the chroma plane will never know.
- x->cfl_store_y = !x->skip_chroma_rd;
+ // Only store reconstructed luma when there's chroma RDO. When there's no
+ // chroma RDO, the reconstructed luma will be stored in encode_superblock().
+ xd->cfl->store_y = !x->skip_chroma_rd;
#else
- x->cfl_store_y = 1;
-#endif
-
- txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
- mbmi->sb_type, mbmi->tx_size,
- cpi->sf.use_fast_coef_costing);
-
- x->cfl_store_y = 0;
-#endif
+ xd->cfl->store_y = 1;
+#endif // CONFIG_CB4X4
+ if (xd->cfl->store_y) {
+ // Perform one extra call to txfm_rd_in_plane(), with the values chosen
+ // during luma RDO, so we can store reconstructed luma values
+ RD_STATS this_rd_stats;
+ txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
+ mbmi->sb_type, mbmi->tx_size,
+ cpi->sf.use_fast_coef_costing);
+ xd->cfl->store_y = 0;
+ }
+#endif // CONFIG_CFL
max_uv_tx_size = uv_txsize_lookup[bsize][mbmi->tx_size][pd[1].subsampling_x]
[pd[1].subsampling_y];
init_sbuv_mode(mbmi);
@@ -9646,7 +10050,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
&uv_skip, AOMMAX(BLOCK_8X8, bsize), max_uv_tx_size);
#endif // CONFIG_CB4X4
- if (y_skip && uv_skip) {
+ if (y_skip && (uv_skip || x->skip_chroma_rd)) {
rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
rd_cost->dist = dist_y + dist_uv;
@@ -9656,9 +10060,6 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
rd_cost->dist = dist_y + dist_uv;
}
rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- rd_cost->dist_y = dist_y;
-#endif
} else {
rd_cost->rate = INT_MAX;
}
@@ -9747,12 +10148,12 @@ int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
}
-#if CONFIG_PALETTE
static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
const BLOCK_SIZE bsize = mbmi->sb_type;
+ assert(bsize >= BLOCK_8X8);
int src_stride = x->plane[1].src.stride;
const uint8_t *const src_u = x->plane[1].src.buf;
const uint8_t *const src_v = x->plane[2].src.buf;
@@ -9796,24 +10197,20 @@ static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
extend_palette_color_map(color_map, cols, rows, plane_block_width,
plane_block_height);
}
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
static void pick_filter_intra_interframe(
- const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
- BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_uv_intra,
- int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
- UV_PREDICTION_MODE *mode_uv,
+ const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
+ int mi_col, int *rate_uv_intra, int *rate_uv_tokenonly, int64_t *dist_uv,
+ int *skip_uv, UV_PREDICTION_MODE *mode_uv,
FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
#if CONFIG_EXT_INTRA
int8_t *uv_angle_delta,
#endif // CONFIG_EXT_INTRA
-#if CONFIG_PALETTE
- PALETTE_MODE_INFO *pmi_uv, int palette_ctx,
-#endif // CONFIG_PALETTE
- int skip_mask, unsigned int *ref_costs_single, int64_t *best_rd,
- int64_t *best_intra_rd, PREDICTION_MODE *best_intra_mode,
- int *best_mode_index, int *best_skip2, int *best_mode_skippable,
+ PALETTE_MODE_INFO *pmi_uv, int palette_ctx, int skip_mask,
+ unsigned int *ref_costs_single, int64_t *best_rd, int64_t *best_intra_rd,
+ PREDICTION_MODE *best_intra_mode, int *best_mode_index, int *best_skip2,
+ int *best_mode_skippable,
#if CONFIG_SUPERTX
int *returnrate_nocoef,
#endif // CONFIG_SUPERTX
@@ -9821,12 +10218,12 @@ static void pick_filter_intra_interframe(
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-#if CONFIG_PALETTE
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-#endif // CONFIG_PALETTE
+ const int try_palette =
+ av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
int rate2 = 0, rate_y = INT_MAX, skippable = 0, rate_uv, rate_dummy, i;
int dc_mode_index;
- const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
+ const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
int64_t distortion2 = 0, distortion_y = 0, this_rd = *best_rd;
int64_t distortion_uv, model_rd = INT64_MAX;
TX_SIZE uv_tx;
@@ -9854,12 +10251,10 @@ static void pick_filter_intra_interframe(
uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
[xd->plane[1].subsampling_y];
if (rate_uv_intra[uv_tx] == INT_MAX) {
- choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
+ choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
&rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
&skip_uv[uv_tx], &mode_uv[uv_tx]);
-#if CONFIG_PALETTE
if (cm->allow_screen_content_tools) pmi_uv[uv_tx] = *pmi;
-#endif // CONFIG_PALETTE
filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
#if CONFIG_EXT_INTRA
uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
@@ -9870,14 +10265,12 @@ static void pick_filter_intra_interframe(
distortion_uv = dist_uv[uv_tx];
skippable = skippable && skip_uv[uv_tx];
mbmi->uv_mode = mode_uv[uv_tx];
-#if CONFIG_PALETTE
if (cm->allow_screen_content_tools) {
pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
}
-#endif // CONFIG_PALETTE
#if CONFIG_EXT_INTRA
mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
#endif // CONFIG_EXT_INTRA
@@ -9889,13 +10282,10 @@ static void pick_filter_intra_interframe(
}
rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
- cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
-#if CONFIG_PALETTE
- if (cpi->common.allow_screen_content_tools && mbmi->mode == DC_PRED &&
- bsize >= BLOCK_8X8)
+ x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
+ if (try_palette && mbmi->mode == DC_PRED)
rate2 += av1_cost_bit(
av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
-#endif // CONFIG_PALETTE
if (!xd->lossless[mbmi->segment_id]) {
// super_block_yrd above includes the cost of the tx_size in the
@@ -9910,7 +10300,7 @@ static void pick_filter_intra_interframe(
rate2 += write_uniform_cost(
FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]);
#if CONFIG_EXT_INTRA
- if (av1_is_directional_mode(mbmi->uv_mode, bsize) &&
+ if (av1_is_directional_mode(get_uv_mode(mbmi->uv_mode), bsize) &&
av1_use_angle_delta(bsize)) {
rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
@@ -9992,11 +10382,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
const SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-#if CONFIG_PALETTE
const int try_palette =
- cpi->common.allow_screen_content_tools && bsize >= BLOCK_8X8;
+ av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-#endif // CONFIG_PALETTE
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const struct segmentation *const seg = &cm->seg;
PREDICTION_MODE this_mode;
@@ -10004,15 +10392,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
unsigned char segment_id = mbmi->segment_id;
int comp_pred, i, k;
int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
int_mv frame_comp_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
-#if CONFIG_EXT_INTER
int single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
int64_t modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
-#endif // CONFIG_EXT_INTER
static const int flag_list[TOTAL_REFS_PER_FRAME] = {
0,
AOM_LAST_FLAG,
@@ -10023,6 +10409,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
AOM_GOLD_FLAG,
#if CONFIG_EXT_REFS
AOM_BWD_FLAG,
+ AOM_ALT2_FLAG,
#endif // CONFIG_EXT_REFS
AOM_ALT_FLAG
};
@@ -10049,9 +10436,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int64_t dist_uvs[TX_SIZES_ALL];
int skip_uvs[TX_SIZES_ALL];
UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL];
-#if CONFIG_PALETTE
PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
-#endif // CONFIG_PALETTE
#if CONFIG_EXT_INTRA
int8_t uv_angle_delta[TX_SIZES_ALL];
int is_directional_mode, angle_stats_ready = 0;
@@ -10063,14 +10448,14 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_FILTER_INTRA
const int intra_cost_penalty = av1_get_intra_cost_penalty(
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
- const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
+ const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
int best_skip2 = 0;
uint16_t ref_frame_skip_mask[2] = { 0 };
uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_INTERINTRA
MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
int64_t best_single_inter_rd = INT64_MAX;
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
int mode_skip_start = sf->mode_skip_start + 1;
const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
@@ -10088,25 +10473,17 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
{ NULL },
{ MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
#endif // CONFIG_MOTION_VAR
-#if CONFIG_EXT_INTER
NULL,
NULL,
NULL,
-#else // CONFIG_EXT_INTER
- NULL,
-#endif // CONFIG_EXT_INTER
{ { 0 } },
};
-#if CONFIG_PALETTE || CONFIG_EXT_INTRA
const int rows = block_size_high[bsize];
const int cols = block_size_wide[bsize];
-#endif // CONFIG_PALETTE || CONFIG_EXT_INTRA
-#if CONFIG_PALETTE
int palette_ctx = 0;
const MODE_INFO *above_mi = xd->above_mi;
const MODE_INFO *left_mi = xd->left_mi;
-#endif // CONFIG_PALETTE
#if CONFIG_MOTION_VAR
int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
@@ -10141,7 +10518,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
av1_zero(best_mbmode);
-#if CONFIG_PALETTE
av1_zero(pmi_uv);
if (try_palette) {
if (above_mi)
@@ -10149,7 +10525,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (left_mi)
palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
}
-#endif // CONFIG_PALETTE
estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
&comp_mode_p);
@@ -10168,16 +10543,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
*returnrate_nocoef = INT_MAX;
#endif // CONFIG_SUPERTX
-#if CONFIG_SPEED_REFS
- memset(x->mbmi_ext->ref_mvs, 0, sizeof(x->mbmi_ext->ref_mvs));
-#endif // CONFIG_SPEED_REFS
-
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
x->mbmi_ext->mode_context[ref_frame] = 0;
-#if CONFIG_EXT_INTER
x->mbmi_ext->compound_mode_context[ref_frame] = 0;
-#endif // CONFIG_EXT_INTER
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
@@ -10188,12 +10557,16 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
frame_mv[ZEROMV][ref_frame].as_int =
gm_get_motion_vector(&cm->global_motion[ref_frame],
cm->allow_high_precision_mv, bsize, mi_col, mi_row,
- 0)
+ 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
#else // CONFIG_GLOBAL_MOTION
frame_mv[ZEROMV][ref_frame].as_int = 0;
#endif // CONFIG_GLOBAL_MOTION
-#if CONFIG_EXT_INTER
frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
#if CONFIG_COMPOUND_SINGLEREF
frame_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV;
@@ -10203,12 +10576,16 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
frame_mv[ZERO_ZEROMV][ref_frame].as_int =
gm_get_motion_vector(&cm->global_motion[ref_frame],
cm->allow_high_precision_mv, bsize, mi_col, mi_row,
- 0)
+ 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
#else // CONFIG_GLOBAL_MOTION
frame_mv[ZERO_ZEROMV][ref_frame].as_int = 0;
#endif // CONFIG_GLOBAL_MOTION
-#endif // CONFIG_EXT_INTER
}
for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
@@ -10217,11 +10594,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
x->mbmi_ext->mode_context[ref_frame] = 0;
av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
mbmi_ext->ref_mv_stack[ref_frame],
-#if CONFIG_EXT_INTER
- mbmi_ext->compound_mode_context,
-#endif // CONFIG_EXT_INTER
- candidates, mi_row, mi_col, NULL, NULL,
- mbmi_ext->mode_context);
+ mbmi_ext->compound_mode_context, candidates, mi_row,
+ mi_col, NULL, NULL, mbmi_ext->mode_context);
if (mbmi_ext->ref_mv_count[ref_frame] < 2) {
MV_REFERENCE_FRAME rf[2];
av1_set_ref_frame(rf, ref_frame);
@@ -10257,25 +10631,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
-// Skip checking missing references in both single and compound reference
-// modes. Note that a mode will be skipped iff both reference frames
-// are masked out.
-#if CONFIG_EXT_COMP_REFS
+ // Skip checking missing references in both single and compound reference
+ // modes. Note that a mode will be skipped iff both reference frames
+ // are masked out.
ref_frame_skip_mask[0] |= (1 << ref_frame);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
-#else // !CONFIG_EXT_COMP_REFS
-#if CONFIG_EXT_REFS
- if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) {
- ref_frame_skip_mask[0] |= (1 << ref_frame);
- ref_frame_skip_mask[1] |= ((1 << ref_frame) | 0x01);
- } else {
-#endif // CONFIG_EXT_REFS
- ref_frame_skip_mask[0] |= (1 << ref_frame);
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
-#if CONFIG_EXT_REFS
- }
-#endif // CONFIG_EXT_REFS
-#endif // CONFIG_EXT_COMP_REFS
} else {
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
// Skip fixed mv modes for poor references
@@ -10307,7 +10667,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
ref_frame_skip_mask[0] = (1 << LAST_FRAME) |
#if CONFIG_EXT_REFS
(1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
- (1 << BWDREF_FRAME) |
+ (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) |
#endif // CONFIG_EXT_REFS
(1 << GOLDEN_FRAME);
ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
@@ -10317,7 +10677,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_GLOBAL_MOTION
zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME],
cm->allow_high_precision_mv, bsize,
- mi_col, mi_row, 0)
+ mi_col, mi_row, 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
#else
zeromv.as_int = 0;
@@ -10326,7 +10691,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
-#if CONFIG_EXT_INTER
if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
@@ -10337,7 +10701,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
zeromv.as_int)
mode_skip_mask[ALTREF_FRAME] |= (1 << SR_NEAREST_NEARMV);
#endif // CONFIG_COMPOUND_SINGLEREF
-#endif // CONFIG_EXT_INTER
}
}
@@ -10400,11 +10763,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_PVQ
od_encode_checkpoint(&x->daala_enc, &pre_buf);
#endif // CONFIG_PVQ
-#if CONFIG_EXT_INTER
for (i = 0; i < MB_MODE_COUNT; ++i)
for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
modelled_rd[i][ref_frame] = INT64_MAX;
-#endif // CONFIG_EXT_INTER
for (midx = 0; midx < MAX_MODES; ++midx) {
int mode_index;
@@ -10414,10 +10775,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int compmode_cost = 0;
int rate2 = 0, rate_y = 0, rate_uv = 0;
int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- int64_t distortion2_y = 0;
- int64_t total_sse_y = INT64_MAX;
-#endif
int skippable = 0;
int this_skip2 = 0;
int64_t total_sse = INT64_MAX;
@@ -10431,7 +10788,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
mbmi->ref_mv_idx = 0;
-#if CONFIG_EXT_INTER
if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
// Mode must be compatible
if (!is_interintra_allowed_mode(this_mode)) continue;
@@ -10451,7 +10807,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
frame_mv[compound_ref1_mode(this_mode)][ref_frame].as_int;
#endif // CONFIG_COMPOUND_SINGLEREF
}
-#endif // CONFIG_EXT_INTER
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
@@ -10481,6 +10836,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
ref_frame_skip_mask[0] |= BWDREF_FRAME_MODE_MASK;
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
break;
+ case ALTREF2_FRAME:
+ ref_frame_skip_mask[0] |= ALTREF2_FRAME_MODE_MASK;
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
#endif // CONFIG_EXT_REFS
case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALTREF_FRAME_MODE_MASK;
#if CONFIG_EXT_REFS
@@ -10537,7 +10896,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
// This is only used in the motion vector unit test.
if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
-#if CONFIG_ONE_SIDED_COMPOUND // Changes LL bitstream
+#if CONFIG_ONE_SIDED_COMPOUND && !CONFIG_EXT_COMP_REFS // Changes LL bitstream
#if CONFIG_EXT_REFS
if (cpi->oxcf.pass == 0) {
// Complexity-compression trade-offs
@@ -10546,8 +10905,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (second_ref_frame == ALTREF_FRAME) continue;
// if (second_ref_frame == BWDREF_FRAME) continue;
}
-#endif
-#endif
+#endif // CONFIG_EXT_REFS
+#endif // CONFIG_ONE_SIDED_COMPOUND && !CONFIG_EXT_COMP_REFS
comp_pred = second_ref_frame > INTRA_FRAME;
if (comp_pred) {
if (!cpi->allow_comp_inter_inter) continue;
@@ -10601,12 +10960,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
} else {
#endif // CONFIG_GLOBAL_MOTION
const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
- if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
-#if CONFIG_EXT_INTER
- mbmi_ext->compound_mode_context,
-#endif // CONFIG_EXT_INTER
- frame_mv, this_mode, ref_frames, bsize, -1,
- mi_row, mi_col))
+ if (!check_best_zero_mv(cpi, x, mbmi_ext->mode_context,
+ mbmi_ext->compound_mode_context, frame_mv,
+ this_mode, ref_frames, bsize, -1, mi_row, mi_col))
continue;
}
@@ -10614,10 +10970,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mbmi->uv_mode = UV_DC_PRED;
mbmi->ref_frame[0] = ref_frame;
mbmi->ref_frame[1] = second_ref_frame;
-#if CONFIG_PALETTE
pmi->palette_size[0] = 0;
pmi->palette_size[1] = 0;
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
@@ -10639,18 +10993,18 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// Single ref compound mode
if (!comp_pred && is_inter_singleref_comp_mode(mbmi->mode)) {
xd->block_refs[1] = xd->block_refs[0];
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[1] = xd->plane[i].pre[0];
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_INTERINTRA
mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
if (ref_frame == INTRA_FRAME) {
RD_STATS rd_stats_y;
@@ -10699,12 +11053,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][pd->subsampling_x]
[pd->subsampling_y];
if (rate_uv_intra[uv_tx] == INT_MAX) {
- choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
+ choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
&rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
&skip_uvs[uv_tx], &mode_uv[uv_tx]);
-#if CONFIG_PALETTE
if (try_palette) pmi_uv[uv_tx] = *pmi;
-#endif // CONFIG_PALETTE
#if CONFIG_EXT_INTRA
uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
@@ -10718,14 +11070,12 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
distortion_uv = dist_uvs[uv_tx];
skippable = skippable && skip_uvs[uv_tx];
mbmi->uv_mode = mode_uv[uv_tx];
-#if CONFIG_PALETTE
if (try_palette) {
pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
}
-#endif // CONFIG_PALETTE
#if CONFIG_EXT_INTRA
mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
@@ -10742,20 +11092,18 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_CB4X4
rate2 = rate_y + intra_mode_cost[mbmi->mode];
if (!x->skip_chroma_rd)
- rate2 += rate_uv + cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
+ rate2 += rate_uv + x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
#else
rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
- cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
+ x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
#endif // CONFIG_CB4X4
-#if CONFIG_PALETTE
if (try_palette && mbmi->mode == DC_PRED) {
rate2 += av1_cost_bit(
av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
}
-#endif // CONFIG_PALETTE
- if (!xd->lossless[mbmi->segment_id] && bsize >= BLOCK_8X8) {
+ if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
// super_block_yrd above includes the cost of the tx_size in the
// tokenonly rate, but for intra blocks, tx_size is always coded
// (prediction granularity), so we account for it in the full rate,
@@ -10769,14 +11117,14 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
const int p_angle =
mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
if (av1_is_intra_filter_switchable(p_angle))
- rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
+ rate2 += x->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
#endif // CONFIG_INTRA_INTERP
if (av1_use_angle_delta(bsize)) {
rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
}
}
- if (av1_is_directional_mode(mbmi->uv_mode, bsize) &&
+ if (av1_is_directional_mode(get_uv_mode(mbmi->uv_mode), bsize) &&
av1_use_angle_delta(bsize)) {
rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
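The angle-delta rate above is priced with write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, ...). A minimal sketch of the near-uniform code it accounts for, in whole bits (the library works in fractional av1_cost_bit units, so the costs here are illustrative):

/* Near-uniform code over n symbols: with l = ceil(log2(n)), the
 * first m = 2^l - n symbols get (l - 1)-bit codewords, the rest get
 * l bits; write_uniform_cost() prices symbol v the same way. */
static int uniform_cost_bits(int n, int v) {
  if (n <= 1) return 0;
  int l = 1;
  while ((1 << l) < n) ++l;
  const int m = (1 << l) - n;
  return v < m ? l - 1 : l;
}

For n = 7 (MAX_ANGLE_DELTA of 3) this gives 2 bits for the first symbol and 3 bits for the remaining six.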
@@ -10806,19 +11154,15 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) distortion2_y = distortion_y;
-#endif
} else {
int_mv backup_ref_mv[2];
-#if !SUB8X8_COMP_REF
- if (bsize == BLOCK_4X4 && mbmi->ref_frame[1] > INTRA_FRAME) continue;
-#endif // !SUB8X8_COMP_REF
+ if (!is_comp_ref_allowed(bsize) && mbmi->ref_frame[1] > INTRA_FRAME)
+ continue;
backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_INTERINTRA
if (second_ref_frame == INTRA_FRAME) {
if (best_single_inter_ref != ref_frame) continue;
mbmi->interintra_mode = intra_to_interintra_mode[best_intra_mode];
@@ -10836,11 +11180,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif // CONFIG_FILTER_INTRA
}
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
mbmi->ref_mv_idx = 0;
ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-#if CONFIG_EXT_INTER
if (comp_pred) {
if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
int ref_mv_idx = 0;
@@ -10887,7 +11230,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
}
#endif // CONFIG_COMPOUND_SINGLEREF
} else {
-#endif // CONFIG_EXT_INTER
if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
int ref;
for (ref = 0; ref < 1 + comp_pred; ++ref) {
@@ -10899,38 +11241,21 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
}
}
-#if CONFIG_EXT_INTER
}
-#endif // CONFIG_EXT_INTER
{
RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
av1_init_rd_stats(&rd_stats);
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- // While av1 master uses rd_stats_y.rate through out the codebase,
- // which is set when handle_inter_moden is called, the daala-dist code
- // in rd_pick_partition() for cb4x4 and sub8x8 blocks need to know
- // .dist_y which comes from rd_stats_y.dist and rd_stats_y.sse.
- // The problem is rd_stats_y.dist and rd_stats_y.sse are sometimes not
- // initialized when rd_stats.skip = 1,
- // then instead rd_stats.dist and rd_stats.sse have the
- // combined luma and chroma dist and sse.
- // This can be seen inside motion_mode_rd(), which is called by
- // handle_inter_mode().
- if (bsize < BLOCK_8X8) av1_init_rd_stats(&rd_stats_y);
-#endif
rd_stats.rate = rate2;
// Point to variables that are maintained between loop iterations
args.single_newmv = single_newmv;
-#if CONFIG_EXT_INTER
args.single_newmv_rate = single_newmv_rate;
args.modelled_rd = modelled_rd;
-#endif // CONFIG_EXT_INTER
this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
&rd_stats_uv, &disable_skip, frame_mv,
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
frame_comp_mv,
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
mi_row, mi_col, &args, best_rd);
rate2 = rd_stats.rate;
@@ -10939,21 +11264,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
total_sse = rd_stats.sse;
rate_y = rd_stats_y.rate;
rate_uv = rd_stats_uv.rate;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
- if (rd_stats_y.rate != INT_MAX) {
- assert(rd_stats_y.sse < INT64_MAX);
- assert(rd_stats_y.dist < INT64_MAX);
- }
- total_sse_y = rd_stats_y.sse;
- distortion2_y = rd_stats_y.dist;
- }
-#endif
}
// TODO(jingning): This needs some refactoring to improve code quality
// and reduce redundant steps.
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
if ((have_nearmv_in_inter_mode(mbmi->mode) &&
mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
@@ -10966,11 +11280,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
mbmi_ext->ref_mv_count[ref_frame_type] > 1))
#endif // CONFIG_COMPOUND_SINGLEREF
-#else // !CONFIG_EXT_INTER
- if ((mbmi->mode == NEARMV &&
- mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
- (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1))
-#endif // CONFIG_EXT_INTER
{
int_mv backup_mv = frame_mv[NEARMV][ref_frame];
MB_MODE_INFO backup_mbmi = *mbmi;
@@ -10978,12 +11287,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int64_t tmp_ref_rd = this_rd;
int ref_idx;
-// TODO(jingning): This should be deprecated shortly.
-#if CONFIG_EXT_INTER
+ // TODO(jingning): This should be deprecated shortly.
int idx_offset = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
-#else
- int idx_offset = (mbmi->mode == NEARMV) ? 1 : 0;
-#endif // CONFIG_EXT_INTER
int ref_set =
AOMMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
@@ -10994,7 +11299,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
backup_fmv[0] = frame_mv[NEWMV][ref_frame];
if (comp_pred) backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
- rate2 += (rate2 < INT_MAX ? cpi->drl_mode_cost0[drl_ctx][0] : 0);
+ rate2 += (rate2 < INT_MAX ? x->drl_mode_cost0[drl_ctx][0] : 0);
if (this_rd < INT64_MAX) {
if (RDCOST(x->rdmult, rate_y + rate_uv, distortion2) <
@@ -11003,10 +11308,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
distortion2);
else
- tmp_ref_rd = RDCOST(
- x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
- rate_y - rate_uv,
- total_sse);
+ tmp_ref_rd =
+ RDCOST(x->rdmult,
+ rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
+ rate_y - rate_uv,
+ total_sse);
}
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
@@ -11027,7 +11333,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
mbmi->ref_mv_idx = 1 + ref_idx;
-#if CONFIG_EXT_INTER
if (comp_pred) {
int ref_mv_idx = mbmi->ref_mv_idx;
// Special case: NEAR_NEWMV and NEW_NEARMV modes use
@@ -11092,7 +11397,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
}
#endif // CONFIG_COMPOUND_SINGLEREF
} else {
-#endif // CONFIG_EXT_INTER
for (ref = 0; ref < 1 + comp_pred; ++ref) {
int_mv this_mv =
(ref == 0)
@@ -11104,9 +11408,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
xd->n8_h << MI_SIZE_LOG2, xd);
mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
}
-#if CONFIG_EXT_INTER
}
-#endif
cur_mv =
mbmi_ext->ref_mv_stack[ref_frame][mbmi->ref_mv_idx + idx_offset]
@@ -11115,39 +11417,25 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (!mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
int_mv dummy_single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
-#if CONFIG_EXT_INTER
int dummy_single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
-#endif // CONFIG_EXT_INTER
frame_mv[NEARMV][ref_frame] = cur_mv;
av1_init_rd_stats(&tmp_rd_stats);
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- // With the same reason as 'rd_stats_y' passed to above
- // handle_inter_mode(), tmp_rd_stats_y.dist and
- // tmp_rd_stats_y.sse are sometimes not initialized, esp. when
- // tmp_rd_stats.skip = 1 and tmp_rd_stats.dist and .sse
- // represent combined luma and chroma .dist and .sse,
- // we should initialized tmp_rd_stats_y.
- if (bsize < BLOCK_8X8) av1_init_rd_stats(&tmp_rd_stats_y);
-#endif
+
// Point to variables that are not maintained between iterations
args.single_newmv = dummy_single_newmv;
-#if CONFIG_EXT_INTER
args.single_newmv_rate = dummy_single_newmv_rate;
args.modelled_rd = NULL;
-#endif // CONFIG_EXT_INTER
tmp_alt_rd = handle_inter_mode(cpi, x, bsize, &tmp_rd_stats,
&tmp_rd_stats_y, &tmp_rd_stats_uv,
&dummy_disable_skip, frame_mv,
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
frame_comp_mv,
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
mi_row, mi_col, &args, best_rd);
// Prevent pointers from escaping local scope
args.single_newmv = NULL;
-#if CONFIG_EXT_INTER
args.single_newmv_rate = NULL;
-#endif // CONFIG_EXT_INTER
}
for (i = 0; i < mbmi->ref_mv_idx; ++i) {
@@ -11155,7 +11443,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
drl1_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
i + idx_offset);
tmp_rd_stats.rate +=
- (tmp_rd_stats.rate < INT_MAX ? cpi->drl_mode_cost0[drl1_ctx][1]
+ (tmp_rd_stats.rate < INT_MAX ? x->drl_mode_cost0[drl1_ctx][1]
: 0);
}
@@ -11166,7 +11454,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
mbmi->ref_mv_idx + idx_offset);
tmp_rd_stats.rate +=
- (tmp_rd_stats.rate < INT_MAX ? cpi->drl_mode_cost0[drl1_ctx][0]
+ (tmp_rd_stats.rate < INT_MAX ? x->drl_mode_cost0[drl1_ctx][0]
: 0);
}
@@ -11178,16 +11466,18 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (RDCOST(x->rdmult, tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate,
tmp_rd_stats.dist) <
RDCOST(x->rdmult, 0, tmp_rd_stats.sse))
- tmp_alt_rd = RDCOST(
- x->rdmult, tmp_rd_stats.rate +
- av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
- tmp_rd_stats.dist);
+ tmp_alt_rd =
+ RDCOST(x->rdmult,
+ tmp_rd_stats.rate +
+ av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
+ tmp_rd_stats.dist);
else
- tmp_alt_rd = RDCOST(
- x->rdmult, tmp_rd_stats.rate +
- av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
- tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
- tmp_rd_stats.sse);
+ tmp_alt_rd =
+ RDCOST(x->rdmult,
+ tmp_rd_stats.rate +
+ av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
+ tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
+ tmp_rd_stats.sse);
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
}
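Both branches above are instances of the same skip-flag tradeoff: either pay the coefficient rate plus the not-skipped bit and keep the coded distortion, or pay the skipped bit and accept the full SSE as distortion. A self-contained sketch, with a stand-in rd_cost() whose fixed-point scaling is an assumption (the real RDCOST macro lives in av1/encoder/rd.h):

#include <stdint.h>

/* Stand-in for RDCOST: rate weighted by the Lagrange multiplier
 * 'rdmult', added to distortion. Shift amounts are illustrative. */
static int64_t rd_cost(int rdmult, int rate, int64_t dist) {
  return ((int64_t)rate * rdmult >> 9) + (dist << 4);
}

/* Skip decision as in the branches above: compare coding the
 * residual against signalling skip and taking SSE as distortion. */
static int64_t pick_skip_rd(int rdmult, int rate, int rate_skip0,
                            int rate_skip1, int64_t dist, int64_t sse,
                            int *skip) {
  const int64_t rd_code = rd_cost(rdmult, rate + rate_skip0, dist);
  const int64_t rd_skip = rd_cost(rdmult, rate_skip1, sse);
  *skip = rd_skip < rd_code;
  return *skip ? rd_skip : rd_code;
}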
@@ -11203,16 +11493,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
tmp_ref_rd = tmp_alt_rd;
backup_mbmi = *mbmi;
backup_skip = x->skip;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
- if (tmp_rd_stats_y.rate != INT_MAX) {
- assert(tmp_rd_stats_y.sse < INT64_MAX);
- assert(tmp_rd_stats_y.dist < INT64_MAX);
- }
- total_sse_y = tmp_rd_stats_y.sse;
- distortion2_y = tmp_rd_stats_y.dist;
- }
-#endif
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
memcpy(x->blk_skip_drl[i], x->blk_skip[i],
@@ -11238,12 +11518,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (this_rd == INT64_MAX) continue;
-#if SUB8X8_COMP_REF
- compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
-#else
- if (mbmi->sb_type != BLOCK_4X4)
+ if (is_comp_ref_allowed(mbmi->sb_type))
compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
-#endif // SUB8X8_COMP_REF
if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
}
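The SUB8X8_COMP_REF compile-time switch gives way to a runtime predicate here and at the earlier continue. Its body is not part of this hunk; a plausible sketch, assuming compound references are gated on both block dimensions reaching 8 pixels (the old code special-cased only BLOCK_4X4, so the exact 4x8/8x4 behaviour is an assumption):

/* Assumed shape of is_comp_ref_allowed(). */
static int is_comp_ref_allowed_sketch(int block_width, int block_height) {
  return block_width >= 8 && block_height >= 8;
}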
@@ -11263,14 +11539,14 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
rate2 += ref_costs_single[ref_frame];
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// Add the cost to signal single/comp mode in single ref.
if (!comp_pred && cm->reference_mode != COMPOUND_REFERENCE) {
aom_prob singleref_comp_mode_p = av1_get_inter_mode_prob(cm, xd);
rate2 += av1_cost_bit(singleref_comp_mode_p,
is_inter_singleref_comp_mode(mbmi->mode));
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
if (ref_frame == INTRA_FRAME)
@@ -11299,12 +11575,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
this_skip2 = 1;
rate_y = 0;
rate_uv = 0;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
- assert(total_sse_y < INT64_MAX);
- distortion2_y = total_sse_y;
- }
-#endif
}
} else {
// Add in the cost of the no skip flag.
@@ -11324,25 +11594,19 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
}
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if ((bsize < BLOCK_8X8) && (rate2 != INT_MAX)) {
- assert(distortion2_y < INT64_MAX);
- }
-#endif
-
if (ref_frame == INTRA_FRAME) {
// Keep record of best intra rd
if (this_rd < best_intra_rd) {
best_intra_rd = this_rd;
best_intra_mode = mbmi->mode;
}
-#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#if CONFIG_INTERINTRA
} else if (second_ref_frame == NONE_FRAME) {
if (this_rd < best_single_inter_rd) {
best_single_inter_rd = this_rd;
best_single_inter_ref = mbmi->ref_frame[0];
}
-#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
+#endif // CONFIG_INTERINTRA
}
if (!disable_skip && ref_frame == INTRA_FRAME) {
@@ -11388,12 +11652,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif
mi);
if (motion_allowed == WARPED_CAUSAL)
- *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
+ *returnrate_nocoef -= x->motion_mode_cost[bsize][mbmi->motion_mode];
else if (motion_allowed == OBMC_CAUSAL)
- *returnrate_nocoef -=
- cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
+ *returnrate_nocoef -= x->motion_mode_cost1[bsize][mbmi->motion_mode];
#else
- *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
+ *returnrate_nocoef -= x->motion_mode_cost[bsize][mbmi->motion_mode];
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#endif // CONFIG_SUPERTX
@@ -11406,12 +11669,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd),
this_skip2 || skippable);
best_rate_uv = rate_uv;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
- assert(distortion2_y < INT64_MAX);
- rd_cost->dist_y = distortion2_y;
- }
-#endif
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
memcpy(ctx->blk_skip[i], x->blk_skip[i],
@@ -11419,11 +11676,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_VAR_TX
}
}
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) {
- assert(rd_cost->dist_y < INT64_MAX);
- }
-#endif
+
/* keep record of best compound/single-only prediction */
if (!disable_skip && ref_frame != INTRA_FRAME) {
int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
@@ -11475,14 +11728,14 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
}
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// Single ref compound mode
if (!has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode)) {
xd->block_refs[1] = xd->block_refs[0];
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[1] = xd->plane[i].pre[0];
}
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#endif // CONFIG_COMPOUND_SINGLEREF
if (is_inter_mode(mbmi->mode)) {
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
@@ -11497,6 +11750,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_VAR_TX
if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ assert(rd_stats_y.rate != INT_MAX);
} else {
int idx, idy;
super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
@@ -11538,6 +11792,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_VAR_TX
best_mbmode.tx_type = mbmi->tx_type;
best_mbmode.tx_size = mbmi->tx_size;
+#if CONFIG_LGT_FROM_PRED
+ best_mbmode.use_lgt = mbmi->use_lgt;
+#endif
#if CONFIG_VAR_TX
for (idy = 0; idy < xd->n8_h; ++idy)
for (idx = 0; idx < xd->n8_w; ++idx)
@@ -11554,23 +11811,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
best_skip2 = skip_blk;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) {
- assert(rd_cost->rate != INT_MAX);
- assert(rd_cost->dist_y < INT64_MAX);
- rd_cost->dist_y = rd_stats_y.dist;
- }
-#endif
}
}
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if ((bsize < BLOCK_8X8) && (rd_cost->rate != INT_MAX)) {
- assert(rd_cost->dist_y < INT64_MAX);
- }
-#endif
-
-#if CONFIG_PALETTE
// Only try palette mode when the best mode so far is an intra mode.
if (try_palette && !is_inter_mode(best_mbmode.mode)) {
int rate2 = 0;
@@ -11603,7 +11846,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
[xd->plane[1].subsampling_y];
if (rate_uv_intra[uv_tx] == INT_MAX) {
- choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
+ choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
&rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
&skip_uvs[uv_tx], &mode_uv[uv_tx]);
pmi_uv[uv_tx] = *pmi;
@@ -11666,28 +11909,21 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
}
}
PALETTE_EXIT:
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
// TODO(huisu): filter-intra is turned off in lossless mode for now to
// avoid a unit test failure
- if (!xd->lossless[mbmi->segment_id] &&
-#if CONFIG_PALETTE
- pmi->palette_size[0] == 0 &&
-#endif // CONFIG_PALETTE
+ if (!xd->lossless[mbmi->segment_id] && pmi->palette_size[0] == 0 &&
!dc_skipped && best_mode_index >= 0 &&
best_intra_rd < (best_rd + (best_rd >> 3))) {
pick_filter_intra_interframe(
- cpi, x, ctx, bsize, mi_row, mi_col, rate_uv_intra, rate_uv_tokenonly,
+ cpi, x, bsize, mi_row, mi_col, rate_uv_intra, rate_uv_tokenonly,
dist_uvs, skip_uvs, mode_uv, filter_intra_mode_info_uv,
#if CONFIG_EXT_INTRA
uv_angle_delta,
#endif // CONFIG_EXT_INTRA
-#if CONFIG_PALETTE
- pmi_uv, palette_ctx,
-#endif // CONFIG_PALETTE
- 0, ref_costs_single, &best_rd, &best_intra_rd, &best_intra_mode,
- &best_mode_index, &best_skip2, &best_mode_skippable,
+ pmi_uv, palette_ctx, 0, ref_costs_single, &best_rd, &best_intra_rd,
+ &best_intra_mode, &best_mode_index, &best_skip2, &best_mode_skippable,
#if CONFIG_SUPERTX
returnrate_nocoef,
#endif // CONFIG_SUPERTX
@@ -11699,15 +11935,11 @@ PALETTE_EXIT:
// Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
// ZEROMV. Here, checks are added for those cases, and the mode decisions
// are corrected.
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
// NOTE: For SR_NEW_NEWMV, no need to check as the two mvs from the same ref
// are surely different from each other.
-#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
- if (best_mbmode.mode == NEWMV
-#if CONFIG_EXT_INTER
- || best_mbmode.mode == NEW_NEWMV
-#endif // CONFIG_EXT_INTER
- ) {
+#endif // CONFIG_COMPOUND_SINGLEREF
+ if (best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV) {
const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
best_mbmode.ref_frame[1] };
int comp_pred_mode = refs[1] > INTRA_FRAME;
@@ -11716,14 +11948,25 @@ PALETTE_EXIT:
#if CONFIG_GLOBAL_MOTION
zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
cm->allow_high_precision_mv, bsize,
- mi_col, mi_row, 0)
+ mi_col, mi_row, 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
- zeromv[1].as_int = comp_pred_mode
- ? gm_get_motion_vector(&cm->global_motion[refs[1]],
- cm->allow_high_precision_mv,
- bsize, mi_col, mi_row, 0)
- .as_int
- : 0;
+ zeromv[1].as_int =
+ comp_pred_mode
+ ? gm_get_motion_vector(&cm->global_motion[refs[1]],
+ cm->allow_high_precision_mv, bsize, mi_col,
+ mi_row, 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
+ .as_int
+ : 0;
#else
zeromv[0].as_int = 0;
zeromv[1].as_int = 0;
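The trailing argument added under CONFIG_AMVR, cm->cur_frame_mv_precision_level, lets gm_get_motion_vector() respect a frame-wide cap on motion-vector precision. A sketch of the assumed effect, snapping 1/8-pel components to full pel when the level restricts precision (the library's exact rounding rules may differ):

#include <stdint.h>

typedef struct { int16_t row, col; } MvSketch;

static void apply_mv_precision_sketch(MvSketch *mv, int precision_level) {
  if (precision_level > 0) {
    /* Round each 1/8-pel component to the nearest whole pixel. */
    mv->row = (int16_t)(((mv->row + (mv->row >= 0 ? 4 : -4)) / 8) * 8);
    mv->col = (int16_t)(((mv->col + (mv->col >= 0 ? 4 : -4)) / 8) * 8);
  }
}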
@@ -11749,7 +11992,6 @@ PALETTE_EXIT:
int_mv nearestmv[2];
int_mv nearmv[2];
-#if CONFIG_EXT_INTER
if (mbmi_ext->ref_mv_count[rf_type] > 1) {
nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
@@ -11757,22 +11999,6 @@ PALETTE_EXIT:
nearmv[0] = frame_mv[NEARMV][refs[0]];
nearmv[1] = frame_mv[NEARMV][refs[1]];
}
-#else
- int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
- ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
- : INT_MAX;
-
- for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
- nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
- nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
-
- if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
- nearmv[1].as_int == best_mbmode.mv[1].as_int) {
- best_mbmode.mode = NEARMV;
- best_mbmode.ref_mv_idx = i;
- }
- }
-#endif // CONFIG_EXT_INTER
if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
@@ -11782,9 +12008,7 @@ PALETTE_EXIT:
}
if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
- nearestmv[1].as_int == best_mbmode.mv[1].as_int)
-#if CONFIG_EXT_INTER
- {
+ nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
best_mbmode.mode = NEAREST_NEARESTMV;
} else {
int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
@@ -11808,21 +12032,12 @@ PALETTE_EXIT:
best_mbmode.mv[1].as_int == zeromv[1].as_int)
best_mbmode.mode = ZERO_ZEROMV;
}
-#else
- {
- best_mbmode.mode = NEARESTMV;
- } else if (best_mbmode.mv[0].as_int == zeromv[0].as_int &&
- best_mbmode.mv[1].as_int == zeromv[1].as_int) {
- best_mbmode.mode = ZEROMV;
- }
-#endif // CONFIG_EXT_INTER
}
}
// Make sure that the ref_mv_idx is only nonzero when we're
// using a mode which can support ref_mv_idx
if (best_mbmode.ref_mv_idx != 0 &&
-#if CONFIG_EXT_INTER
#if CONFIG_COMPOUND_SINGLEREF
!(best_mbmode.mode == NEWMV || best_mbmode.mode == SR_NEW_NEWMV ||
best_mbmode.mode == NEW_NEWMV ||
@@ -11831,45 +12046,31 @@ PALETTE_EXIT:
!(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
have_nearmv_in_inter_mode(best_mbmode.mode)))
#endif // CONFIG_COMPOUND_SINGLEREF
-#else // !CONFIG_EXT_INTER
- !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV))
-#endif // CONFIG_EXT_INTER
{
best_mbmode.ref_mv_idx = 0;
}
- {
+ if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
+ best_mbmode.ref_frame[1] <= INTRA_FRAME) {
int8_t ref_frame_type = av1_ref_frame_type(best_mbmode.ref_frame);
int16_t mode_ctx = mbmi_ext->mode_context[ref_frame_type];
if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
- int_mv zeromv[2];
+ int_mv zeromv;
#if CONFIG_GLOBAL_MOTION
- const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
- best_mbmode.ref_frame[1] };
- zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
- cm->allow_high_precision_mv,
- bsize, mi_col, mi_row, 0)
- .as_int;
- zeromv[1].as_int = (refs[1] != NONE_FRAME)
- ? gm_get_motion_vector(&cm->global_motion[refs[1]],
- cm->allow_high_precision_mv,
- bsize, mi_col, mi_row, 0)
- .as_int
- : 0;
- lower_mv_precision(&zeromv[0].as_mv, cm->allow_high_precision_mv);
- lower_mv_precision(&zeromv[1].as_mv, cm->allow_high_precision_mv);
+ const MV_REFERENCE_FRAME ref = best_mbmode.ref_frame[0];
+ zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ref],
+ cm->allow_high_precision_mv, bsize,
+ mi_col, mi_row, 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
+ .as_int;
#else
- zeromv[0].as_int = zeromv[1].as_int = 0;
+ zeromv.as_int = 0;
#endif // CONFIG_GLOBAL_MOTION
- if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
- best_mbmode.mv[0].as_int == zeromv[0].as_int &&
-#if CONFIG_EXT_INTER
- (best_mbmode.ref_frame[1] <= INTRA_FRAME)
-#else
- (best_mbmode.ref_frame[1] == NONE_FRAME ||
- best_mbmode.mv[1].as_int == zeromv[1].as_int)
-#endif // CONFIG_EXT_INTER
- ) {
+ if (best_mbmode.mv[0].as_int == zeromv.as_int) {
best_mbmode.mode = ZEROMV;
}
}
@@ -11881,24 +12082,14 @@ PALETTE_EXIT:
return;
}
-#if CONFIG_DUAL_FILTER
assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter == best_mbmode.interp_filter[0]) ||
+ (cm->interp_filter ==
+ av1_extract_interp_filter(best_mbmode.interp_filters, 0)) ||
!is_inter_block(&best_mbmode));
+#if CONFIG_DUAL_FILTER
assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter == best_mbmode.interp_filter[1]) ||
- !is_inter_block(&best_mbmode));
- if (best_mbmode.ref_frame[1] > INTRA_FRAME) {
- assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter == best_mbmode.interp_filter[2]) ||
- !is_inter_block(&best_mbmode));
- assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter == best_mbmode.interp_filter[3]) ||
- !is_inter_block(&best_mbmode));
- }
-#else
- assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter == best_mbmode.interp_filter) ||
+ (cm->interp_filter ==
+ av1_extract_interp_filter(best_mbmode.interp_filters, 1)) ||
!is_inter_block(&best_mbmode));
#endif // CONFIG_DUAL_FILTER
@@ -11913,11 +12104,7 @@ PALETTE_EXIT:
// Note: this section is needed since the mode may have been forced to
// ZEROMV by the all-zero mode handling of ref-mv.
#if CONFIG_GLOBAL_MOTION
- if (mbmi->mode == ZEROMV
-#if CONFIG_EXT_INTER
- || mbmi->mode == ZERO_ZEROMV
-#endif // CONFIG_EXT_INTER
- ) {
+ if (mbmi->mode == ZEROMV || mbmi->mode == ZERO_ZEROMV) {
#if CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
// Correct the motion mode for ZEROMV
const MOTION_MODE last_motion_mode_allowed =
@@ -11932,17 +12119,8 @@ PALETTE_EXIT:
// Correct the interpolation filter for ZEROMV
if (is_nontrans_global_motion(xd)) {
-#if CONFIG_DUAL_FILTER
- mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter;
- mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE
- ? EIGHTTAP_REGULAR
- : cm->interp_filter;
-#else
- mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
- : cm->interp_filter;
-#endif // CONFIG_DUAL_FILTER
+ mbmi->interp_filters = av1_broadcast_interp_filter(
+ av1_unswitchable_filter(cm->interp_filter));
}
}
#endif // CONFIG_GLOBAL_MOTION
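This hunk, like several below, replaces the per-direction interp_filter array with one packed interp_filters word, built via av1_broadcast_interp_filter() and read back via av1_extract_interp_filter(). One plausible packing, two 16-bit filter ids in a 32-bit value (the shifts here are assumptions; the authoritative layout lives in the library headers):

#include <stdint.h>

typedef uint32_t InterpFiltersSketch;

/* Assumed layout: x-direction filter in the high 16 bits, y in the low. */
static InterpFiltersSketch make_filters(int y_filter, int x_filter) {
  return ((uint32_t)x_filter << 16) | ((uint32_t)y_filter & 0xffff);
}

static InterpFiltersSketch broadcast_filter(int filter) {
  return make_filters(filter, filter);
}

static int extract_filter(InterpFiltersSketch filters, int x_dir) {
  return (int)((x_dir ? filters >> 16 : filters) & 0xffff);
}

With this layout, broadcast_filter(f) yields the same filter for both directions, matching the av1_broadcast_interp_filter() call sites in the hunks below.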
@@ -11968,11 +12146,10 @@ PALETTE_EXIT:
store_coding_context(x, ctx, best_mode_index, best_pred_diff,
best_mode_skippable);
-#if CONFIG_PALETTE
- if (cm->allow_screen_content_tools && pmi->palette_size[1] > 0) {
+ if (pmi->palette_size[1] > 0) {
+ assert(try_palette);
restore_uv_color_map(cpi, x);
}
-#endif // CONFIG_PALETTE
}
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
@@ -12013,10 +12190,8 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
-#if CONFIG_PALETTE
mbmi->palette_mode_info.palette_size[0] = 0;
mbmi->palette_mode_info.palette_size[1] = 0;
-#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
@@ -12030,8 +12205,12 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
#if CONFIG_GLOBAL_MOTION
mbmi->mv[0].as_int =
gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
- cm->allow_high_precision_mv, bsize, mi_col, mi_row,
- 0)
+ cm->allow_high_precision_mv, bsize, mi_col, mi_row, 0
+#if CONFIG_AMVR
+ ,
+ cm->cur_frame_mv_precision_level
+#endif
+ )
.as_int;
#else // CONFIG_GLOBAL_MOTION
mbmi->mv[0].as_int = 0;
@@ -12041,6 +12220,9 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
mbmi->ref_mv_idx = 0;
mbmi->pred_mv[0].as_int = 0;
+#if CONFIG_LGT_FROM_PRED
+ mbmi->use_lgt = 0;
+#endif
mbmi->motion_mode = SIMPLE_TRANSLATION;
#if CONFIG_MOTION_VAR
@@ -12074,31 +12256,18 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
int rs;
int best_rs = INT_MAX;
for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
-#if CONFIG_DUAL_FILTER
- int k;
- for (k = 0; k < 4; ++k) mbmi->interp_filter[k] = i;
-#else
- mbmi->interp_filter = i;
-#endif // CONFIG_DUAL_FILTER
- rs = av1_get_switchable_rate(cpi, xd);
+ mbmi->interp_filters = av1_broadcast_interp_filter(i);
+ rs = av1_get_switchable_rate(cm, x, xd);
if (rs < best_rs) {
best_rs = rs;
-#if CONFIG_DUAL_FILTER
- best_filter = mbmi->interp_filter[0];
-#else
- best_filter = mbmi->interp_filter;
-#endif // CONFIG_DUAL_FILTER
+ best_filter = av1_extract_interp_filter(mbmi->interp_filters, 0);
}
}
}
}
-// Set the appropriate filter
-#if CONFIG_DUAL_FILTER
- for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = best_filter;
-#else
- mbmi->interp_filter = best_filter;
-#endif // CONFIG_DUAL_FILTER
- rate2 += av1_get_switchable_rate(cpi, xd);
+ // Set the appropriate filter
+ mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
+ rate2 += av1_get_switchable_rate(cm, x, xd);
if (cm->reference_mode == REFERENCE_MODE_SELECT)
rate2 += av1_cost_bit(comp_mode_p, comp_pred);
@@ -12111,22 +12280,16 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
rd_cost->rate = rate2;
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
-#if CONFIG_DIST_8X8 && CONFIG_CB4X4
- if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
-#endif
+
if (this_rd >= best_rd_so_far) {
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;
return;
}
-#if CONFIG_DUAL_FILTER
assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter == mbmi->interp_filter[0]));
-#else
- assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter == mbmi->interp_filter));
-#endif // CONFIG_DUAL_FILTER
+ (cm->interp_filter ==
+ av1_extract_interp_filter(mbmi->interp_filters, 0)));
av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
@@ -12137,6 +12300,124 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
}
#if CONFIG_MOTION_VAR
+
+struct calc_target_weighted_pred_ctxt {
+ const MACROBLOCK *x;
+ const uint8_t *tmp;
+ int tmp_stride;
+ int overlap;
+};
+
+static INLINE void calc_target_weighted_pred_above(MACROBLOCKD *xd,
+ int rel_mi_col,
+ uint8_t nb_mi_width,
+ MODE_INFO *nb_mi,
+ void *fun_ctxt) {
+ (void)nb_mi;
+
+ struct calc_target_weighted_pred_ctxt *ctxt =
+ (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
+
+#if CONFIG_HIGHBITDEPTH
+ const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#else
+ const int is_hbd = 0;
+#endif // CONFIG_HIGHBITDEPTH
+
+ const int bw = xd->n8_w << MI_SIZE_LOG2;
+ const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
+
+ int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE);
+ int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE);
+ const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
+
+ if (!is_hbd) {
+ for (int row = 0; row < ctxt->overlap; ++row) {
+ const uint8_t m0 = mask1d[row];
+ const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
+ for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
+ wsrc[col] = m1 * tmp[col];
+ mask[col] = m0;
+ }
+ wsrc += bw;
+ mask += bw;
+ tmp += ctxt->tmp_stride;
+ }
+#if CONFIG_HIGHBITDEPTH
+ } else {
+ const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
+
+ for (int row = 0; row < ctxt->overlap; ++row) {
+ const uint8_t m0 = mask1d[row];
+ const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
+ for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
+ wsrc[col] = m1 * tmp16[col];
+ mask[col] = m0;
+ }
+ wsrc += bw;
+ mask += bw;
+ tmp16 += ctxt->tmp_stride;
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ }
+}
+
+static INLINE void calc_target_weighted_pred_left(MACROBLOCKD *xd,
+ int rel_mi_row,
+ uint8_t nb_mi_height,
+ MODE_INFO *nb_mi,
+ void *fun_ctxt) {
+ (void)nb_mi;
+
+ struct calc_target_weighted_pred_ctxt *ctxt =
+ (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
+
+#if CONFIG_HIGHBITDEPTH
+ const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#else
+ const int is_hbd = 0;
+#endif // CONFIG_HIGHBITDEPTH
+
+ const int bw = xd->n8_w << MI_SIZE_LOG2;
+ const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
+
+ int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
+ int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw);
+ const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
+
+ if (!is_hbd) {
+ for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
+ for (int col = 0; col < ctxt->overlap; ++col) {
+ const uint8_t m0 = mask1d[col];
+ const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
+ wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
+ (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
+ mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
+ }
+ wsrc += bw;
+ mask += bw;
+ tmp += ctxt->tmp_stride;
+ }
+#if CONFIG_HIGHBITDEPTH
+ } else {
+ const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
+
+ for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
+ for (int col = 0; col < ctxt->overlap; ++col) {
+ const uint8_t m0 = mask1d[col];
+ const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
+ wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
+ (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
+ mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
+ }
+ wsrc += bw;
+ mask += bw;
+ tmp16 += ctxt->tmp_stride;
+ }
+#endif // CONFIG_HIGHBITDEPTH
+ }
+}
+
// This function has a structure similar to av1_build_obmc_inter_prediction
//
// The OBMC predictor is computed as:
@@ -12181,13 +12462,11 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
int above_stride, const uint8_t *left,
int left_stride) {
const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
- int row, col, i;
const int bw = xd->n8_w << MI_SIZE_LOG2;
const int bh = xd->n8_h << MI_SIZE_LOG2;
int32_t *mask_buf = x->mask_buf;
int32_t *wsrc_buf = x->wsrc_buf;
- const int wsrc_stride = bw;
- const int mask_stride = bw;
+
const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
#if CONFIG_HIGHBITDEPTH
const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
@@ -12200,86 +12479,20 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
assert(xd->plane[0].subsampling_y == 0);
av1_zero_array(wsrc_buf, bw * bh);
- for (i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
+ for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
// handle above row
if (xd->up_available) {
const int overlap =
- AOMMIN(block_size_high[bsize] >> 1, block_size_high[BLOCK_64X64] >> 1);
- const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
- const int mi_row_offset = -1;
- const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
- const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
- int neighbor_count = 0;
-
- assert(miw > 0);
-
- i = 0;
- do { // for each mi in the above row
- const int mi_col_offset = i;
- const MB_MODE_INFO *above_mbmi =
- &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
-#if CONFIG_CHROMA_SUB8X8
- if (above_mbmi->sb_type < BLOCK_8X8)
- above_mbmi =
- &xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi;
-#endif
- const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
- const int above_step =
- AOMMIN(mi_size_wide[a_bsize], mi_size_wide[BLOCK_64X64]);
- const int mi_step = AOMMIN(xd->n8_w, above_step);
- const int neighbor_bw = mi_step * MI_SIZE;
-
- if (is_neighbor_overlappable(above_mbmi)) {
- if (!CONFIG_CB4X4 && (a_bsize == BLOCK_4X4 || a_bsize == BLOCK_4X8))
- neighbor_count += 2;
- else
- neighbor_count++;
- if (neighbor_count > neighbor_limit) break;
-
- const int tmp_stride = above_stride;
- int32_t *wsrc = wsrc_buf + (i * MI_SIZE);
- int32_t *mask = mask_buf + (i * MI_SIZE);
-
- if (!is_hbd) {
- const uint8_t *tmp = above;
-
- for (row = 0; row < overlap; ++row) {
- const uint8_t m0 = mask1d[row];
- const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
- for (col = 0; col < neighbor_bw; ++col) {
- wsrc[col] = m1 * tmp[col];
- mask[col] = m0;
- }
- wsrc += wsrc_stride;
- mask += mask_stride;
- tmp += tmp_stride;
- }
-#if CONFIG_HIGHBITDEPTH
- } else {
- const uint16_t *tmp = CONVERT_TO_SHORTPTR(above);
-
- for (row = 0; row < overlap; ++row) {
- const uint8_t m0 = mask1d[row];
- const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
- for (col = 0; col < neighbor_bw; ++col) {
- wsrc[col] = m1 * tmp[col];
- mask[col] = m0;
- }
- wsrc += wsrc_stride;
- mask += mask_stride;
- tmp += tmp_stride;
- }
-#endif // CONFIG_HIGHBITDEPTH
- }
- }
-
- above += neighbor_bw;
- i += mi_step;
- } while (i < miw);
+ AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
+ struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride,
+ overlap };
+ foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd, mi_col,
+ max_neighbor_obmc[b_width_log2_lookup[bsize]],
+ calc_target_weighted_pred_above, &ctxt);
}
- for (i = 0; i < bw * bh; ++i) {
+ for (int i = 0; i < bw * bh; ++i) {
wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
}
@@ -12287,102 +12500,33 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
// handle left column
if (xd->left_available) {
const int overlap =
- AOMMIN(block_size_wide[bsize] >> 1, block_size_wide[BLOCK_64X64] >> 1);
- const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
- const int mi_col_offset = -1;
- const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
- const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
- int neighbor_count = 0;
-
- assert(mih > 0);
-
- i = 0;
- do { // for each mi in the left column
- const int mi_row_offset = i;
- MB_MODE_INFO *left_mbmi =
- &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
-
-#if CONFIG_CHROMA_SUB8X8
- if (left_mbmi->sb_type < BLOCK_8X8)
- left_mbmi =
- &xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi;
-#endif
- const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
- const int left_step =
- AOMMIN(mi_size_high[l_bsize], mi_size_high[BLOCK_64X64]);
- const int mi_step = AOMMIN(xd->n8_h, left_step);
- const int neighbor_bh = mi_step * MI_SIZE;
-
- if (is_neighbor_overlappable(left_mbmi)) {
- if (!CONFIG_CB4X4 && (l_bsize == BLOCK_4X4 || l_bsize == BLOCK_8X4))
- neighbor_count += 2;
- else
- neighbor_count++;
- if (neighbor_count > neighbor_limit) break;
-
- const int tmp_stride = left_stride;
- int32_t *wsrc = wsrc_buf + (i * MI_SIZE * wsrc_stride);
- int32_t *mask = mask_buf + (i * MI_SIZE * mask_stride);
-
- if (!is_hbd) {
- const uint8_t *tmp = left;
-
- for (row = 0; row < neighbor_bh; ++row) {
- for (col = 0; col < overlap; ++col) {
- const uint8_t m0 = mask1d[col];
- const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
- wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
- (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
- mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
- }
- wsrc += wsrc_stride;
- mask += mask_stride;
- tmp += tmp_stride;
- }
-#if CONFIG_HIGHBITDEPTH
- } else {
- const uint16_t *tmp = CONVERT_TO_SHORTPTR(left);
-
- for (row = 0; row < neighbor_bh; ++row) {
- for (col = 0; col < overlap; ++col) {
- const uint8_t m0 = mask1d[col];
- const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
- wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
- (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
- mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
- }
- wsrc += wsrc_stride;
- mask += mask_stride;
- tmp += tmp_stride;
- }
-#endif // CONFIG_HIGHBITDEPTH
- }
- }
-
- left += neighbor_bh * left_stride;
- i += mi_step;
- } while (i < mih);
+ AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
+ struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride,
+ overlap };
+ foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd, mi_row,
+ max_neighbor_obmc[b_height_log2_lookup[bsize]],
+ calc_target_weighted_pred_left, &ctxt);
}
if (!is_hbd) {
const uint8_t *src = x->plane[0].src.buf;
- for (row = 0; row < bh; ++row) {
- for (col = 0; col < bw; ++col) {
+ for (int row = 0; row < bh; ++row) {
+ for (int col = 0; col < bw; ++col) {
wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
}
- wsrc_buf += wsrc_stride;
+ wsrc_buf += bw;
src += x->plane[0].src.stride;
}
#if CONFIG_HIGHBITDEPTH
} else {
const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
- for (row = 0; row < bh; ++row) {
- for (col = 0; col < bw; ++col) {
+ for (int row = 0; row < bh; ++row) {
+ for (int col = 0; col < bw; ++col) {
wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
}
- wsrc_buf += wsrc_stride;
+ wsrc_buf += bw;
src += x->plane[0].src.stride;
}
#endif // CONFIG_HIGHBITDEPTH
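The rewrite above replaces two hand-rolled neighbour walks with foreach_overlappable_nb_above()/_left() plus per-use callbacks and a context struct. A minimal sketch of that iterator pattern (neighbour sizing and the overlappability test are elided; the real iterators handle both):

typedef void (*nb_visitor_fn)(int rel_mi_col, int nb_mi_width, void *ctxt);

/* Walk the above row, visiting at most nb_limit neighbours: the
 * shared walking logic lives here, the per-use work in the callback. */
static void foreach_nb_above_sketch(int n8_w, int nb_limit,
                                    nb_visitor_fn visit, void *ctxt) {
  int nb_count = 0;
  int rel_mi_col = 0;
  while (rel_mi_col < n8_w && nb_count < nb_limit) {
    const int nb_mi_width = 1; /* real code reads the neighbour's mi size */
    visit(rel_mi_col, nb_mi_width, ctxt);
    ++nb_count;
    rel_mi_col += nb_mi_width;
  }
}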
@@ -12508,8 +12652,9 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
}
if (rd_causal >
- RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate +
- av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
+ RDCOST(x->rdmult,
+ rd_stats_y.rate + rd_stats_uv.rate +
+ av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
(rd_stats_y.dist + rd_stats_uv.dist))) {
x->skip = skip_blk;
} else {
@@ -12518,4 +12663,328 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
}
}
#endif // CONFIG_NCOBMC
+
+int64_t get_prediction_rd_cost(const struct AV1_COMP *cpi, struct macroblock *x,
+ int mi_row, int mi_col, int *skip_blk,
+ MB_MODE_INFO *backup_mbmi) {
+ const AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ BLOCK_SIZE bsize = mbmi->sb_type;
+#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
+ const MOTION_MODE motion_allowed = motion_mode_allowed(
+#if CONFIG_GLOBAL_MOTION
+ 0, xd->global_motion,
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_WARPED_MOTION
+ xd,
+#endif
+ xd->mi[0]);
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
+ RD_STATS rd_stats_y, rd_stats_uv;
+ int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
+ int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
+ int64_t this_rd;
+ int ref;
+
+#if CONFIG_CB4X4
+ x->skip_chroma_rd =
+ !is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y);
+#endif
+
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+ for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+ YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]);
+ assert(cfg != NULL);
+ av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
+ &xd->block_refs[ref]->sf);
+ }
+ av1_setup_dst_planes(x->e_mbd.plane, bsize,
+ get_frame_new_buffer(&cpi->common), mi_row, mi_col);
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ if (mbmi->motion_mode != NCOBMC_ADAPT_WEIGHT)
+#endif
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
+
+#if CONFIG_MOTION_VAR
+ if (mbmi->motion_mode == OBMC_CAUSAL) {
+#if CONFIG_NCOBMC
+ av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
+#else
+ av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
+#endif
+ }
+#endif // CONFIG_MOTION_VAR
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ if (mbmi->motion_mode == NCOBMC_ADAPT_WEIGHT)
+ for (int plane = 0; plane < MAX_MB_PLANE; ++plane)
+ get_pred_from_intrpl_buf(xd, mi_row, mi_col, bsize, plane);
+#endif
+ av1_subtract_plane(x, bsize, 0);
+
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
+ select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ } else {
+ int idx, idy;
+ super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
+ memset(x->blk_skip[0], rd_stats_y.skip,
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+ }
+ inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+#else
+ super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+#endif
+ assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
+
+ if (rd_stats_y.skip && rd_stats_uv.skip) {
+ rd_stats_y.rate = rate_skip1;
+ rd_stats_uv.rate = 0;
+ rd_stats_y.dist = rd_stats_y.sse;
+ rd_stats_uv.dist = rd_stats_uv.sse;
+ *skip_blk = 1;
+ } else if (RDCOST(x->rdmult,
+ (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
+ (rd_stats_y.dist + rd_stats_uv.dist)) >
+ RDCOST(x->rdmult, rate_skip1,
+ (rd_stats_y.sse + rd_stats_uv.sse))) {
+ rd_stats_y.rate = rate_skip1;
+ rd_stats_uv.rate = 0;
+ rd_stats_y.dist = rd_stats_y.sse;
+ rd_stats_uv.dist = rd_stats_uv.sse;
+ *skip_blk = 1;
+ } else {
+ rd_stats_y.rate += rate_skip0;
+ *skip_blk = 0;
+ }
+
+ if (backup_mbmi) *backup_mbmi = *mbmi;
+
+ this_rd = RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate),
+ (rd_stats_y.dist + rd_stats_uv.dist));
+#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
+ if (motion_allowed == NCOBMC_ADAPT_WEIGHT) {
+ assert(mbmi->motion_mode <= NCOBMC_ADAPT_WEIGHT);
+ this_rd +=
+ RDCOST(x->rdmult, x->motion_mode_cost2[bsize][mbmi->motion_mode], 0);
+ } else if (motion_allowed == OBMC_CAUSAL) {
+ assert(mbmi->motion_mode <= OBMC_CAUSAL);
+ this_rd +=
+ RDCOST(x->rdmult, x->motion_mode_cost1[bsize][mbmi->motion_mode], 0);
+ } else {
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
+ this_rd +=
+ RDCOST(x->rdmult, x->motion_mode_cost[bsize][mbmi->motion_mode], 0);
+#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
+ }
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
+ return this_rd;
+}
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+void av1_check_ncobmc_adapt_weight_rd(const struct AV1_COMP *cpi,
+ struct macroblock *x, int mi_row,
+ int mi_col) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ BLOCK_SIZE bsize = mbmi->sb_type;
+#if CONFIG_VAR_TX
+ const int n4 = bsize_to_num_blk(bsize);
+ uint8_t st_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ uint8_t obmc_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ uint8_t ncobmc_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+#endif
+ MB_MODE_INFO st_mbmi, obmc_mbmi, ncobmc_mbmi;
+ int st_skip, obmc_skip, ncobmc_skip;
+ int64_t st_rd, obmc_rd, ncobmc_rd;
+#if CONFIG_WARPED_MOTION
+ const AV1_COMMON *const cm = &cpi->common;
+ const int is_warp_motion = mbmi->motion_mode == WARPED_CAUSAL;
+ const int rs = RDCOST(x->rdmult, av1_get_switchable_rate(cm, x, xd), 0);
+ MB_MODE_INFO warp_mbmi;
+ int64_t warp_rd;
+ int warp_skip;
+#endif
+
+ // Recompute the rd cost for the motion mode decided in the rd loop
+ mbmi->motion_mode = SIMPLE_TRANSLATION;
+ st_rd = get_prediction_rd_cost(cpi, x, mi_row, mi_col, &st_skip, &st_mbmi);
+#if CONFIG_WARPED_MOTION
+ st_rd += rs;
+#endif
+#if CONFIG_VAR_TX
+ memcpy(st_blk_skip, x->blk_skip[0], sizeof(st_blk_skip[0]) * n4);
+#endif
+
+ mbmi->motion_mode = OBMC_CAUSAL;
+ obmc_rd =
+ get_prediction_rd_cost(cpi, x, mi_row, mi_col, &obmc_skip, &obmc_mbmi);
+#if CONFIG_WARPED_MOTION
+ obmc_rd += rs;
+#endif
+#if CONFIG_VAR_TX
+ memcpy(obmc_blk_skip, x->blk_skip[0], sizeof(obmc_blk_skip[0]) * n4);
+#endif
+
+ // Compute the rd cost for ncobmc adaptive weight
+ mbmi->motion_mode = NCOBMC_ADAPT_WEIGHT;
+ ncobmc_rd = get_prediction_rd_cost(cpi, x, mi_row, mi_col, &ncobmc_skip,
+ &ncobmc_mbmi);
+#if CONFIG_WARPED_MOTION
+ ncobmc_rd += rs;
+#endif
+ // Calculate the ncobmc mode costs
+ {
+ ADAPT_OVERLAP_BLOCK aob = adapt_overlap_block_lookup[bsize];
+ ncobmc_rd +=
+ RDCOST(x->rdmult, x->ncobmc_mode_cost[aob][mbmi->ncobmc_mode[0]], 0);
+ if (mi_size_wide[bsize] != mi_size_high[bsize])
+ ncobmc_rd +=
+ RDCOST(x->rdmult, x->ncobmc_mode_cost[aob][mbmi->ncobmc_mode[1]], 0);
+ }
+#if CONFIG_VAR_TX
+ memcpy(ncobmc_blk_skip, x->blk_skip[0], sizeof(ncobmc_blk_skip[0]) * n4);
+#endif
+
+#if CONFIG_WARPED_MOTION
+ if (is_warp_motion) {
+ mbmi->motion_mode = WARPED_CAUSAL;
+ warp_rd =
+ get_prediction_rd_cost(cpi, x, mi_row, mi_col, &warp_skip, &warp_mbmi);
+ } else {
+ warp_rd = INT64_MAX;
+ }
+#endif
+
+#if CONFIG_WARPED_MOTION
+ if (AOMMIN(ncobmc_rd, warp_rd) < AOMMIN(st_rd, obmc_rd)) {
+ if (ncobmc_rd < warp_rd) {
+ x->skip = ncobmc_skip;
+ *mbmi = ncobmc_mbmi;
+#if CONFIG_VAR_TX
+ memcpy(x->blk_skip[0], ncobmc_blk_skip, sizeof(ncobmc_blk_skip[0]) * n4);
+#endif
+ } else {
+ x->skip = warp_skip;
+ *mbmi = warp_mbmi;
+ }
+#else
+ if (ncobmc_rd < AOMMIN(st_rd, obmc_rd)) {
+ x->skip = ncobmc_skip;
+ *mbmi = ncobmc_mbmi;
+#if CONFIG_VAR_TX
+ memcpy(x->blk_skip[0], ncobmc_blk_skip, sizeof(ncobmc_blk_skip[0]) * n4);
+#endif
+#endif // CONFIG_WARPED_MOTION
+ } else {
+ if (obmc_rd < st_rd) {
+ *mbmi = obmc_mbmi;
+ x->skip = obmc_skip;
+#if CONFIG_VAR_TX
+ memcpy(x->blk_skip[0], obmc_blk_skip, sizeof(obmc_blk_skip[0]) * n4);
+#endif
+ } else {
+ *mbmi = st_mbmi;
+ x->skip = st_skip;
+#if CONFIG_VAR_TX
+ memcpy(x->blk_skip[0], st_blk_skip, sizeof(st_blk_skip[0]) * n4);
+#endif
+ }
+ }
+}
+
+int64_t get_ncobmc_error(MACROBLOCKD *xd, int pxl_row, int pxl_col,
+ BLOCK_SIZE bsize, int plane, struct buf_2d *src) {
+ const int wide = AOMMIN(mi_size_wide[bsize] * MI_SIZE,
+ (xd->sb_mi_bd.mi_col_end + 1) * MI_SIZE - pxl_col);
+ const int high = AOMMIN(mi_size_high[bsize] * MI_SIZE,
+ (xd->sb_mi_bd.mi_row_end + 1) * MI_SIZE - pxl_row);
+ const int ss_x = xd->plane[plane].subsampling_x;
+ const int ss_y = xd->plane[plane].subsampling_y;
+ int row_offset = (pxl_row - xd->sb_mi_bd.mi_row_begin * MI_SIZE) >> ss_y;
+ int col_offset = (pxl_col - xd->sb_mi_bd.mi_col_begin * MI_SIZE) >> ss_x;
+ int dst_stride = xd->ncobmc_pred_buf_stride[plane];
+ int dst_offset = row_offset * dst_stride + col_offset;
+ int src_stride = src->stride;
+
+ int r, c;
+ int64_t tmp, error = 0;
+
+ for (r = 0; r < (high >> ss_y); ++r) {
+ for (c = 0; c < (wide >> ss_x); ++c) {
+ tmp = xd->ncobmc_pred_buf[plane][r * dst_stride + c + dst_offset] -
+ src->buf[r * src_stride + c];
+ error += tmp * tmp;
+ }
+ }
+ return error;
+}
+
+int get_ncobmc_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
+ MACROBLOCKD *xd, int mi_row, int mi_col, int bsize) {
+ const AV1_COMMON *const cm = &cpi->common;
+ uint8_t *pred_buf[4][MAX_MB_PLANE];
+
+ // TODO(weitinglin): stride size needs to be fixed for high-bit depth
+ int pred_stride[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+
+ // target block position in pixels
+ int pxl_row = mi_row << MI_SIZE_LOG2;
+ int pxl_col = mi_col << MI_SIZE_LOG2;
+ int64_t error, best_error = INT64_MAX;
+ int plane, tmp_mode, best_mode = 0;
+#if CONFIG_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t);
+ ASSIGN_ALIGNED_PTRS_HBD(pred_buf[0], cm->ncobmcaw_buf[0], MAX_SB_SQUARE,
+ len);
+ ASSIGN_ALIGNED_PTRS_HBD(pred_buf[1], cm->ncobmcaw_buf[1], MAX_SB_SQUARE,
+ len);
+ ASSIGN_ALIGNED_PTRS_HBD(pred_buf[2], cm->ncobmcaw_buf[2], MAX_SB_SQUARE,
+ len);
+ ASSIGN_ALIGNED_PTRS_HBD(pred_buf[3], cm->ncobmcaw_buf[3], MAX_SB_SQUARE,
+ len);
+ } else {
+#endif // CONFIG_HIGHBITDEPTH
+ ASSIGN_ALIGNED_PTRS(pred_buf[0], cm->ncobmcaw_buf[0], MAX_SB_SQUARE);
+ ASSIGN_ALIGNED_PTRS(pred_buf[1], cm->ncobmcaw_buf[1], MAX_SB_SQUARE);
+ ASSIGN_ALIGNED_PTRS(pred_buf[2], cm->ncobmcaw_buf[2], MAX_SB_SQUARE);
+ ASSIGN_ALIGNED_PTRS(pred_buf[3], cm->ncobmcaw_buf[3], MAX_SB_SQUARE);
+#if CONFIG_HIGHBITDEPTH
+ }
+#endif
+
+ av1_get_ext_blk_preds(cm, xd, bsize, mi_row, mi_col, pred_buf, pred_stride);
+ av1_get_ori_blk_pred(cm, xd, bsize, mi_row, mi_col, pred_buf[3], pred_stride);
+
+ for (tmp_mode = 0; tmp_mode < MAX_NCOBMC_MODES; ++tmp_mode) {
+ error = 0;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ build_ncobmc_intrpl_pred(cm, xd, plane, pxl_row, pxl_col, bsize, pred_buf,
+ pred_stride, tmp_mode);
+ error += get_ncobmc_error(xd, pxl_row, pxl_col, bsize, plane,
+ &x->plane[plane].src);
+ }
+ if (error < best_error) {
+ best_mode = tmp_mode;
+ best_error = error;
+ }
+ }
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ build_ncobmc_intrpl_pred(cm, xd, plane, pxl_row, pxl_col, bsize, pred_buf,
+ pred_stride, best_mode);
+ }
+
+ return best_mode;
+}
+
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#endif // CONFIG_MOTION_VAR
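get_ncobmc_mode() above is an exhaustive search: build each candidate interpolated prediction, measure its SSE against the source across all planes, and keep the argmin. The same loop reduced to its skeleton, with stand-in callbacks for build_ncobmc_intrpl_pred() and the per-plane error accumulation:

#include <stdint.h>

typedef void (*build_fn)(int mode, void *ctx);
typedef int64_t (*sse_fn)(void *ctx);

static int pick_min_sse_mode(int num_modes, build_fn build, sse_fn sse,
                             void *ctx) {
  int best_mode = 0;
  int64_t best_err = INT64_MAX;
  for (int mode = 0; mode < num_modes; ++mode) {
    build(mode, ctx);              /* candidate prediction */
    const int64_t err = sse(ctx);  /* error vs. source, all planes */
    if (err < best_err) {
      best_err = err;
      best_mode = mode;
    }
  }
  return best_mode;
}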
diff --git a/third_party/aom/av1/encoder/rdopt.h b/third_party/aom/av1/encoder/rdopt.h
index 43a6a3794..dbc7527fb 100644
--- a/third_party/aom/av1/encoder/rdopt.h
+++ b/third_party/aom/av1/encoder/rdopt.h
@@ -57,7 +57,6 @@ typedef enum OUTPUT_STATUS {
OUTPUT_HAS_DECODED_PIXELS
} OUTPUT_STATUS;
-#if CONFIG_PALETTE || CONFIG_INTRABC
// Returns the number of colors in 'src'.
int av1_count_colors(const uint8_t *src, int stride, int rows, int cols);
#if CONFIG_HIGHBITDEPTH
@@ -65,7 +64,6 @@ int av1_count_colors(const uint8_t *src, int stride, int rows, int cols);
int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
int bit_depth);
#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_PALETTE || CONFIG_INTRABC
void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
@@ -73,7 +71,7 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
OUTPUT_STATUS output_status);
#if CONFIG_DIST_8X8
-int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
+int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
const uint8_t *src, int src_stride, const uint8_t *dst,
int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
int bsh, int visible_w, int visible_h, int qindex);
@@ -142,8 +140,21 @@ void av1_txfm_rd_in_plane_supertx(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,
} // extern "C"
#endif
-int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
- BLOCK_SIZE bsize, int plane, TX_SIZE tx_size,
- TX_TYPE tx_type);
+int av1_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
+ const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
+ TX_SIZE tx_size, TX_TYPE tx_type);
+
+int64_t get_prediction_rd_cost(const struct AV1_COMP *cpi, struct macroblock *x,
+ int mi_row, int mi_col, int *skip_blk,
+ MB_MODE_INFO *backup_mbmi);
+
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+void av1_check_ncobmc_adapt_weight_rd(const struct AV1_COMP *cpi,
+ struct macroblock *x, int mi_row,
+ int mi_col);
+int get_ncobmc_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
+ MACROBLOCKD *xd, int mi_row, int mi_col, int bsize);
+
+#endif
#endif // AV1_ENCODER_RDOPT_H_
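A change running through the whole patch: mode-cost tables move off the shared AV1_COMP context and onto MACROBLOCK (cpi->drl_mode_cost0 becomes x->drl_mode_cost0, likewise the intra_uv_mode, intra_filter and motion_mode tables), and av1_get_switchable_rate()/av1_tx_type_cost() grow matching arguments. The diff does not state the motivation; schematically, with stand-in types:

/* Stand-in structs; the real ones carry many more fields. */
typedef struct { int drl_mode_cost0[3][2]; } MacroblockSketch;
typedef struct { MacroblockSketch mb; } EncoderSketch;

/* Before: rate lookups reach through the shared encoder context. */
static int drl_cost_old(const EncoderSketch *cpi, int ctx, int bit) {
  return cpi->mb.drl_mode_cost0[ctx][bit];
}

/* After: lookups read the per-block state instead. */
static int drl_cost_new(const MacroblockSketch *x, int ctx, int bit) {
  return x->drl_mode_cost0[ctx][bit];
}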
diff --git a/third_party/aom/av1/encoder/segmentation.c b/third_party/aom/av1/encoder/segmentation.c
index b61df43fa..4f01fbba4 100644
--- a/third_party/aom/av1/encoder/segmentation.c
+++ b/third_party/aom/av1/encoder/segmentation.c
@@ -32,7 +32,7 @@ void av1_disable_segmentation(struct segmentation *seg) {
seg->update_data = 0;
}
-void av1_set_segment_data(struct segmentation *seg, signed char *feature_data,
+void av1_set_segment_data(struct segmentation *seg, int8_t *feature_data,
unsigned char abs_delta) {
seg->abs_delta = abs_delta;
@@ -167,76 +167,78 @@ static void count_segs_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
const int bs = mi_size_wide[bsize], hbs = bs / 2;
#if CONFIG_EXT_PARTITION_TYPES
PARTITION_TYPE partition;
+#if CONFIG_EXT_PARTITION_TYPES_AB
+ const int qbs = bs / 4;
+#endif // CONFIG_EXT_PARTITION_TYPES_AB
#else
int bw, bh;
#endif // CONFIG_EXT_PARTITION_TYPES
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+#define CSEGS(cs_bw, cs_bh, cs_rowoff, cs_coloff) \
+ count_segs(cm, xd, tile, mi + mis * (cs_rowoff) + (cs_coloff), \
+ no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, \
+ (cs_bw), (cs_bh), mi_row + (cs_rowoff), mi_col + (cs_coloff));
+
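/* For reference, the PARTITION_HORZ arm below, CSEGS(bs, hbs, 0, 0)
 * followed by CSEGS(bs, hbs, hbs, 0), expands by direct substitution
 * of the macro above into the two count_segs() calls it replaces:
 *
 *   count_segs(cm, xd, tile, mi + mis * (0) + (0), no_pred_segcounts,
 *              temporal_predictor_count, t_unpred_seg_counts,
 *              (bs), (hbs), mi_row + (0), mi_col + (0));
 *   count_segs(cm, xd, tile, mi + mis * (hbs) + (0), no_pred_segcounts,
 *              temporal_predictor_count, t_unpred_seg_counts,
 *              (bs), (hbs), mi_row + (hbs), mi_col + (0));
 */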
#if CONFIG_EXT_PARTITION_TYPES
if (bsize == BLOCK_8X8)
partition = PARTITION_NONE;
else
partition = get_partition(cm, mi_row, mi_col, bsize);
switch (partition) {
- case PARTITION_NONE:
- count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, bs, bs, mi_row, mi_col);
- break;
+ case PARTITION_NONE: CSEGS(bs, bs, 0, 0); break;
case PARTITION_HORZ:
- count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
- count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
- mi_row + hbs, mi_col);
+ CSEGS(bs, hbs, 0, 0);
+ CSEGS(bs, hbs, hbs, 0);
break;
case PARTITION_VERT:
- count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
- count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row,
- mi_col + hbs);
+ CSEGS(hbs, bs, 0, 0);
+ CSEGS(hbs, bs, 0, hbs);
break;
+#if CONFIG_EXT_PARTITION_TYPES_AB
case PARTITION_HORZ_A:
- count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, hbs, hbs, mi_row, mi_col);
- count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
- mi_row, mi_col + hbs);
- count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
- mi_row + hbs, mi_col);
+ CSEGS(bs, qbs, 0, 0);
+ CSEGS(bs, qbs, qbs, 0);
+ CSEGS(bs, hbs, hbs, 0);
break;
case PARTITION_HORZ_B:
- count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
- count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
- mi_row + hbs, mi_col);
- count_segs(cm, xd, tile, mi + hbs + hbs * mis, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
- mi_row + hbs, mi_col + hbs);
+ CSEGS(bs, hbs, 0, 0);
+ CSEGS(bs, qbs, hbs, 0);
+ if (mi_row + 3 * qbs < cm->mi_rows) CSEGS(bs, qbs, 3 * qbs, 0);
break;
case PARTITION_VERT_A:
- count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, hbs, hbs, mi_row, mi_col);
- count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
- mi_row + hbs, mi_col);
- count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row,
- mi_col + hbs);
+ CSEGS(qbs, bs, 0, 0);
+ CSEGS(qbs, bs, 0, qbs);
+ CSEGS(hbs, bs, 0, hbs);
break;
case PARTITION_VERT_B:
- count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
- count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
- mi_row, mi_col + hbs);
- count_segs(cm, xd, tile, mi + hbs + hbs * mis, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
- mi_row + hbs, mi_col + hbs);
+ CSEGS(hbs, bs, 0, 0);
+ CSEGS(qbs, bs, 0, hbs);
+ if (mi_col + 3 * qbs < cm->mi_cols) CSEGS(qbs, bs, 0, 3 * qbs);
+ break;
+#else
+ case PARTITION_HORZ_A:
+ CSEGS(hbs, hbs, 0, 0);
+ CSEGS(hbs, hbs, 0, hbs);
+ CSEGS(bs, hbs, hbs, 0);
+ break;
+ case PARTITION_HORZ_B:
+ CSEGS(bs, hbs, 0, 0);
+ CSEGS(hbs, hbs, hbs, 0);
+ CSEGS(hbs, hbs, hbs, hbs);
break;
+ case PARTITION_VERT_A:
+ CSEGS(hbs, hbs, 0, 0);
+ CSEGS(hbs, hbs, hbs, 0);
+ CSEGS(hbs, bs, 0, hbs);
+ break;
+ case PARTITION_VERT_B:
+ CSEGS(hbs, bs, 0, 0);
+ CSEGS(hbs, hbs, 0, hbs);
+ CSEGS(hbs, hbs, hbs, hbs);
+ break;
+#endif
case PARTITION_SPLIT: {
const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
int n;
@@ -260,20 +262,13 @@ static void count_segs_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
bh = mi_size_high[mi[0]->mbmi.sb_type];
if (bw == bs && bh == bs) {
- count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, bs, bs, mi_row, mi_col);
+ CSEGS(bs, bs, 0, 0);
} else if (bw == bs && bh < bs) {
- count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
- count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
- mi_row + hbs, mi_col);
+ CSEGS(bs, hbs, 0, 0);
+ CSEGS(bs, hbs, hbs, 0);
} else if (bw < bs && bh == bs) {
- count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
- t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
- count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
- temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row,
- mi_col + hbs);
+ CSEGS(hbs, bs, 0, 0);
+ CSEGS(hbs, bs, 0, hbs);
} else {
const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
int n;
@@ -290,6 +285,8 @@ static void count_segs_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
}
}
#endif // CONFIG_EXT_PARTITION_TYPES
+
+#undef CSEGS
}
void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) {
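
For readers tracing the CSEGS refactor above: each invocation expands to exactly the count_segs() call it replaces. As a worked example, the second call of the PARTITION_HORZ arm, CSEGS(bs, hbs, hbs, 0), expands per the #define to:

count_segs(cm, xd, tile, mi + mis * (hbs) + (0),
           no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts,
           (bs), (hbs), mi_row + (hbs), mi_col + (0));

which matches the call deleted from that arm, up to the parentheses the macro inserts around its arguments.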
diff --git a/third_party/aom/av1/encoder/segmentation.h b/third_party/aom/av1/encoder/segmentation.h
index c1491ca2a..1d24ed1d1 100644
--- a/third_party/aom/av1/encoder/segmentation.h
+++ b/third_party/aom/av1/encoder/segmentation.h
@@ -37,7 +37,7 @@ void av1_clear_segdata(struct segmentation *seg, int segment_id,
//
// abs_delta = SEGMENT_DELTADATA (feature data are deltas);
// abs_delta = SEGMENT_ABSDATA (use the absolute values given).
-void av1_set_segment_data(struct segmentation *seg, signed char *feature_data,
+void av1_set_segment_data(struct segmentation *seg, int8_t *feature_data,
unsigned char abs_delta);
void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd);
diff --git a/third_party/aom/av1/encoder/speed_features.c b/third_party/aom/av1/encoder/speed_features.c
index eeab33a95..5608d031e 100644
--- a/third_party/aom/av1/encoder/speed_features.c
+++ b/third_party/aom/av1/encoder/speed_features.c
@@ -172,20 +172,20 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
#if CONFIG_TX64X64
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
#if CONFIG_CFL
- sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
#else
sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC_H_V;
#endif // CONFIG_CFL
#endif // CONFIG_TX64X64
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
#if CONFIG_CFL
- sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
#else
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
#endif
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
#if CONFIG_CFL
- sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
#else
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
#endif
@@ -196,10 +196,8 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
// Use transform domain distortion.
// Note var-tx expt always uses pixel domain distortion.
sf->use_transform_domain_distortion = 1;
-#if CONFIG_EXT_INTER
sf->disable_wedge_search_var_thresh = 100;
sf->fast_wedge_sign_estimate = 1;
-#endif // CONFIG_EXT_INTER
}
if (speed >= 3) {
@@ -240,14 +238,14 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
#if CONFIG_TX64X64
sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
#if CONFIG_CFL
- sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC;
+ sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_CFL;
#else
sf->intra_uv_mode_mask[TX_64X64] = INTRA_DC;
#endif // CONFIG_CFL
#endif // CONFIG_TX64X64
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
#if CONFIG_CFL
- sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC;
+ sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_CFL;
#else
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
#endif // CONFIG_CFL
@@ -276,7 +274,7 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
for (i = 0; i < TX_SIZES; ++i) {
sf->intra_y_mode_mask[i] = INTRA_DC;
#if CONFIG_CFL
- sf->intra_uv_mode_mask[i] = UV_INTRA_DC;
+ sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
#else
sf->intra_uv_mode_mask[i] = INTRA_DC;
#endif // CONFIG_CFL
@@ -404,6 +402,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
sf->alt_ref_search_fp = 0;
sf->partition_search_type = SEARCH_PARTITION;
sf->tx_type_search.prune_mode = NO_PRUNE;
+ sf->tx_type_search.use_skip_flag_prediction = 1;
sf->tx_type_search.fast_intra_tx_type_search = 0;
sf->tx_type_search.fast_inter_tx_type_search = 0;
sf->less_rectangular_check = 0;
@@ -422,10 +421,8 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
sf->adaptive_interp_filter_search = 0;
sf->allow_partition_search_skip = 0;
sf->use_upsampled_references = 1;
-#if CONFIG_EXT_INTER
sf->disable_wedge_search_var_thresh = 0;
sf->fast_wedge_sign_estimate = 0;
-#endif // CONFIG_EXT_INTER
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
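
Each intra_uv_mode_mask entry above is a bit set over the UV prediction modes (see the enum in speed_features.h below); the UV_INTRA_DC_H_V to UV_INTRA_DC_H_V_CFL substitutions keep UV_CFL_PRED searchable when CFL is compiled in. A minimal sketch of how such a mask would gate a mode loop (the helper is an assumption, not encoder code):

static int uv_mode_allowed(unsigned int mask, int uv_mode) {
  /* Bit uv_mode of the mask enables that UV prediction mode. */
  return (mask >> uv_mode) & 1;
}

/* e.g. uv_mode_allowed(UV_INTRA_DC_H_V_CFL, UV_CFL_PRED) is 1, while
   uv_mode_allowed(UV_INTRA_DC_H_V, UV_CFL_PRED) is 0. */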
diff --git a/third_party/aom/av1/encoder/speed_features.h b/third_party/aom/av1/encoder/speed_features.h
index 2c89f4e5c..edd79cd16 100644
--- a/third_party/aom/av1/encoder/speed_features.h
+++ b/third_party/aom/av1/encoder/speed_features.h
@@ -21,31 +21,34 @@ extern "C" {
enum {
INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) |
(1 << D135_PRED) | (1 << D117_PRED) | (1 << D153_PRED) |
- (1 << D207_PRED) | (1 << D63_PRED) |
-#if CONFIG_ALT_INTRA
- (1 << SMOOTH_PRED) |
+ (1 << D207_PRED) | (1 << D63_PRED) | (1 << SMOOTH_PRED) |
#if CONFIG_SMOOTH_HV
(1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) |
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
(1 << TM_PRED),
#if CONFIG_CFL
UV_INTRA_ALL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) |
(1 << UV_D45_PRED) | (1 << UV_D135_PRED) |
(1 << UV_D117_PRED) | (1 << UV_D153_PRED) |
(1 << UV_D207_PRED) | (1 << UV_D63_PRED) |
-#if CONFIG_ALT_INTRA
(1 << UV_SMOOTH_PRED) |
#if CONFIG_SMOOTH_HV
(1 << UV_SMOOTH_V_PRED) | (1 << UV_SMOOTH_H_PRED) |
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- (1 << UV_TM_PRED),
+ (1 << UV_TM_PRED) | (1 << UV_CFL_PRED),
UV_INTRA_DC = (1 << UV_DC_PRED),
+ UV_INTRA_DC_CFL = (1 << UV_DC_PRED) | (1 << UV_CFL_PRED),
UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_TM_PRED),
+ UV_INTRA_DC_TM_CFL =
+ (1 << UV_DC_PRED) | (1 << UV_TM_PRED) | (1 << UV_CFL_PRED),
UV_INTRA_DC_H_V = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED),
+ UV_INTRA_DC_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) |
+ (1 << UV_H_PRED) | (1 << UV_CFL_PRED),
UV_INTRA_DC_TM_H_V = (1 << UV_DC_PRED) | (1 << UV_TM_PRED) |
(1 << UV_V_PRED) | (1 << UV_H_PRED),
+ UV_INTRA_DC_TM_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_TM_PRED) |
+ (1 << UV_V_PRED) | (1 << UV_H_PRED) |
+ (1 << UV_CFL_PRED),
#endif // CONFIG_CFL
INTRA_DC = (1 << DC_PRED),
INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
@@ -54,7 +57,6 @@ enum {
(1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | (1 << H_PRED)
};
-#if CONFIG_EXT_INTER
enum {
#if CONFIG_COMPOUND_SINGLEREF
// TODO(zoeliu): To further consider the following single-ref comp modes:
@@ -90,17 +92,6 @@ enum {
(1 << NEW_NEARMV) | (1 << NEAR_NEWMV) |
(1 << NEAR_NEARMV),
};
-#else // !CONFIG_EXT_INTER
-enum {
- INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
- INTER_NEAREST = (1 << NEARESTMV),
- INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
- INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV),
- INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV),
- INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
- INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV),
-};
-#endif // CONFIG_EXT_INTER
enum {
DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
@@ -209,6 +200,10 @@ typedef struct {
TX_TYPE_PRUNE_MODE prune_mode;
int fast_intra_tx_type_search;
int fast_inter_tx_type_search;
+
+ // Use a skip flag prediction model to detect blocks with skip = 1 early
+ // and avoid doing full TX type search for such blocks.
+ int use_skip_flag_prediction;
} TX_TYPE_SEARCH;
typedef enum {
@@ -409,13 +404,11 @@ typedef struct SPEED_FEATURES {
// Choose a very large value (UINT_MAX) to use 8-tap always
unsigned int disable_filter_search_var_thresh;
-#if CONFIG_EXT_INTER
// A source variance threshold below which wedge search is disabled
unsigned int disable_wedge_search_var_thresh;
// Whether fast wedge sign estimate is used
int fast_wedge_sign_estimate;
-#endif // CONFIG_EXT_INTER
// These bit masks allow you to enable or disable intra modes for each
// transform size separately.
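
The use_skip_flag_prediction field documented above is only an on/off switch; the skip model itself lives in the TX search code. A hedged sketch of the intended gate (predicted_skip standing in for the model's output, which is not part of this diff):

/* Bypass the full per-TX-type RD loop when the speed feature is on and
   the model predicts the block will be coded with skip = 1. */
static int should_bypass_tx_type_search(const TX_TYPE_SEARCH *tts,
                                        int predicted_skip) {
  return tts->use_skip_flag_prediction && predicted_skip;
}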
diff --git a/third_party/aom/av1/encoder/subexp.c b/third_party/aom/av1/encoder/subexp.c
index 6a8ba12d8..dc96d712a 100644
--- a/third_party/aom/av1/encoder/subexp.c
+++ b/third_party/aom/av1/encoder/subexp.c
@@ -138,47 +138,6 @@ int av1_prob_diff_update_savings_search(const unsigned int *ct, aom_prob oldp,
return bestsavings;
}
-int av1_prob_diff_update_savings_search_model(const unsigned int *ct,
- const aom_prob oldp,
- aom_prob *bestp, aom_prob upd,
- int stepsize, int probwt) {
- int i, old_b, new_b, update_b, savings, bestsavings;
- int newp;
- const int step_sign = *bestp > oldp ? -1 : 1;
- const int step = stepsize * step_sign;
- const int upd_cost = av1_cost_one(upd) - av1_cost_zero(upd);
- const aom_prob *newplist, *oldplist;
- aom_prob bestnewp;
- oldplist = av1_pareto8_full[oldp - 1];
- old_b = cost_branch256(ct + 2 * PIVOT_NODE, oldp);
- for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i)
- old_b += cost_branch256(ct + 2 * i, oldplist[i - UNCONSTRAINED_NODES]);
-
- bestsavings = 0;
- bestnewp = oldp;
-
- assert(stepsize > 0);
-
- if (old_b > upd_cost + (MIN_DELP_BITS << AV1_PROB_COST_SHIFT)) {
- for (newp = *bestp; (newp - oldp) * step_sign < 0; newp += step) {
- if (newp < 1 || newp > 255) continue;
- newplist = av1_pareto8_full[newp - 1];
- new_b = cost_branch256(ct + 2 * PIVOT_NODE, newp);
- for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i)
- new_b += cost_branch256(ct + 2 * i, newplist[i - UNCONSTRAINED_NODES]);
- update_b = prob_diff_update_cost(newp, oldp) + upd_cost;
- savings = old_b - new_b - update_b * probwt;
- if (savings > bestsavings) {
- bestsavings = savings;
- bestnewp = newp;
- }
- }
- }
-
- *bestp = bestnewp;
- return bestsavings;
-}
-
void av1_cond_prob_diff_update(aom_writer *w, aom_prob *oldp,
const unsigned int ct[2], int probwt) {
const aom_prob upd = DIFF_UPDATE_PROB;
diff --git a/third_party/aom/av1/encoder/temporal_filter.c b/third_party/aom/av1/encoder/temporal_filter.c
index 604647922..daa647689 100644
--- a/third_party/aom/av1/encoder/temporal_filter.c
+++ b/third_party/aom/av1/encoder/temporal_filter.c
@@ -44,18 +44,13 @@ static void temporal_filter_predictors_mb_c(
ConvolveParams conv_params = get_conv_params(which_mv, which_mv, 0);
#if USE_TEMPORALFILTER_12TAP
-#if CONFIG_DUAL_FILTER
- const InterpFilter interp_filter[4] = { TEMPORALFILTER_12TAP,
- TEMPORALFILTER_12TAP,
- TEMPORALFILTER_12TAP,
- TEMPORALFILTER_12TAP };
-#else
- const InterpFilter interp_filter = TEMPORALFILTER_12TAP;
-#endif
+ const InterpFilters interp_filters =
+ av1_broadcast_interp_filter(TEMPORALFILTER_12TAP);
(void)xd;
#else
- const InterpFilter interp_filter = xd->mi[0]->mbmi.interp_filter;
+ const InterpFilters interp_filters = xd->mi[0]->mbmi.interp_filters;
#endif // USE_TEMPORALFILTER_12TAP
+
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
WarpTypesAllowed warp_types;
memset(&warp_types, 0, sizeof(WarpTypesAllowed));
@@ -72,7 +67,7 @@ static void temporal_filter_predictors_mb_c(
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
av1_highbd_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale,
- 16, 16, which_mv, interp_filter,
+ 16, 16, which_mv, interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, x, y,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -80,7 +75,7 @@ static void temporal_filter_predictors_mb_c(
av1_highbd_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256],
uv_block_width, &mv, scale, uv_block_width,
- uv_block_height, which_mv, interp_filter,
+ uv_block_height, which_mv, interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, x, y,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -88,7 +83,7 @@ static void temporal_filter_predictors_mb_c(
av1_highbd_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512],
uv_block_width, &mv, scale, uv_block_width,
- uv_block_height, which_mv, interp_filter,
+ uv_block_height, which_mv, interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, x, y,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -97,7 +92,7 @@ static void temporal_filter_predictors_mb_c(
}
#endif // CONFIG_HIGHBITDEPTH
av1_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
- &conv_params, interp_filter,
+ &conv_params, interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, x, y, 0, 0,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -105,7 +100,7 @@ static void temporal_filter_predictors_mb_c(
av1_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width,
&mv, scale, uv_block_width, uv_block_height,
- &conv_params, interp_filter,
+ &conv_params, interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, x, y, 1, 0,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -113,7 +108,7 @@ static void temporal_filter_predictors_mb_c(
av1_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width,
&mv, scale, uv_block_width, uv_block_height,
- &conv_params, interp_filter,
+ &conv_params, interp_filters,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
&warp_types, x, y, 2, 0,
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
@@ -291,15 +286,30 @@ static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi,
x->mv_limits = tmp_mv_limits;
- // Ignore mv costing by sending NULL pointer instead of cost array
- bestsme = cpi->find_fractional_mv_step(
- x, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
- &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step,
- cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL,
-#if CONFIG_EXT_INTER
- NULL, 0, 0,
+// Ignore mv costing by sending NULL pointer instead of cost array
+#if CONFIG_AMVR
+ if (cpi->common.cur_frame_mv_precision_level == 1) {
+ const uint8_t *const src_address = x->plane[0].src.buf;
+ const int src_stride = x->plane[0].src.stride;
+ const uint8_t *const y = xd->plane[0].pre[0].buf;
+ const int y_stride = xd->plane[0].pre[0].stride;
+ const int offset = x->best_mv.as_mv.row * y_stride + x->best_mv.as_mv.col;
+
+ x->best_mv.as_mv.row *= 8;
+ x->best_mv.as_mv.col *= 8;
+
+ bestsme = cpi->fn_ptr[BLOCK_16X16].vf(y + offset, y_stride, src_address,
+ src_stride, &sse);
+ } else {
+#endif
+ bestsme = cpi->find_fractional_mv_step(
+ x, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL,
+ NULL, 0, 0, 0, 0, 0);
+#if CONFIG_AMVR
+ }
#endif
- 0, 0, 0);
x->e_mbd.mi[0]->bmi[0].as_mv[0] = x->best_mv;
@@ -311,6 +321,9 @@ static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi,
}
static void temporal_filter_iterate_c(AV1_COMP *cpi,
+#if CONFIG_BGSPRITE
+ YV12_BUFFER_CONFIG *target,
+#endif // CONFIG_BGSPRITE
YV12_BUFFER_CONFIG **frames,
int frame_count, int alt_ref_index,
int strength,
@@ -452,9 +465,17 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
uint16_t *dst1_16;
uint16_t *dst2_16;
+#if CONFIG_BGSPRITE
+ dst1 = target->y_buffer;
+#else
dst1 = cpi->alt_ref_buffer.y_buffer;
+#endif // CONFIG_BGSPRITE
dst1_16 = CONVERT_TO_SHORTPTR(dst1);
+#if CONFIG_BGSPRITE
+ stride = target->y_stride;
+#else
stride = cpi->alt_ref_buffer.y_stride;
+#endif // CONFIG_BGSPRITE
byte = mb_y_offset;
for (i = 0, k = 0; i < 16; i++) {
for (j = 0; j < 16; j++, k++) {
@@ -494,8 +515,13 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
}
} else {
#endif // CONFIG_HIGHBITDEPTH
- dst1 = cpi->alt_ref_buffer.y_buffer;
- stride = cpi->alt_ref_buffer.y_stride;
+#if CONFIG_BGSPRITE
+ dst1 = target->y_buffer;
+ stride = target->y_stride;
+#else
+ dst1 = cpi->alt_ref_buffer.y_buffer;
+ stride = cpi->alt_ref_buffer.y_stride;
+#endif // CONFIG_BGSPRITE
byte = mb_y_offset;
for (i = 0, k = 0; i < 16; i++) {
for (j = 0; j < 16; j++, k++) {
@@ -507,10 +533,15 @@ static void temporal_filter_iterate_c(AV1_COMP *cpi,
}
byte += stride - 16;
}
-
- dst1 = cpi->alt_ref_buffer.u_buffer;
- dst2 = cpi->alt_ref_buffer.v_buffer;
- stride = cpi->alt_ref_buffer.uv_stride;
+#if CONFIG_BGSPRITE
+ dst1 = target->u_buffer;
+ dst2 = target->v_buffer;
+ stride = target->uv_stride;
+#else
+ dst1 = cpi->alt_ref_buffer.u_buffer;
+ dst2 = cpi->alt_ref_buffer.v_buffer;
+ stride = cpi->alt_ref_buffer.uv_stride;
+#endif // CONFIG_BGSPRITE
byte = mb_uv_offset;
for (i = 0, k = 256; i < mb_uv_height; i++) {
for (j = 0; j < mb_uv_width; j++, k++) {
@@ -604,7 +635,7 @@ static void adjust_arnr_filter(AV1_COMP *cpi, int distance, int group_boost,
void av1_temporal_filter(AV1_COMP *cpi,
#if CONFIG_BGSPRITE
- YV12_BUFFER_CONFIG *bg,
+ YV12_BUFFER_CONFIG *bg, YV12_BUFFER_CONFIG *target,
#endif // CONFIG_BGSPRITE
int distance) {
RATE_CONTROL *const rc = &cpi->rc;
@@ -618,7 +649,7 @@ void av1_temporal_filter(AV1_COMP *cpi,
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
#if CONFIG_EXT_REFS
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
-#endif
+#endif // CONFIG_EXT_REFS
// Apply context specific adjustments to the arnr filter parameters.
adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
@@ -627,19 +658,34 @@ void av1_temporal_filter(AV1_COMP *cpi,
// case it is more beneficial to use non-zero strength
// filtering.
#if CONFIG_EXT_REFS
- if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW) {
+ if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
strength = 0;
frames_to_blur = 1;
}
-#endif
-#if CONFIG_EXT_REFS
- if (strength == 0 && frames_to_blur == 1) {
- cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 1;
- } else {
- cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 0;
+ int which_arf = gf_group->arf_update_idx[gf_group->index];
+
+#if USE_GF16_MULTI_LAYER
+ if (cpi->rc.baseline_gf_interval == 16) {
+ // Identify the index to the current ARF.
+ const int num_arfs_in_gf = cpi->num_extra_arfs + 1;
+ int arf_idx;
+ for (arf_idx = 0; arf_idx < num_arfs_in_gf; arf_idx++) {
+ if (gf_group->index == cpi->arf_pos_in_gf[arf_idx]) {
+ which_arf = arf_idx;
+ break;
+ }
+ }
+ assert(arf_idx < num_arfs_in_gf);
}
-#endif
+#endif // USE_GF16_MULTI_LAYER
+
+ // Set the temporal filtering status for the corresponding OVERLAY frame
+ if (strength == 0 && frames_to_blur == 1)
+ cpi->is_arf_filter_off[which_arf] = 1;
+ else
+ cpi->is_arf_filter_off[which_arf] = 0;
+#endif // CONFIG_EXT_REFS
frames_to_blur_backward = (frames_to_blur / 2);
frames_to_blur_forward = ((frames_to_blur - 1) / 2);
@@ -678,6 +724,10 @@ void av1_temporal_filter(AV1_COMP *cpi,
#endif // CONFIG_HIGHBITDEPTH
}
- temporal_filter_iterate_c(cpi, frames, frames_to_blur,
- frames_to_blur_backward, strength, &sf);
+ temporal_filter_iterate_c(cpi,
+#if CONFIG_BGSPRITE
+ target,
+#endif // CONFIG_BGSPRITE
+ frames, frames_to_blur, frames_to_blur_backward,
+ strength, &sf);
}
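
One subtlety in the CONFIG_AMVR branch above: the reference-buffer offset is computed while best_mv is still in whole-pel units, and only afterwards is the MV multiplied by 8 into the 1/8-pel units AV1 uses to store motion vectors. A standalone sketch of that ordering, with a simplified MV type:

#include <stdint.h>

typedef struct { int16_t row, col; } FullpelMvSketch;

/* Return the reference address for a full-pel MV, then convert the MV
   in place to 1/8-pel storage units, mirroring the CONFIG_AMVR path. */
static const uint8_t *use_fullpel_mv(const uint8_t *ref, int stride,
                                     FullpelMvSketch *mv) {
  const uint8_t *addr = ref + mv->row * stride + mv->col; /* whole pels */
  mv->row *= 8; /* 1/8-pel units from here on */
  mv->col *= 8;
  return addr;
}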
diff --git a/third_party/aom/av1/encoder/temporal_filter.h b/third_party/aom/av1/encoder/temporal_filter.h
index ebb24703f..7dd9fad58 100644
--- a/third_party/aom/av1/encoder/temporal_filter.h
+++ b/third_party/aom/av1/encoder/temporal_filter.h
@@ -18,7 +18,7 @@ extern "C" {
void av1_temporal_filter(AV1_COMP *cpi,
#if CONFIG_BGSPRITE
- YV12_BUFFER_CONFIG *bg,
+ YV12_BUFFER_CONFIG *bg, YV12_BUFFER_CONFIG *target,
#endif // CONFIG_BGSPRITE
int distance);
diff --git a/third_party/aom/av1/encoder/tokenize.c b/third_party/aom/av1/encoder/tokenize.c
index b9db891b3..a2e24d66b 100644
--- a/third_party/aom/av1/encoder/tokenize.c
+++ b/third_party/aom/av1/encoder/tokenize.c
@@ -315,36 +315,30 @@ static INLINE void add_token(TOKENEXTRA **t,
(*t)->eob_val = eob_val;
(*t)->first_val = first_val;
(*t)++;
+
+ if (token == BLOCK_Z_TOKEN) {
+ update_cdf(*head_cdf, 0, HEAD_TOKENS + 1);
+ } else {
+ if (eob_val != LAST_EOB) {
+ const int symb = 2 * AOMMIN(token, TWO_TOKEN) - eob_val + first_val;
+ update_cdf(*head_cdf, symb, HEAD_TOKENS + first_val);
+ }
+ if (token > ONE_TOKEN)
+ update_cdf(*tail_cdf, token - TWO_TOKEN, TAIL_TOKENS);
+ }
}
#endif // !CONFIG_PVQ || CONFIG_VAR_TX
-#if CONFIG_PALETTE
-void av1_tokenize_palette_sb(const AV1_COMP *cpi,
- const struct ThreadData *const td, int plane,
- TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
- int *rate) {
- assert(plane == 0 || plane == 1);
- const MACROBLOCK *const x = &td->mb;
- const MACROBLOCKD *const xd = &x->e_mbd;
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const uint8_t *const color_map = xd->plane[plane].color_index_map;
- const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- aom_cdf_prob(
- *palette_cdf)[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] =
- plane ? xd->tile_ctx->palette_uv_color_index_cdf
- : xd->tile_ctx->palette_y_color_index_cdf;
- int plane_block_width, rows, cols;
- av1_get_block_dimensions(bsize, plane, xd, &plane_block_width, NULL, &rows,
- &cols);
+static int cost_and_tokenize_map(Av1ColorMapParam *param, TOKENEXTRA **t,
+ int calc_rate) {
+ const uint8_t *const color_map = param->color_map;
+ MapCdf map_cdf = param->map_cdf;
+ ColorCost color_cost = param->color_cost;
+ const int plane_block_width = param->plane_width;
+ const int rows = param->rows;
+ const int cols = param->cols;
+ const int n = param->n_colors;
- // The first color index does not use context or entropy.
- (*t)->token = color_map[0];
- (*t)->palette_cdf = NULL;
- (*t)->skip_eob_node = 0;
- ++(*t);
-
- const int n = pmi->palette_size[plane];
- const int calc_rate = rate && dry_run == DRY_RUN_COSTCOEFFS;
int this_rate = 0;
uint8_t color_order[PALETTE_MAX_SIZE];
#if CONFIG_PALETTE_THROUGHPUT
@@ -360,18 +354,99 @@ void av1_tokenize_palette_sb(const AV1_COMP *cpi,
color_map, plane_block_width, i, j, n, color_order, &color_new_idx);
assert(color_new_idx >= 0 && color_new_idx < n);
if (calc_rate) {
- this_rate += cpi->palette_y_color_cost[n - PALETTE_MIN_SIZE][color_ctx]
- [color_new_idx];
+ this_rate +=
+ (*color_cost)[n - PALETTE_MIN_SIZE][color_ctx][color_new_idx];
+ } else {
+ (*t)->token = color_new_idx;
+ (*t)->color_map_cdf = map_cdf[n - PALETTE_MIN_SIZE][color_ctx];
+ ++(*t);
}
- (*t)->token = color_new_idx;
- (*t)->palette_cdf = palette_cdf[n - PALETTE_MIN_SIZE][color_ctx];
- (*t)->skip_eob_node = 0;
- ++(*t);
}
}
- if (rate) *rate += this_rate;
+ if (calc_rate) return this_rate;
+ return 0;
+}
+
+static void get_palette_params(const MACROBLOCK *const x, int plane,
+ BLOCK_SIZE bsize, Av1ColorMapParam *params) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ params->color_map = xd->plane[plane].color_index_map;
+ params->map_cdf = plane ? xd->tile_ctx->palette_uv_color_index_cdf
+ : xd->tile_ctx->palette_y_color_index_cdf;
+ params->color_cost =
+ plane ? &x->palette_uv_color_cost : &x->palette_y_color_cost;
+ params->n_colors = pmi->palette_size[plane];
+ av1_get_block_dimensions(bsize, plane, xd, &params->plane_width, NULL,
+ &params->rows, &params->cols);
+}
+
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+static void get_mrc_params(const MACROBLOCK *const x, int block,
+ TX_SIZE tx_size, Av1ColorMapParam *params) {
+ memset(params, 0, sizeof(*params));
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int is_inter = is_inter_block(mbmi);
+ params->color_map = BLOCK_OFFSET(xd->mrc_mask, block);
+ params->map_cdf = is_inter ? xd->tile_ctx->mrc_mask_inter_cdf
+ : xd->tile_ctx->mrc_mask_intra_cdf;
+ params->color_cost =
+ is_inter ? &x->mrc_mask_inter_cost : &x->mrc_mask_intra_cost;
+ params->n_colors = 2;
+ params->plane_width = tx_size_wide[tx_size];
+ params->rows = tx_size_high[tx_size];
+ params->cols = tx_size_wide[tx_size];
+}
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+
+static void get_color_map_params(const MACROBLOCK *const x, int plane,
+ int block, BLOCK_SIZE bsize, TX_SIZE tx_size,
+ COLOR_MAP_TYPE type,
+ Av1ColorMapParam *params) {
+ (void)block;
+ (void)tx_size;
+ memset(params, 0, sizeof(*params));
+ switch (type) {
+ case PALETTE_MAP: get_palette_params(x, plane, bsize, params); break;
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ case MRC_MAP: get_mrc_params(x, block, tx_size, params); break;
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ default: assert(0 && "Invalid color map type"); return;
+ }
+}
+
+int av1_cost_color_map(const MACROBLOCK *const x, int plane, int block,
+ BLOCK_SIZE bsize, TX_SIZE tx_size, COLOR_MAP_TYPE type) {
+ assert(plane == 0 || plane == 1);
+ Av1ColorMapParam color_map_params;
+ get_color_map_params(x, plane, block, bsize, tx_size, type,
+ &color_map_params);
+ return cost_and_tokenize_map(&color_map_params, NULL, 1);
+}
+
+void av1_tokenize_color_map(const MACROBLOCK *const x, int plane, int block,
+ TOKENEXTRA **t, BLOCK_SIZE bsize, TX_SIZE tx_size,
+ COLOR_MAP_TYPE type) {
+ assert(plane == 0 || plane == 1);
+#if CONFIG_MRC_TX
+ if (type == MRC_MAP) {
+ const int is_inter = is_inter_block(&x->e_mbd.mi[0]->mbmi);
+ if ((is_inter && !SIGNAL_MRC_MASK_INTER) ||
+ (!is_inter && !SIGNAL_MRC_MASK_INTRA))
+ return;
+ }
+#endif // CONFIG_MRC_TX
+ Av1ColorMapParam color_map_params;
+ get_color_map_params(x, plane, block, bsize, tx_size, type,
+ &color_map_params);
+ // The first color index does not use context or entropy.
+ (*t)->token = color_map_params.color_map[0];
+ (*t)->color_map_cdf = NULL;
+ ++(*t);
+ cost_and_tokenize_map(&color_map_params, t, 0);
}
-#endif // CONFIG_PALETTE
#if CONFIG_PVQ
static void add_pvq_block(AV1_COMMON *const cm, MACROBLOCK *const x,
@@ -410,7 +485,7 @@ static void tokenize_pvq(int plane, int block, int blk_row, int blk_col,
assert(block < MAX_PVQ_BLOCKS_IN_SB);
pvq_info = &x->pvq[block][plane];
- add_pvq_block((AV1_COMMON * const)cm, x, pvq_info);
+ add_pvq_block((AV1_COMMON * const) cm, x, pvq_info);
}
#endif // CONFIG_PVQ
@@ -444,8 +519,6 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
av1_get_tx_type(type, xd, blk_row, blk_col, block, tx_size);
const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, mbmi);
const int ref = is_inter_block(mbmi);
- unsigned int(*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
- td->rd_counts.coef_counts[txsize_sqr_map[tx_size]][type][ref];
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
aom_cdf_prob(
*const coef_head_cdfs)[COEFF_CONTEXTS][CDF_SIZE(ENTROPY_TOKENS)] =
@@ -453,13 +526,9 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
aom_cdf_prob(
*const coef_tail_cdfs)[COEFF_CONTEXTS][CDF_SIZE(ENTROPY_TOKENS)] =
ec_ctx->coef_tail_cdfs[txsize_sqr_map[tx_size]][type][ref];
- unsigned int(*const blockz_count)[2] =
- td->counts->blockz_count[txsize_sqr_map[tx_size]][type][ref];
int eob_val;
int first_val = 1;
- const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
- unsigned int(*const eob_branch)[COEFF_CONTEXTS] =
- td->counts->eob_branch[txsize_sqr_map[tx_size]][type][ref];
+ const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size);
const uint8_t *const band = get_band_translate(tx_size);
int16_t token;
EXTRABIT extra;
@@ -470,12 +539,15 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
nb = scan_order->neighbors;
c = 0;
+#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+ if (tx_type == MRC_DCT)
+ av1_tokenize_color_map(x, plane, block, &t, plane_bsize, tx_size, MRC_MAP);
+#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
+
if (eob == 0)
add_token(&t, &coef_tail_cdfs[band[c]][pt], &coef_head_cdfs[band[c]][pt], 1,
1, 0, BLOCK_Z_TOKEN);
- ++blockz_count[pt][eob != 0];
-
while (c < eob) {
int v = qcoeff[scan[c]];
first_val = (c == 0);
@@ -483,23 +555,13 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
if (!v) {
add_token(&t, &coef_tail_cdfs[band[c]][pt], &coef_head_cdfs[band[c]][pt],
0, first_val, 0, ZERO_TOKEN);
- ++counts[band[c]][pt][ZERO_TOKEN];
token_cache[scan[c]] = 0;
} else {
eob_val =
(c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
-
av1_get_token_extra(v, &token, &extra);
-
add_token(&t, &coef_tail_cdfs[band[c]][pt], &coef_head_cdfs[band[c]][pt],
eob_val, first_val, extra, (uint8_t)token);
-
- if (eob_val != LAST_EOB) {
- ++counts[band[c]][pt][token];
- ++eob_branch[band[c]][pt];
- counts[band[c]][pt][EOB_TOKEN] += eob_val != NO_EOB;
- }
-
token_cache[scan[c]] = av1_pt_energy_class[token];
}
++c;
@@ -673,7 +735,7 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
if (!is_chroma_reference(mi_row, mi_col, bsize,
xd->plane[plane].subsampling_x,
xd->plane[plane].subsampling_y)) {
-#if !CONFIG_PVQ || !CONFIG_LV_MAP
+#if !CONFIG_PVQ && !CONFIG_LV_MAP
if (!dry_run) {
(*t)->token = EOSB_TOKEN;
(*t)++;
@@ -691,7 +753,8 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
#endif
const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
const int mi_height = block_size_high[plane_bsize] >> tx_size_wide_log2[0];
- const TX_SIZE max_tx_size = get_vartx_max_txsize(mbmi, plane_bsize);
+ const TX_SIZE max_tx_size = get_vartx_max_txsize(
+ mbmi, plane_bsize, pd->subsampling_x || pd->subsampling_y);
const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
int bh = block_size_high[txb_size] >> tx_size_wide_log2[0];
diff --git a/third_party/aom/av1/encoder/tokenize.h b/third_party/aom/av1/encoder/tokenize.h
index 73f0305fa..20000e502 100644
--- a/third_party/aom/av1/encoder/tokenize.h
+++ b/third_party/aom/av1/encoder/tokenize.h
@@ -37,15 +37,12 @@ typedef struct {
typedef struct {
aom_cdf_prob (*tail_cdf)[CDF_SIZE(ENTROPY_TOKENS)];
aom_cdf_prob (*head_cdf)[CDF_SIZE(ENTROPY_TOKENS)];
-#if CONFIG_PALETTE
- aom_cdf_prob *palette_cdf;
-#endif // CONFIG_PALETTE
+ aom_cdf_prob *color_map_cdf;
int eob_val;
int first_val;
const aom_prob *context_tree;
EXTRABIT extra;
uint8_t token;
- uint8_t skip_eob_node;
} TOKENEXTRA;
extern const aom_tree_index av1_coef_tree[];
@@ -77,12 +74,14 @@ void av1_tokenize_sb_vartx(const struct AV1_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
int mi_col, BLOCK_SIZE bsize, int *rate);
#endif
-#if CONFIG_PALETTE
-void av1_tokenize_palette_sb(const struct AV1_COMP *cpi,
- const struct ThreadData *const td, int plane,
- TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
- int *rate);
-#endif // CONFIG_PALETTE
+
+int av1_cost_color_map(const MACROBLOCK *const x, int plane, int block,
+ BLOCK_SIZE bsize, TX_SIZE tx_size, COLOR_MAP_TYPE type);
+
+void av1_tokenize_color_map(const MACROBLOCK *const x, int plane, int block,
+ TOKENEXTRA **t, BLOCK_SIZE bsize, TX_SIZE tx_size,
+ COLOR_MAP_TYPE type);
+
void av1_tokenize_sb(const struct AV1_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
int *rate, const int mi_row, const int mi_col);
@@ -139,13 +138,11 @@ static INLINE int av1_get_token_cost(int v, int16_t *token, int cat6_bits) {
return av1_dct_cat_lt_10_value_cost[v];
}
-#if !CONFIG_PVQ || CONFIG_VAR_TX
-static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
- TX_SIZE tx_size) {
+static INLINE int av1_get_tx_eob(const struct segmentation *seg, int segment_id,
+ TX_SIZE tx_size) {
const int eob_max = tx_size_2d[tx_size];
return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
-#endif
#ifdef __cplusplus
} // extern "C"
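
The pair of declarations above splits the old palette tokenizer into a rate-only path and a token-emitting path over the shared Av1ColorMapParam plumbing in tokenize.c. A hedged usage sketch, assuming the usual encoder variables are in scope:

/* RD estimation (dry run): returns the rate of the color index map. */
const int map_rate =
    av1_cost_color_map(x, plane, block, bsize, tx_size, PALETTE_MAP);

/* Bitstream pass: emits TOKENEXTRA entries for the map, advancing *t. */
av1_tokenize_color_map(x, plane, block, &t, bsize, tx_size, PALETTE_MAP);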
diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c
index 1c0a120ca..078a67510 100644
--- a/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c
+++ b/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c
@@ -16,24 +16,24 @@
#include "aom_dsp/aom_dsp_common.h"
static INLINE void read_coeff(const tran_low_t *coeff, __m256i *c) {
-#if CONFIG_HIGHBITDEPTH
- const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff);
- const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1);
- *c = _mm256_packs_epi32(x0, x1);
- *c = _mm256_permute4x64_epi64(*c, 0xD8);
-#else
- *c = _mm256_loadu_si256((const __m256i *)coeff);
-#endif
+ if (sizeof(tran_low_t) == 4) {
+ const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff);
+ const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1);
+ *c = _mm256_packs_epi32(x0, x1);
+ *c = _mm256_permute4x64_epi64(*c, 0xD8);
+ } else {
+ *c = _mm256_loadu_si256((const __m256i *)coeff);
+ }
}
static INLINE void write_zero(tran_low_t *qcoeff) {
const __m256i zero = _mm256_setzero_si256();
-#if CONFIG_HIGHBITDEPTH
- _mm256_storeu_si256((__m256i *)qcoeff, zero);
- _mm256_storeu_si256((__m256i *)qcoeff + 1, zero);
-#else
- _mm256_storeu_si256((__m256i *)qcoeff, zero);
-#endif
+ if (sizeof(tran_low_t) == 4) {
+ _mm256_storeu_si256((__m256i *)qcoeff, zero);
+ _mm256_storeu_si256((__m256i *)qcoeff + 1, zero);
+ } else {
+ _mm256_storeu_si256((__m256i *)qcoeff, zero);
+ }
}
static INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
@@ -83,19 +83,16 @@ static INLINE void update_qp(int log_scale, __m256i *thr, __m256i *qp) {
_mm256_storeu_si256((__m256i *)addr + 1, x1); \
} while (0)
-#if CONFIG_HIGHBITDEPTH
-#define store_two_quan(q, addr1, dq, addr2) \
- do { \
- store_quan(q, addr1); \
- store_quan(dq, addr2); \
- } while (0)
-#else
-#define store_two_quan(q, addr1, dq, addr2) \
- do { \
- _mm256_storeu_si256((__m256i *)addr1, q); \
- _mm256_storeu_si256((__m256i *)addr2, dq); \
+#define store_two_quan(q, addr1, dq, addr2) \
+ do { \
+ if (sizeof(tran_low_t) == 4) { \
+ store_quan(q, addr1); \
+ store_quan(dq, addr2); \
+ } else { \
+ _mm256_storeu_si256((__m256i *)addr1, q); \
+ _mm256_storeu_si256((__m256i *)addr2, dq); \
+ } \
} while (0)
-#endif
static INLINE void quantize(const __m256i *thr, const __m256i *qp, __m256i *c,
const int16_t *iscan_ptr, tran_low_t *qcoeff,
diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c b/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c
index 190317389..4f7c09546 100644
--- a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c
+++ b/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c
@@ -18,53 +18,53 @@
static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset,
__m128i *c0, __m128i *c1) {
const tran_low_t *addr = coeff + offset;
-#if CONFIG_HIGHBITDEPTH
- const __m128i x0 = _mm_load_si128((const __m128i *)addr);
- const __m128i x1 = _mm_load_si128((const __m128i *)addr + 1);
- const __m128i x2 = _mm_load_si128((const __m128i *)addr + 2);
- const __m128i x3 = _mm_load_si128((const __m128i *)addr + 3);
- *c0 = _mm_packs_epi32(x0, x1);
- *c1 = _mm_packs_epi32(x2, x3);
-#else
- *c0 = _mm_load_si128((const __m128i *)addr);
- *c1 = _mm_load_si128((const __m128i *)addr + 1);
-#endif
+ if (sizeof(tran_low_t) == 4) {
+ const __m128i x0 = _mm_load_si128((const __m128i *)addr);
+ const __m128i x1 = _mm_load_si128((const __m128i *)addr + 1);
+ const __m128i x2 = _mm_load_si128((const __m128i *)addr + 2);
+ const __m128i x3 = _mm_load_si128((const __m128i *)addr + 3);
+ *c0 = _mm_packs_epi32(x0, x1);
+ *c1 = _mm_packs_epi32(x2, x3);
+ } else {
+ *c0 = _mm_load_si128((const __m128i *)addr);
+ *c1 = _mm_load_si128((const __m128i *)addr + 1);
+ }
}
static INLINE void write_qcoeff(const __m128i *qc0, const __m128i *qc1,
tran_low_t *qcoeff, intptr_t offset) {
tran_low_t *addr = qcoeff + offset;
-#if CONFIG_HIGHBITDEPTH
- const __m128i zero = _mm_setzero_si128();
- __m128i sign_bits = _mm_cmplt_epi16(*qc0, zero);
- __m128i y0 = _mm_unpacklo_epi16(*qc0, sign_bits);
- __m128i y1 = _mm_unpackhi_epi16(*qc0, sign_bits);
- _mm_store_si128((__m128i *)addr, y0);
- _mm_store_si128((__m128i *)addr + 1, y1);
-
- sign_bits = _mm_cmplt_epi16(*qc1, zero);
- y0 = _mm_unpacklo_epi16(*qc1, sign_bits);
- y1 = _mm_unpackhi_epi16(*qc1, sign_bits);
- _mm_store_si128((__m128i *)addr + 2, y0);
- _mm_store_si128((__m128i *)addr + 3, y1);
-#else
- _mm_store_si128((__m128i *)addr, *qc0);
- _mm_store_si128((__m128i *)addr + 1, *qc1);
-#endif
+ if (sizeof(tran_low_t) == 4) {
+ const __m128i zero = _mm_setzero_si128();
+ __m128i sign_bits = _mm_cmplt_epi16(*qc0, zero);
+ __m128i y0 = _mm_unpacklo_epi16(*qc0, sign_bits);
+ __m128i y1 = _mm_unpackhi_epi16(*qc0, sign_bits);
+ _mm_store_si128((__m128i *)addr, y0);
+ _mm_store_si128((__m128i *)addr + 1, y1);
+
+ sign_bits = _mm_cmplt_epi16(*qc1, zero);
+ y0 = _mm_unpacklo_epi16(*qc1, sign_bits);
+ y1 = _mm_unpackhi_epi16(*qc1, sign_bits);
+ _mm_store_si128((__m128i *)addr + 2, y0);
+ _mm_store_si128((__m128i *)addr + 3, y1);
+ } else {
+ _mm_store_si128((__m128i *)addr, *qc0);
+ _mm_store_si128((__m128i *)addr + 1, *qc1);
+ }
}
static INLINE void write_zero(tran_low_t *qcoeff, intptr_t offset) {
const __m128i zero = _mm_setzero_si128();
tran_low_t *addr = qcoeff + offset;
-#if CONFIG_HIGHBITDEPTH
- _mm_store_si128((__m128i *)addr, zero);
- _mm_store_si128((__m128i *)addr + 1, zero);
- _mm_store_si128((__m128i *)addr + 2, zero);
- _mm_store_si128((__m128i *)addr + 3, zero);
-#else
- _mm_store_si128((__m128i *)addr, zero);
- _mm_store_si128((__m128i *)addr + 1, zero);
-#endif
+ if (sizeof(tran_low_t) == 4) {
+ _mm_store_si128((__m128i *)addr, zero);
+ _mm_store_si128((__m128i *)addr + 1, zero);
+ _mm_store_si128((__m128i *)addr + 2, zero);
+ _mm_store_si128((__m128i *)addr + 3, zero);
+ } else {
+ _mm_store_si128((__m128i *)addr, zero);
+ _mm_store_si128((__m128i *)addr + 1, zero);
+ }
}
void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
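
In both quantizer files above, compile-time #if CONFIG_HIGHBITDEPTH branches become ordinary if (sizeof(tran_low_t) == 4) tests. The condition is a compile-time constant, so the optimizer drops the dead arm and the object code is unchanged, while both arms now get syntax-checked in every build. A minimal standalone illustration of the idiom (the 32-bit typedef stands for a high-bit-depth build):

#include <stdint.h>
#include <string.h>

typedef int32_t tran_low_t; /* int16_t in non-high-bit-depth builds */

static void narrow_coeffs(const tran_low_t *in, int16_t *out, int n) {
  if (sizeof(tran_low_t) == 4) { /* constant: folded at compile time */
    int i;
    for (i = 0; i < n; i++) out[i] = (int16_t)in[i]; /* pack 32 -> 16 */
  } else { /* dead arm under this typedef, but still compiled */
    memcpy(out, in, (size_t)n * sizeof(int16_t));
  }
}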
diff --git a/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c b/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c
index 496c33395..e5b19a44c 100644
--- a/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c
+++ b/third_party/aom/av1/encoder/x86/dct_intrin_sse2.c
@@ -205,7 +205,7 @@ static void fidtx4_sse2(__m128i *in) {
void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m128i in[4];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -308,447 +308,6 @@ void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
}
}
-void av1_fdct8x8_quant_sse2(const int16_t *input, int stride,
- int16_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
- int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
- __m128i zero;
- int pass;
- // Constants
- // When we use them, in one case, they are all the same. In all others
- // it's a pair of them that we need to repeat four times. This is done
- // by constructing the 32 bit constant corresponding to that pair.
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
- const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
- const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
- const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
- const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
- const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
- const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
- const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
- // Load input
- __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
- __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
- __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
- __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
- __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
- __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
- __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
- __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
- __m128i *in[8];
- int index = 0;
-
- (void)scan_ptr;
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- (void)coeff_ptr;
-
- // Pre-condition input (shift by two)
- in0 = _mm_slli_epi16(in0, 2);
- in1 = _mm_slli_epi16(in1, 2);
- in2 = _mm_slli_epi16(in2, 2);
- in3 = _mm_slli_epi16(in3, 2);
- in4 = _mm_slli_epi16(in4, 2);
- in5 = _mm_slli_epi16(in5, 2);
- in6 = _mm_slli_epi16(in6, 2);
- in7 = _mm_slli_epi16(in7, 2);
-
- in[0] = &in0;
- in[1] = &in1;
- in[2] = &in2;
- in[3] = &in3;
- in[4] = &in4;
- in[5] = &in5;
- in[6] = &in6;
- in[7] = &in7;
-
- // We do two passes, first the columns, then the rows. The results of the
- // first pass are transposed so that the same column code can be reused. The
- // results of the second pass are also transposed so that the rows (processed
- // as columns) are put back in row positions.
- for (pass = 0; pass < 2; pass++) {
- // To store results of each pass before the transpose.
- __m128i res0, res1, res2, res3, res4, res5, res6, res7;
- // Add/subtract
- const __m128i q0 = _mm_add_epi16(in0, in7);
- const __m128i q1 = _mm_add_epi16(in1, in6);
- const __m128i q2 = _mm_add_epi16(in2, in5);
- const __m128i q3 = _mm_add_epi16(in3, in4);
- const __m128i q4 = _mm_sub_epi16(in3, in4);
- const __m128i q5 = _mm_sub_epi16(in2, in5);
- const __m128i q6 = _mm_sub_epi16(in1, in6);
- const __m128i q7 = _mm_sub_epi16(in0, in7);
- // Work on first four results
- {
- // Add/subtract
- const __m128i r0 = _mm_add_epi16(q0, q3);
- const __m128i r1 = _mm_add_epi16(q1, q2);
- const __m128i r2 = _mm_sub_epi16(q1, q2);
- const __m128i r3 = _mm_sub_epi16(q0, q3);
- // Interleave to do the multiply by constants which gets us into 32bits
- const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
- const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
- const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
- const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
- const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
- const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
- const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
- const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
- const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
- const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
- const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
- const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
- // dct_const_round_shift
- const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
- const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
- const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
- const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
- const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
- const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
- const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
- const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
- const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
- const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
- const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
- const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
- const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
- // Combine
- res0 = _mm_packs_epi32(w0, w1);
- res4 = _mm_packs_epi32(w2, w3);
- res2 = _mm_packs_epi32(w4, w5);
- res6 = _mm_packs_epi32(w6, w7);
- }
- // Work on next four results
- {
- // Interleave to do the multiply by constants which gets us into 32bits
- const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
- const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
- const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16);
- const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16);
- const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16);
- const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16);
- // dct_const_round_shift
- const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING);
- const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING);
- const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING);
- const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING);
- const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS);
- const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS);
- const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS);
- const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS);
- // Combine
- const __m128i r0 = _mm_packs_epi32(s0, s1);
- const __m128i r1 = _mm_packs_epi32(s2, s3);
- // Add/subtract
- const __m128i x0 = _mm_add_epi16(q4, r0);
- const __m128i x1 = _mm_sub_epi16(q4, r0);
- const __m128i x2 = _mm_sub_epi16(q7, r1);
- const __m128i x3 = _mm_add_epi16(q7, r1);
- // Interleave to do the multiply by constants which gets us into 32bits
- const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
- const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
- const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
- const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
- const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
- const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
- const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
- const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
- const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
- const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
- const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
- const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
- // dct_const_round_shift
- const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
- const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
- const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
- const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
- const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
- const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
- const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
- const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
- const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
- const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
- const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
- const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
- const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
- // Combine
- res1 = _mm_packs_epi32(w0, w1);
- res7 = _mm_packs_epi32(w2, w3);
- res5 = _mm_packs_epi32(w4, w5);
- res3 = _mm_packs_epi32(w6, w7);
- }
- // Transpose the 8x8.
- {
- // 00 01 02 03 04 05 06 07
- // 10 11 12 13 14 15 16 17
- // 20 21 22 23 24 25 26 27
- // 30 31 32 33 34 35 36 37
- // 40 41 42 43 44 45 46 47
- // 50 51 52 53 54 55 56 57
- // 60 61 62 63 64 65 66 67
- // 70 71 72 73 74 75 76 77
- const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
- const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);
- const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1);
- const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3);
- const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);
- const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);
- const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5);
- const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7);
- // 00 10 01 11 02 12 03 13
- // 20 30 21 31 22 32 23 33
- // 04 14 05 15 06 16 07 17
- // 24 34 25 35 26 36 27 37
- // 40 50 41 51 42 52 43 53
- // 60 70 61 71 62 72 63 73
- // 44 54 45 55 46 56 47 57
- // 64 74 65 75 66 76 67 77
- const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
- const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
- const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
- const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
- const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
- const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
- const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
- const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
- // 00 10 20 30 01 11 21 31
- // 40 50 60 70 41 51 61 71
- // 02 12 22 32 03 13 23 33
- // 42 52 62 72 43 53 63 73
- // 04 14 24 34 05 15 25 35
- // 44 54 64 74 45 55 65 75
- // 06 16 26 36 07 17 27 37
- // 46 56 66 76 47 57 67 77
- in0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
- in1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
- in2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
- in3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
- in4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
- in5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
- in6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
- in7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
- // 00 10 20 30 40 50 60 70
- // 01 11 21 31 41 51 61 71
- // 02 12 22 32 42 52 62 72
- // 03 13 23 33 43 53 63 73
- // 04 14 24 34 44 54 64 74
- // 05 15 25 35 45 55 65 75
- // 06 16 26 36 46 56 66 76
- // 07 17 27 37 47 57 67 77
- }
- }
- // Post-condition output and store it
- {
- // Post-condition (division by two)
- // division of two 16 bits signed numbers using shifts
- // n / 2 = (n - (n >> 15)) >> 1
- const __m128i sign_in0 = _mm_srai_epi16(in0, 15);
- const __m128i sign_in1 = _mm_srai_epi16(in1, 15);
- const __m128i sign_in2 = _mm_srai_epi16(in2, 15);
- const __m128i sign_in3 = _mm_srai_epi16(in3, 15);
- const __m128i sign_in4 = _mm_srai_epi16(in4, 15);
- const __m128i sign_in5 = _mm_srai_epi16(in5, 15);
- const __m128i sign_in6 = _mm_srai_epi16(in6, 15);
- const __m128i sign_in7 = _mm_srai_epi16(in7, 15);
- in0 = _mm_sub_epi16(in0, sign_in0);
- in1 = _mm_sub_epi16(in1, sign_in1);
- in2 = _mm_sub_epi16(in2, sign_in2);
- in3 = _mm_sub_epi16(in3, sign_in3);
- in4 = _mm_sub_epi16(in4, sign_in4);
- in5 = _mm_sub_epi16(in5, sign_in5);
- in6 = _mm_sub_epi16(in6, sign_in6);
- in7 = _mm_sub_epi16(in7, sign_in7);
- in0 = _mm_srai_epi16(in0, 1);
- in1 = _mm_srai_epi16(in1, 1);
- in2 = _mm_srai_epi16(in2, 1);
- in3 = _mm_srai_epi16(in3, 1);
- in4 = _mm_srai_epi16(in4, 1);
- in5 = _mm_srai_epi16(in5, 1);
- in6 = _mm_srai_epi16(in6, 1);
- in7 = _mm_srai_epi16(in7, 1);
- }
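/* Worked example of the halving identity used above: for the 16-bit
 * lane value n = -5,
 *   n >> 15       = -1  (arithmetic shift replicates the sign bit)
 *   n - (n >> 15) = -4
 *   -4 >> 1       = -2  = trunc(-5 / 2), i.e. rounding toward zero,
 * whereas the bare shift -5 >> 1 = -3 rounds toward minus infinity.
 * For n >= 0 the correction term is zero and the shift already
 * truncates toward zero. */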
-
- iscan_ptr += n_coeffs;
- qcoeff_ptr += n_coeffs;
- dqcoeff_ptr += n_coeffs;
- n_coeffs = -n_coeffs;
- zero = _mm_setzero_si128();
-
- if (!skip_block) {
- __m128i eob;
- __m128i round, quant, dequant;
- {
- __m128i coeff0, coeff1;
-
- // Setup global values
- {
- round = _mm_load_si128((const __m128i *)round_ptr);
- quant = _mm_load_si128((const __m128i *)quant_ptr);
- dequant = _mm_load_si128((const __m128i *)dequant_ptr);
- }
-
- {
- __m128i coeff0_sign, coeff1_sign;
- __m128i qcoeff0, qcoeff1;
- __m128i qtmp0, qtmp1;
- // Do DC and first 15 AC
- coeff0 = *in[0];
- coeff1 = *in[1];
-
- // Poor man's sign extract
- coeff0_sign = _mm_srai_epi16(coeff0, 15);
- coeff1_sign = _mm_srai_epi16(coeff1, 15);
- qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- qcoeff0 = _mm_adds_epi16(qcoeff0, round);
- round = _mm_unpackhi_epi64(round, round);
- qcoeff1 = _mm_adds_epi16(qcoeff1, round);
- qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
- quant = _mm_unpackhi_epi64(quant, quant);
- qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
-
- // Reinsert signs
- qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
-
- coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
- dequant = _mm_unpackhi_epi64(dequant, dequant);
- coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
-
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
- }
-
- {
- // Scan for eob
- __m128i zero_coeff0, zero_coeff1;
- __m128i nzero_coeff0, nzero_coeff1;
- __m128i iscan0, iscan1;
- __m128i eob1;
- zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
- zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
- nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
- nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
- // Add one to convert from indices to counts
- iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
- iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
- eob = _mm_and_si128(iscan0, nzero_coeff0);
- eob1 = _mm_and_si128(iscan1, nzero_coeff1);
- eob = _mm_max_epi16(eob, eob1);
- }
- n_coeffs += 8 * 2;
- }
-
- // AC only loop
- index = 2;
- while (n_coeffs < 0) {
- __m128i coeff0, coeff1;
- {
- __m128i coeff0_sign, coeff1_sign;
- __m128i qcoeff0, qcoeff1;
- __m128i qtmp0, qtmp1;
-
- assert(index < (int)(sizeof(in) / sizeof(in[0])) - 1);
- coeff0 = *in[index];
- coeff1 = *in[index + 1];
-
- // Poor man's sign extract
- coeff0_sign = _mm_srai_epi16(coeff0, 15);
- coeff1_sign = _mm_srai_epi16(coeff1, 15);
- qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- qcoeff0 = _mm_adds_epi16(qcoeff0, round);
- qcoeff1 = _mm_adds_epi16(qcoeff1, round);
- qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
- qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
-
- // Reinsert signs
- qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
-
- coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
- coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
-
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
- }
-
- {
- // Scan for eob
- __m128i zero_coeff0, zero_coeff1;
- __m128i nzero_coeff0, nzero_coeff1;
- __m128i iscan0, iscan1;
- __m128i eob0, eob1;
- zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
- zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
- nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
- nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
- // Add one to convert from indices to counts
- iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
- iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
- eob0 = _mm_and_si128(iscan0, nzero_coeff0);
- eob1 = _mm_and_si128(iscan1, nzero_coeff1);
- eob0 = _mm_max_epi16(eob0, eob1);
- eob = _mm_max_epi16(eob, eob0);
- }
- n_coeffs += 8 * 2;
- index += 2;
- }
-
- // Accumulate EOB
- {
- __m128i eob_shuffled;
- eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
- eob = _mm_max_epi16(eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
- eob = _mm_max_epi16(eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
- eob = _mm_max_epi16(eob, eob_shuffled);
- *eob_ptr = _mm_extract_epi16(eob, 1);
- }
- } else {
- do {
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
- n_coeffs += 8 * 2;
- } while (n_coeffs < 0);
- *eob_ptr = 0;
- }
-}
-
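
For reference, the eob bookkeeping in the quantizer deleted above has a simple scalar equivalent: every nonzero dequantized coefficient contributes its scan index plus one ("add one to convert from indices to counts"), and eob is the running maximum, which the "Accumulate EOB" block extracts at the end with a shuffle/max horizontal reduction. A minimal scalar sketch (scalar_eob is a hypothetical helper, not an aom API):

#include <stdint.h>

static uint16_t scalar_eob(const int16_t *dqcoeff, const int16_t *iscan,
                           intptr_t n_coeffs) {
  uint16_t eob = 0;
  for (intptr_t i = 0; i < n_coeffs; ++i) {
    if (dqcoeff[i] != 0) {
      const uint16_t count = (uint16_t)(iscan[i] + 1);  // index -> count
      if (count > eob) eob = count;
    }
  }
  return eob;
}
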
// load 8x8 array
static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
int stride, int flipud, int fliplr) {
@@ -1307,7 +866,7 @@ static void fidtx8_sse2(__m128i *in) {
void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m128i in[8];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -2344,7 +1903,7 @@ static void fidtx16_sse2(__m128i *in0, __m128i *in1) {
void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m128i in0[16], in1[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -2564,7 +2123,7 @@ static INLINE void write_buffer_4x8(tran_low_t *output, __m128i *res) {
void av1_fht4x8_sse2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m128i in[8];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -2742,7 +2301,7 @@ static INLINE void write_buffer_8x4(tran_low_t *output, __m128i *res) {
void av1_fht8x4_sse2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m128i in[8];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -2886,7 +2445,7 @@ static void row_8x16_rounding(__m128i *in, int bits) {
void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m128i in[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -3071,7 +2630,7 @@ static INLINE void load_buffer_16x8(const int16_t *input, __m128i *in,
void av1_fht16x8_sse2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m128i in[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -3385,7 +2944,7 @@ static INLINE void fhalfright32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
void av1_fht16x32_sse2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m128i intl[16], intr[16], inbl[16], inbr[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -3578,7 +3137,7 @@ static INLINE void write_buffer_32x16(tran_low_t *output, __m128i *res0,
void av1_fht32x16_sse2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m128i in0[16], in1[16], in2[16], in3[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -3822,7 +3381,7 @@ static INLINE void write_buffer_32x32(__m128i *in0, __m128i *in1, __m128i *in2,
void av1_fht32x32_sse2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m128i in0[32], in1[32], in2[32], in3[32];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "No 32x32 sse2 MRC_DCT implementation");
#endif
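
Each av1_fht* kernel patched above is driven the same way through TxfmParam. A hedged usage sketch for the 8x8 case (buffer sizes and the tx type are illustrative; TxfmParam, TX_TYPE, tran_low_t and the kernel prototype are assumed to arrive via ./av1_rtcd.h and the av1 headers it pulls in):

#include <string.h>
#include "./av1_rtcd.h"

void example_fht8x8(const int16_t *src_diff, tran_low_t *coeff, int stride) {
  TxfmParam param;
  memset(&param, 0, sizeof(param));  // only tx_type matters for this call
  param.tx_type = DCT_DCT;           // any TX_TYPE but MRC_DCT, per the asserts
  av1_fht8x8_sse2(src_diff, coeff, stride, &param);
}
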
diff --git a/third_party/aom/av1/encoder/x86/dct_ssse3.c b/third_party/aom/av1/encoder/x86/dct_ssse3.c
deleted file mode 100644
index 717a99af8..000000000
--- a/third_party/aom/av1/encoder/x86/dct_ssse3.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#if defined(_MSC_VER) && _MSC_VER <= 1500
-// Need to include math.h before calling tmmintrin.h/intrin.h
-// in certain versions of MSVS.
-#include <math.h>
-#endif
-#include <tmmintrin.h> // SSSE3
-
-#include "./av1_rtcd.h"
-#include "aom_dsp/x86/inv_txfm_sse2.h"
-#include "aom_dsp/x86/txfm_common_sse2.h"
-
-void av1_fdct8x8_quant_ssse3(
- const int16_t *input, int stride, int16_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
- int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
- __m128i zero;
- int pass;
- // Constants
- // When we use them, in one case, they are all the same. In all others
- // it's a pair of them that we need to repeat four times. This is done
- // by constructing the 32 bit constant corresponding to that pair.
- const __m128i k__dual_p16_p16 = dual_set_epi16(23170, 23170);
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
- const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
- const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
- const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
- const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
- const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
- const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
- const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
- // Load input
- __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
- __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
- __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
- __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
- __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
- __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
- __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
- __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
- __m128i *in[8];
- int index = 0;
-
- (void)scan_ptr;
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- (void)coeff_ptr;
-
- // Pre-condition input (shift by two)
- in0 = _mm_slli_epi16(in0, 2);
- in1 = _mm_slli_epi16(in1, 2);
- in2 = _mm_slli_epi16(in2, 2);
- in3 = _mm_slli_epi16(in3, 2);
- in4 = _mm_slli_epi16(in4, 2);
- in5 = _mm_slli_epi16(in5, 2);
- in6 = _mm_slli_epi16(in6, 2);
- in7 = _mm_slli_epi16(in7, 2);
-
- in[0] = &in0;
- in[1] = &in1;
- in[2] = &in2;
- in[3] = &in3;
- in[4] = &in4;
- in[5] = &in5;
- in[6] = &in6;
- in[7] = &in7;
-
- // We do two passes, first the columns, then the rows. The results of the
- // first pass are transposed so that the same column code can be reused. The
- // results of the second pass are also transposed so that the rows (processed
- // as columns) are put back in row positions.
- for (pass = 0; pass < 2; pass++) {
- // To store results of each pass before the transpose.
- __m128i res0, res1, res2, res3, res4, res5, res6, res7;
- // Add/subtract
- const __m128i q0 = _mm_add_epi16(in0, in7);
- const __m128i q1 = _mm_add_epi16(in1, in6);
- const __m128i q2 = _mm_add_epi16(in2, in5);
- const __m128i q3 = _mm_add_epi16(in3, in4);
- const __m128i q4 = _mm_sub_epi16(in3, in4);
- const __m128i q5 = _mm_sub_epi16(in2, in5);
- const __m128i q6 = _mm_sub_epi16(in1, in6);
- const __m128i q7 = _mm_sub_epi16(in0, in7);
- // Work on first four results
- {
- // Add/subtract
- const __m128i r0 = _mm_add_epi16(q0, q3);
- const __m128i r1 = _mm_add_epi16(q1, q2);
- const __m128i r2 = _mm_sub_epi16(q1, q2);
- const __m128i r3 = _mm_sub_epi16(q0, q3);
- // Interleave to do the multiply by constants which gets us into 32bits
- const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
- const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
- const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
- const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
-
- const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
- const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
- const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
- const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
-
- const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
- const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
- const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
- const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
- // dct_const_round_shift
-
- const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
- const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
- const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
- const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
-
- const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
-
- const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
- const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
- const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
- const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
-
- const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
- const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
- const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
- const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
- // Combine
-
- res0 = _mm_packs_epi32(w0, w1);
- res4 = _mm_packs_epi32(w2, w3);
- res2 = _mm_packs_epi32(w4, w5);
- res6 = _mm_packs_epi32(w6, w7);
- }
- // Work on next four results
- {
- // Interleave to do the multiply by constants which gets us into 32bits
- const __m128i d0 = _mm_sub_epi16(q6, q5);
- const __m128i d1 = _mm_add_epi16(q6, q5);
- const __m128i r0 = _mm_mulhrs_epi16(d0, k__dual_p16_p16);
- const __m128i r1 = _mm_mulhrs_epi16(d1, k__dual_p16_p16);
-
- // Add/subtract
- const __m128i x0 = _mm_add_epi16(q4, r0);
- const __m128i x1 = _mm_sub_epi16(q4, r0);
- const __m128i x2 = _mm_sub_epi16(q7, r1);
- const __m128i x3 = _mm_add_epi16(q7, r1);
- // Interleave to do the multiply by constants which gets us into 32bits
- const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
- const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
- const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
- const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
- const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
- const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
- const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
- const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
- const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
- const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
- const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
- const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
- // dct_const_round_shift
- const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
- const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
- const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
- const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
- const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
- const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
- const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
- const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
- const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
- const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
- const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
- const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
- const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
- // Combine
- res1 = _mm_packs_epi32(w0, w1);
- res7 = _mm_packs_epi32(w2, w3);
- res5 = _mm_packs_epi32(w4, w5);
- res3 = _mm_packs_epi32(w6, w7);
- }
- // Transpose the 8x8.
- {
- // 00 01 02 03 04 05 06 07
- // 10 11 12 13 14 15 16 17
- // 20 21 22 23 24 25 26 27
- // 30 31 32 33 34 35 36 37
- // 40 41 42 43 44 45 46 47
- // 50 51 52 53 54 55 56 57
- // 60 61 62 63 64 65 66 67
- // 70 71 72 73 74 75 76 77
- const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
- const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);
- const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1);
- const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3);
- const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);
- const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);
- const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5);
- const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7);
- // 00 10 01 11 02 12 03 13
- // 20 30 21 31 22 32 23 33
- // 04 14 05 15 06 16 07 17
- // 24 34 25 35 26 36 27 37
- // 40 50 41 51 42 52 43 53
- // 60 70 61 71 62 72 63 73
- // 44 54 45 55 46 56 47 57
- // 64 74 65 75 66 76 67 77
- const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
- const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
- const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
- const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
- const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
- const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
- const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
- const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
- // 00 10 20 30 01 11 21 31
- // 40 50 60 70 41 51 61 71
- // 02 12 22 32 03 13 23 33
- // 42 52 62 72 43 53 63 73
- // 04 14 24 34 05 15 25 35
- // 44 54 64 74 45 55 65 75
- // 06 16 26 36 07 17 27 37
- // 46 56 66 76 47 57 67 77
- in0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
- in1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
- in2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
- in3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
- in4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
- in5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
- in6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
- in7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
- // 00 10 20 30 40 50 60 70
- // 01 11 21 31 41 51 61 71
- // 02 12 22 32 42 52 62 72
- // 03 13 23 33 43 53 63 73
- // 04 14 24 34 44 54 64 74
- // 05 15 25 35 45 55 65 75
- // 06 16 26 36 46 56 66 76
- // 07 17 27 37 47 57 67 77
- }
- }
- // Post-condition output and store it
- {
- // Post-condition (division by two)
- // division of two 16 bits signed numbers using shifts
- // n / 2 = (n - (n >> 15)) >> 1
- const __m128i sign_in0 = _mm_srai_epi16(in0, 15);
- const __m128i sign_in1 = _mm_srai_epi16(in1, 15);
- const __m128i sign_in2 = _mm_srai_epi16(in2, 15);
- const __m128i sign_in3 = _mm_srai_epi16(in3, 15);
- const __m128i sign_in4 = _mm_srai_epi16(in4, 15);
- const __m128i sign_in5 = _mm_srai_epi16(in5, 15);
- const __m128i sign_in6 = _mm_srai_epi16(in6, 15);
- const __m128i sign_in7 = _mm_srai_epi16(in7, 15);
- in0 = _mm_sub_epi16(in0, sign_in0);
- in1 = _mm_sub_epi16(in1, sign_in1);
- in2 = _mm_sub_epi16(in2, sign_in2);
- in3 = _mm_sub_epi16(in3, sign_in3);
- in4 = _mm_sub_epi16(in4, sign_in4);
- in5 = _mm_sub_epi16(in5, sign_in5);
- in6 = _mm_sub_epi16(in6, sign_in6);
- in7 = _mm_sub_epi16(in7, sign_in7);
- in0 = _mm_srai_epi16(in0, 1);
- in1 = _mm_srai_epi16(in1, 1);
- in2 = _mm_srai_epi16(in2, 1);
- in3 = _mm_srai_epi16(in3, 1);
- in4 = _mm_srai_epi16(in4, 1);
- in5 = _mm_srai_epi16(in5, 1);
- in6 = _mm_srai_epi16(in6, 1);
- in7 = _mm_srai_epi16(in7, 1);
- }
-
- iscan_ptr += n_coeffs;
- qcoeff_ptr += n_coeffs;
- dqcoeff_ptr += n_coeffs;
- n_coeffs = -n_coeffs;
- zero = _mm_setzero_si128();
-
- if (!skip_block) {
- __m128i eob;
- __m128i round, quant, dequant, thr;
- int16_t nzflag;
- {
- __m128i coeff0, coeff1;
-
- // Setup global values
- {
- round = _mm_load_si128((const __m128i *)round_ptr);
- quant = _mm_load_si128((const __m128i *)quant_ptr);
- dequant = _mm_load_si128((const __m128i *)dequant_ptr);
- }
-
- {
- __m128i coeff0_sign, coeff1_sign;
- __m128i qcoeff0, qcoeff1;
- __m128i qtmp0, qtmp1;
- // Do DC and first 15 AC
- coeff0 = *in[0];
- coeff1 = *in[1];
-
- // Poor man's sign extract
- coeff0_sign = _mm_srai_epi16(coeff0, 15);
- coeff1_sign = _mm_srai_epi16(coeff1, 15);
- qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- qcoeff0 = _mm_adds_epi16(qcoeff0, round);
- round = _mm_unpackhi_epi64(round, round);
- qcoeff1 = _mm_adds_epi16(qcoeff1, round);
- qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
- quant = _mm_unpackhi_epi64(quant, quant);
- qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
-
- // Reinsert signs
- qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
-
- coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
- dequant = _mm_unpackhi_epi64(dequant, dequant);
- coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
-
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
- }
-
- {
- // Scan for eob
- __m128i zero_coeff0, zero_coeff1;
- __m128i nzero_coeff0, nzero_coeff1;
- __m128i iscan0, iscan1;
- __m128i eob1;
- zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
- zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
- nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
- nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
- // Add one to convert from indices to counts
- iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
- iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
- eob = _mm_and_si128(iscan0, nzero_coeff0);
- eob1 = _mm_and_si128(iscan1, nzero_coeff1);
- eob = _mm_max_epi16(eob, eob1);
- }
- n_coeffs += 8 * 2;
- }
-
- // AC only loop
- index = 2;
- thr = _mm_srai_epi16(dequant, 1);
- while (n_coeffs < 0) {
- __m128i coeff0, coeff1;
- {
- __m128i coeff0_sign, coeff1_sign;
- __m128i qcoeff0, qcoeff1;
- __m128i qtmp0, qtmp1;
-
- assert(index < (int)(sizeof(in) / sizeof(in[0])) - 1);
- coeff0 = *in[index];
- coeff1 = *in[index + 1];
-
- // Poor man's sign extract
- coeff0_sign = _mm_srai_epi16(coeff0, 15);
- coeff1_sign = _mm_srai_epi16(coeff1, 15);
- qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
- _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
-
- if (nzflag) {
- qcoeff0 = _mm_adds_epi16(qcoeff0, round);
- qcoeff1 = _mm_adds_epi16(qcoeff1, round);
- qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
- qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
-
- // Reinsert signs
- qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
-
- coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
- coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
-
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
- } else {
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
-
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
- }
- }
-
- if (nzflag) {
- // Scan for eob
- __m128i zero_coeff0, zero_coeff1;
- __m128i nzero_coeff0, nzero_coeff1;
- __m128i iscan0, iscan1;
- __m128i eob0, eob1;
- zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
- zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
- nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
- nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
- // Add one to convert from indices to counts
- iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
- iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
- eob0 = _mm_and_si128(iscan0, nzero_coeff0);
- eob1 = _mm_and_si128(iscan1, nzero_coeff1);
- eob0 = _mm_max_epi16(eob0, eob1);
- eob = _mm_max_epi16(eob, eob0);
- }
- n_coeffs += 8 * 2;
- index += 2;
- }
-
- // Accumulate EOB
- {
- __m128i eob_shuffled;
- eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
- eob = _mm_max_epi16(eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
- eob = _mm_max_epi16(eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
- eob = _mm_max_epi16(eob, eob_shuffled);
- *eob_ptr = _mm_extract_epi16(eob, 1);
- }
- } else {
- do {
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
- n_coeffs += 8 * 2;
- } while (n_coeffs < 0);
- *eob_ptr = 0;
- }
-}
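
The SSSE3 variant deleted here differed from the SSE2 quantizer above in two main respects: the odd-stage butterflies use _mm_mulhrs_epi16 against the dual cospi_16_64 constant, and the AC loop has an early-out — nzflag compares |coeff| against thr = dequant >> 1 and stores plain zeros for a 16-coefficient chunk whenever every |coeff| is at most dequant/2 and is therefore expected to quantize to zero. The post-condition's divide by two, n/2 = (n - (n >> 15)) >> 1, is signed division that rounds toward zero (a bare n >> 1 would round toward minus infinity). The per-coefficient math both kernels vectorize reduces to this scalar sketch (quantize_coeff is a hypothetical illustration, not an aom API; the saturation of _mm_adds_epi16 is omitted):

#include <stdint.h>

static int16_t quantize_coeff(int16_t coeff, int16_t round, int16_t quant,
                              int16_t dequant, int16_t *dqcoeff) {
  const int16_t sign = coeff >> 15;                 // 0 or -1: "poor man's" sign
  const int16_t abs_coeff = (coeff ^ sign) - sign;  // branch-free abs()
  int16_t q = (int16_t)(((abs_coeff + round) * quant) >> 16);  // _mm_mulhi_epi16
  q = (q ^ sign) - sign;                            // reinsert the sign
  *dqcoeff = (int16_t)(q * dequant);                // _mm_mullo_epi16
  return q;
}
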
diff --git a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c b/third_party/aom/av1/encoder/x86/error_intrin_avx2.c
index 20ba4149c..6599630d0 100644
--- a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c
+++ b/third_party/aom/av1/encoder/x86/error_intrin_avx2.c
@@ -17,14 +17,15 @@
static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset,
__m256i *c) {
const tran_low_t *addr = coeff + offset;
-#if CONFIG_HIGHBITDEPTH
- const __m256i x0 = _mm256_loadu_si256((const __m256i *)addr);
- const __m256i x1 = _mm256_loadu_si256((const __m256i *)addr + 1);
- const __m256i y = _mm256_packs_epi32(x0, x1);
- *c = _mm256_permute4x64_epi64(y, 0xD8);
-#else
- *c = _mm256_loadu_si256((const __m256i *)addr);
-#endif
+
+ if (sizeof(tran_low_t) == 4) {
+ const __m256i x0 = _mm256_loadu_si256((const __m256i *)addr);
+ const __m256i x1 = _mm256_loadu_si256((const __m256i *)addr + 1);
+ const __m256i y = _mm256_packs_epi32(x0, x1);
+ *c = _mm256_permute4x64_epi64(y, 0xD8);
+ } else {
+ *c = _mm256_loadu_si256((const __m256i *)addr);
+ }
}
int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff,
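
The rewritten read_coeff() above swaps a preprocessor switch for an ordinary if on sizeof(tran_low_t) (int32_t in CONFIG_HIGHBITDEPTH builds, int16_t otherwise): sizeof is a compile-time constant, so the dead branch folds away, and both branches now stay syntax-checked in every configuration. In the 32-bit branch, _mm256_packs_epi32 narrows with signed saturation and the 0xD8 permute restores source order across the two 128-bit lanes. A scalar sketch of that narrowing (narrow_coeffs is a hypothetical helper, not an aom API):

#include <stdint.h>

static void narrow_coeffs(const int32_t *src, int16_t *dst) {
  for (int i = 0; i < 16; ++i) {       // one __m256i worth of 16-bit results
    int32_t v = src[i];
    if (v > INT16_MAX) v = INT16_MAX;  // signed saturation, as packs does
    if (v < INT16_MIN) v = INT16_MIN;
    dst[i] = (int16_t)v;
  }
}
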
diff --git a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
index cab36f2bd..b684f7a3a 100644
--- a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -195,7 +195,7 @@ static void fadst4x4_sse4_1(__m128i *in, int bit) {
}
void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *coeff,
- int input_stride, int tx_type, int bd) {
+ int input_stride, TX_TYPE tx_type, int bd) {
__m128i in[4];
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
@@ -926,7 +926,7 @@ static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
}
void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
__m128i in[16], out[16];
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
@@ -1800,7 +1800,7 @@ static void write_buffer_16x16(const __m128i *in, int32_t *output) {
}
void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *coeff,
- int stride, int tx_type, int bd) {
+ int stride, TX_TYPE tx_type, int bd) {
__m128i in[64], out[64];
const TXFM_1D_CFG *row_cfg = NULL;
const TXFM_1D_CFG *col_cfg = NULL;
diff --git a/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c b/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
index af8e9a5f4..88621c82b 100644
--- a/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
+++ b/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
@@ -916,7 +916,7 @@ static void fidtx16_avx2(__m256i *in) {
void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m256i in[16];
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif
@@ -1516,7 +1516,7 @@ void av1_fht32x32_avx2(const int16_t *input, tran_low_t *output, int stride,
TxfmParam *txfm_param) {
__m256i in0[32]; // left 32 columns
__m256i in1[32]; // right 32 columns
- int tx_type = txfm_param->tx_type;
+ const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
assert(tx_type != MRC_DCT && "No avx2 32x32 implementation of MRC_DCT");
#endif
diff --git a/third_party/aom/build/cmake/aom_config.c.cmake b/third_party/aom/build/cmake/aom_config.c.cmake
index 70bf95037..62f0a10ab 100644
--- a/third_party/aom/build/cmake/aom_config.c.cmake
+++ b/third_party/aom/build/cmake/aom_config.c.cmake
@@ -10,6 +10,4 @@
*/
#include "aom/aom_codec.h"
static const char* const cfg = "${AOM_CMAKE_CONFIG}";
-static const char* const aom_git_hash = "${AOM_GIT_HASH}";
const char *aom_codec_build_config(void) {return cfg;}
-const char *aom_codec_git_hash(void) {return aom_git_hash;}
diff --git a/third_party/aom/build/cmake/aom_config_defaults.cmake b/third_party/aom/build/cmake/aom_config_defaults.cmake
index abdae1d66..488401be1 100644
--- a/third_party/aom/build/cmake/aom_config_defaults.cmake
+++ b/third_party/aom/build/cmake/aom_config_defaults.cmake
@@ -8,170 +8,208 @@
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
-
# Defaults for every libaom configuration variable. Here we add all libaom
# config variables to the cmake variable cache, but omit the FORCE parameter to
# allow users to specify values when executing cmake to generate build files.
# Values here are used only if not set by the user.
-set(RESTRICT "" CACHE STRING "Sets RESTRICT value for current target.")
set(INLINE "" CACHE STRING "Sets INLINE value for current target.")
+set(RESTRICT "" CACHE STRING "Sets RESTRICT value for current target.")
+
+# CPUs.
set(ARCH_ARM 0 CACHE NUMBER "Enables ARM architecture.")
set(ARCH_MIPS 0 CACHE NUMBER "Enables MIPS architecture.")
set(ARCH_X86 0 CACHE NUMBER "Enables X86 architecture.")
set(ARCH_X86_64 0 CACHE NUMBER "Enables X86_64 architecture.")
-set(HAVE_EDSP 0 CACHE NUMBER "Enables EDSP optimizations.")
+
+# ARM optimization flags.
set(HAVE_NEON 0 CACHE NUMBER "Enables NEON intrinsics optimizations.")
set(HAVE_NEON_ASM 0 CACHE NUMBER "Enables NEON assembly optimizations.")
-set(HAVE_MIPS32 0 CACHE NUMBER "Enables MIPS32 optimizations.")
+
+# MIPS optimization flags.
set(HAVE_DSPR2 0 CACHE NUMBER "Enables DSPR2 optimizations.")
-set(HAVE_MSA 0 CACHE NUMBER "Enables MSA optimizations.")
+set(HAVE_MIPS32 0 CACHE NUMBER "Enables MIPS32 optimizations.")
set(HAVE_MIPS64 0 CACHE NUMBER "Enables MIPS64 optimizations. ")
+set(HAVE_MSA 0 CACHE NUMBER "Enables MSA optimizations.")
+
+# x86/x86_64 optimization flags.
+set(HAVE_AVX 0 CACHE NUMBER "Enables AVX optimizations.")
+set(HAVE_AVX2 0 CACHE NUMBER "Enables AVX2 optimizations.")
set(HAVE_MMX 0 CACHE NUMBER "Enables MMX optimizations. ")
set(HAVE_SSE 0 CACHE NUMBER "Enables SSE optimizations.")
set(HAVE_SSE2 0 CACHE NUMBER "Enables SSE2 optimizations.")
set(HAVE_SSE3 0 CACHE NUMBER "Enables SSE3 optimizations.")
-set(HAVE_SSSE3 0 CACHE NUMBER "Enables SSSE3 optimizations.")
set(HAVE_SSE4_1 0 CACHE NUMBER "Enables SSE 4.1 optimizations.")
-set(HAVE_AVX 0 CACHE NUMBER "Enables AVX optimizations.")
-set(HAVE_AVX2 0 CACHE NUMBER "Enables AVX2 optimizations.")
+set(HAVE_SSSE3 0 CACHE NUMBER "Enables SSSE3 optimizations.")
+
+# Flags describing the build environment.
set(HAVE_AOM_PORTS 0 CACHE NUMBER "Internal flag, deprecated.")
set(HAVE_FEXCEPT 0 CACHE NUMBER "Internal flag, GNU fenv.h present for target.")
set(HAVE_PTHREAD_H 0 CACHE NUMBER "Internal flag, target pthread support.")
set(HAVE_UNISTD_H 0 CACHE NUMBER "Internal flag, unistd.h present for target.")
set(HAVE_WXWIDGETS 0 CACHE NUMBER "WxWidgets present.")
-set(CONFIG_DEPENDENCY_TRACKING 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXTERNAL_BUILD 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_INSTALL_BINS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_INSTALL_LIBS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_INSTALL_SRCS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_DEBUG 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_GPROF 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_GCOV 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_RVCT 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_GCC 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_MSVS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_PIC 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_BIG_ENDIAN 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_CODEC_SRCS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_DEBUG_LIBS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_RUNTIME_CPU_DETECT 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_POSTPROC 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_MULTITHREAD 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_INTERNAL_STATS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_AV1_ENCODER 1 CACHE NUMBER "Enable AV1 encoder.")
-set(CONFIG_AV1_DECODER 1 CACHE NUMBER "Enable AV1 decoder.")
+
+# Deprecated flags preserved for compatibility with configure build.
+set(CONFIG_CODEC_SRCS 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_DEBUG_LIBS 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_DEPENDENCY_TRACKING 1 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_EXPERIMENTAL 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_EXTERNAL_BUILD 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_INSTALL_BINS 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_INSTALL_DOCS 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_INSTALL_LIBS 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_INSTALL_SRCS 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_POSTPROC 1 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_POSTPROC_VISUALIZER 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_RVCT 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_SMALL 0 CACHE NUMBER "Deprecated flag.")
+set(CONFIG_STATIC_MSVCRT 0 CACHE NUMBER "Deprecated flag.")
+
+# Build configuration flags.
set(CONFIG_AV1 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_STATIC_MSVCRT 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_SPATIAL_RESAMPLING 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_REALTIME_ONLY 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_ONTHEFLY_BITPACKING 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_ERROR_CONCEALMENT 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_SHARED 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_STATIC 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_SMALL 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_POSTPROC_VISUALIZER 0 CACHE NUMBER "Internal flag.")
+set(CONFIG_AV1_DECODER 1 CACHE NUMBER "Enable AV1 decoder.")
+set(CONFIG_AV1_ENCODER 1 CACHE NUMBER "Enable AV1 encoder.")
+set(CONFIG_BIG_ENDIAN 0 CACHE NUMBER "Internal flag.")
+set(CONFIG_GCC 0 CACHE NUMBER "Building with GCC (detected).")
+set(CONFIG_GCOV 0 CACHE NUMBER "Enable gcov support.")
+set(CONFIG_GPROF 0 CACHE NUMBER "Enable gprof support.")
+set(CONFIG_LIBYUV 1 CACHE NUMBER "Enables libyuv scaling/conversion support.")
+set(CONFIG_MSVS 0 CACHE NUMBER "Building with MS Visual Studio (detected).")
+set(CONFIG_MULTITHREAD 1 CACHE NUMBER "Multithread support.")
set(CONFIG_OS_SUPPORT 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_UNIT_TESTS 1 CACHE NUMBER "Internal flag.")
+set(CONFIG_PIC 0 CACHE NUMBER "Build with PIC enabled.")
+set(CONFIG_RUNTIME_CPU_DETECT 1 CACHE NUMBER "Runtime CPU detection support.")
+set(CONFIG_SHARED 0 CACHE NUMBER "Build shared libs.")
+set(CONFIG_STATIC 1 CACHE NUMBER "Build static libs.")
set(CONFIG_WEBM_IO 1 CACHE NUMBER "Enables WebM support.")
-set(CONFIG_LIBYUV 1 CACHE NUMBER "Enables libyuv scaling and conversion support.")
+
+# Debugging flags.
+set(CONFIG_BITSTREAM_DEBUG 0 CACHE NUMBER "Bitstream debugging flag.")
+set(CONFIG_DEBUG 0 CACHE NUMBER "Debug build flag.")
+
+# Testing flags.
+set(CONFIG_DECODE_PERF_TESTS 0 CACHE NUMBER "Enables decoder performance test.")
+set(CONFIG_ENCODE_PERF_TESTS 0 CACHE NUMBER "Enables encoder performance test.")
+set(CONFIG_UNIT_TESTS 1 CACHE NUMBER "Enables unit tests.")
+
+# AV1 feature flags.
set(CONFIG_ACCOUNTING 0 CACHE NUMBER "Enables bit accounting.")
+set(CONFIG_ANALYZER 0 CACHE NUMBER "Enables bit stream analyzer.")
+set(CONFIG_COEFFICIENT_RANGE_CHECKING 0 CACHE NUMBER "Coefficient range check.")
+set(CONFIG_HIGHBITDEPTH 1 CACHE NUMBER "Enables high bit depth support.")
set(CONFIG_INSPECTION 0 CACHE NUMBER "Enables bitstream inspection.")
-set(CONFIG_DECODE_PERF_TESTS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_ENCODE_PERF_TESTS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_COEFFICIENT_RANGE_CHECKING 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_LOWBITDEPTH 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_HIGHBITDEPTH 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXPERIMENTAL 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_SIZE_LIMIT 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_COLORSPACE_HEADERS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_FP_MB_STATS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_CDEF 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_VAR_TX 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_RECT_TX 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_RECT_TX_EXT 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_TPL_MV 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_DUAL_FILTER 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_CONVOLVE_ROUND 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_COMPOUND_ROUND 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXT_TX 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_DPCM_INTRA 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_TX64X64 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXT_INTRA 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_INTRA_INTERP 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_FILTER_INTRA 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_INTRA_EDGE 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_INTRABC 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXT_INTER 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_INTERINTRA 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_WEDGE 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_COMPOUND_SEGMENT 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXT_REFS 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_SPEED_REFS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_GLOBAL_MOTION 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_NEW_QUANT 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_SUPERTX 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_ANS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_LOOP_RESTORATION 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXT_PARTITION 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXT_PARTITION_TYPES 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_UNPOISON_PARTITION_CTX 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXT_TILE 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_MOTION_VAR 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_NCOBMC 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_WARPED_MOTION 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_Q_ADAPT_PROBS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_BITSTREAM_DEBUG 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_ALT_INTRA 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_PALETTE 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_PALETTE_DELTA_ENCODING 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_RAWBITS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_EC_SMALLMUL 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_PVQ 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_CFL 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_XIPHRC 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_CB4X4 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_CHROMA_2X2 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_CHROMA_SUB8X8 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_FRAME_SIZE 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_DELTA_Q 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXT_DELTA_Q 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_ADAPT_SCAN 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_FILTER_7BIT 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_PARALLEL_DEBLOCKING 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_LOOPFILTERING_ACROSS_TILES 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_TEMPMV_SIGNALING 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_RD_DEBUG 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_REFERENCE_BUFFER 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_COEF_INTERLEAVE 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_ENTROPY_STATS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_MASKED_TX 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_DEPENDENT_HORZTILES 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_DAALA_DIST 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_TRIPRED 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_PALETTE_THROUGHPUT 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_REF_ADAPT 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_LV_MAP 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_TXK_SEL 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_MV_COMPRESS 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_FRAME_SUPERRES 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_NEW_MULTISYMBOL 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_COMPOUND_SINGLEREF 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_AOM_QM 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_ONE_SIDED_COMPOUND 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_EXT_COMP_REFS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_SMOOTH_HV 1 CACHE NUMBER "Internal flag.")
-set(CONFIG_VAR_REFS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_RECT_INTRA_PRED 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_LGT 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_SBL_SYMBOL 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_NCOBMC_ADAPT_WEIGHT 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_BGSPRITE 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_VAR_TX_NO_TX_MODE 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_ANALYZER 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_DCT_ONLY 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_DAALA_DCT4 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_DAALA_DCT8 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_GF_GROUPS 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_MRC_TX 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_INTER_STATS_ONLY 0 CACHE NUMBER "Internal flag.")
-set(CONFIG_DIST_8X8 0 CACHE NUMBER "Internal flag.")
+set(CONFIG_INTERNAL_STATS 0 CACHE NUMBER "Codec stats.")
+set(CONFIG_LOWBITDEPTH 1 CACHE NUMBER "Enables low bit depth support.")
+set(CONFIG_REALTIME_ONLY 0 CACHE NUMBER "Support only realtime encodes.")
+set(CONFIG_SIZE_LIMIT 0 CACHE NUMBER "Limit max decode width/height.")
+set(CONFIG_SPATIAL_RESAMPLING 1 CACHE NUMBER "Spatial resampling.")
+set(CONFIG_SYMBOLRATE 0 CACHE NUMBER "Enables symbol rate accounting.")
+
+# AV1 experiment flags.
+set(CONFIG_ADAPT_SCAN 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_AMVR 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_ANS 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_AOM_QM 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_BGSPRITE 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_CB4X4 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_CDEF 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_CDEF_SINGLEPASS 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_CFL 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_CHROMA_2X2 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_CHROMA_SUB8X8 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_COEF_INTERLEAVE 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_COLORSPACE_HEADERS 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_COMPOUND_ROUND 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_COMPOUND_SEGMENT 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_COMPOUND_SINGLEREF 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_CONVOLVE_ROUND 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_CTX1D 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DAALA_DCT16 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DAALA_DCT32 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DAALA_DCT4 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DAALA_DCT64 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DAALA_DCT8 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DAALA_TX 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DCT_ONLY 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DEBLOCK_13TAP 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DEPENDENT_HORZTILES 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DIST_8X8 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_DUAL_FILTER 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_ENTROPY_STATS 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_COMP_REFS 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_DELTA_Q 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_INTRA 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_PARTITION 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_PARTITION_TYPES 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_PARTITION_TYPES_AB 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_REFS 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_SKIP 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_TILE 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_EXT_TX 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_FILTER_INTRA 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_FP_MB_STATS 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_FRAME_MARKER 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_FRAME_SIGN_BIAS 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_FRAME_SIZE 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_FRAME_SUPERRES 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_GLOBAL_MOTION 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_HASH_ME 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_HORZONLY_FRAME_SUPERRES 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_INTERINTRA 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_INTER_STATS_ONLY 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_INTRABC 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_INTRA_EDGE 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_INTRA_INTERP 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_JNT_COMP 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_KF_CTX 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_LGT 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_LGT_FROM_PRED 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_LOOPFILTERING_ACROSS_TILES 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_LOOPFILTER_LEVEL 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_LOOP_RESTORATION 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_LPF_DIRECT 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_LPF_SB 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_LV_MAP 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_MASKED_TX 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_MAX_TILE 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_MFMV 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_MOTION_VAR 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_MRC_TX 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_MV_COMPRESS 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_NCOBMC 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_NCOBMC_ADAPT_WEIGHT 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_NEW_MULTISYMBOL 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_NEW_QUANT 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_NO_FRAME_CONTEXT_SIGNALING 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_OBU 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_ONE_SIDED_COMPOUND 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_OPT_REF_MV 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_PALETTE_DELTA_ENCODING 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_PALETTE_THROUGHPUT 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_PARALLEL_DEBLOCKING 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_PVQ 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_Q_ADAPT_PROBS 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_RAWBITS 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_RD_DEBUG 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_RECT_TX 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_RECT_TX_EXT 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_REFERENCE_BUFFER 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_REF_ADAPT 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_RESTRICT_COMPRESSED_HDR 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_SBL_SYMBOL 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_SEGMENT_ZEROMV 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_SMOOTH_HV 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_STRIPED_LOOP_RESTORATION 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_SUPERTX 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_TEMPMV_SIGNALING 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_TMV 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_TPL_MV 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_TX64X64 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_TXK_SEL 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_TXMG 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_UNPOISON_PARTITION_CTX 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_VAR_REFS 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_VAR_TX 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_VAR_TX_NO_TX_MODE 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_WARPED_MOTION 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_WEDGE 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_XIPHRC 0 CACHE NUMBER "AV1 experiment flag.")
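
Because every value in this file is declared CACHE without FORCE, the comment at the top applies: a default is used only when the user has not already set the variable, so any flag here can be overridden at generation time, e.g. cmake path/to/aom -DCONFIG_EXT_PARTITION=1. The uniform "AV1 experiment flag." help strings are not cosmetic: aom_configure.cmake (below) keys off that exact HELPSTRING to enumerate the experiment set.
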
diff --git a/third_party/aom/build/cmake/aom_configure.cmake b/third_party/aom/build/cmake/aom_configure.cmake
index 1c373e589..3553710d3 100644
--- a/third_party/aom/build/cmake/aom_configure.cmake
+++ b/third_party/aom/build/cmake/aom_configure.cmake
@@ -33,9 +33,11 @@ endforeach()
string(STRIP "${AOM_CMAKE_CONFIG}" AOM_CMAKE_CONFIG)
include("${AOM_ROOT}/build/cmake/aom_config_defaults.cmake")
+include("${AOM_ROOT}/build/cmake/aom_experiment_deps.cmake")
include("${AOM_ROOT}/build/cmake/aom_optimization.cmake")
include("${AOM_ROOT}/build/cmake/compiler_flags.cmake")
include("${AOM_ROOT}/build/cmake/compiler_tests.cmake")
+include("${AOM_ROOT}/build/cmake/util.cmake")
# Build a list of all configurable variables.
get_cmake_property(cmake_cache_vars CACHE_VARIABLES)
@@ -45,6 +47,24 @@ foreach (var ${cmake_cache_vars})
endif ()
endforeach ()
+# Adopted experiments get enabled by default. For debugging, make it possible
+# to turn them all off with a single option.
+if (NOT ENABLE_ADOPTED_EXPERIMENTS)
+ get_cmake_property(cmake_cache_vars CACHE_VARIABLES)
+ unset(var)
+ foreach (var ${cmake_cache_vars})
+ unset(var_helpstring)
+ get_property(var_helpstring CACHE ${var} PROPERTY HELPSTRING)
+ if ("${var_helpstring}" STREQUAL "AV1 experiment flag.")
+ if ("${var}" STREQUAL "CONFIG_CB4X4")
+ # CB4X4 is required and can not be disabled.
+ else ()
+ set(${var} 0)
+ endif ()
+ endif ()
+ endforeach ()
+endif ()
+
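
The loop keys off the HELPSTRING cache property: every variable documented as "AV1 experiment flag." in aom_config_defaults.cmake is reset to 0 when ENABLE_ADOPTED_EXPERIMENTS is off, with CONFIG_CB4X4 exempted because it is required. Newly added experiments are covered automatically as long as they reuse that exact help string.
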
# Detect target CPU.
if (NOT AOM_TARGET_CPU)
if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64" OR
@@ -83,17 +103,28 @@ string(STRIP "${AOM_CMAKE_CONFIG}" AOM_CMAKE_CONFIG)
message("--- aom_configure: Detected CPU: ${AOM_TARGET_CPU}")
set(AOM_TARGET_SYSTEM ${CMAKE_SYSTEM_NAME})
-if (BUILD_SHARED_LIBS)
- set(CONFIG_PIC 1)
- set(CONFIG_SHARED 1)
- set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+if ("${CMAKE_BUILD_TYPE}" MATCHES "Deb")
+ set(CONFIG_DEBUG 1)
+endif ()
- if ("${AOM_TARGET_SYSTEM}" STREQUAL "Linux" AND
- "${AOM_TARGET_CPU}" MATCHES "^armv7")
- set(AOM_AS_FLAGS ${AOM_AS_FLAGS} --defsym PIC=1)
- else ()
- set(AOM_AS_FLAGS ${AOM_AS_FLAGS} -DPIC)
+if (NOT MSVC)
+ if (BUILD_SHARED_LIBS)
+ set(CONFIG_PIC 1)
+ set(CONFIG_SHARED 1)
+ set(CONFIG_STATIC 0)
+ endif ()
+
+ if (CONFIG_PIC)
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+ if ("${AOM_TARGET_SYSTEM}" STREQUAL "Linux" AND
+ "${AOM_TARGET_CPU}" MATCHES "^armv7")
+ set(AOM_AS_FLAGS ${AOM_AS_FLAGS} --defsym PIC=1)
+ else ()
+ set(AOM_AS_FLAGS ${AOM_AS_FLAGS} -DPIC)
+ endif ()
endif ()
+else ()
+ set(CONFIG_MSVS 1)
endif ()
if (NOT "${AOM_SUPPORTED_CPU_TARGETS}" MATCHES "${AOM_TARGET_CPU}")
@@ -112,8 +143,9 @@ if ("${AOM_TARGET_CPU}" STREQUAL "x86" OR "${AOM_TARGET_CPU}" STREQUAL "x86_64")
endif ()
if (NOT AS_EXECUTABLE)
- message(FATAL_ERROR "Unable to find yasm. To build without optimizations, "
- "add -DAOM_TARGET_CPU=generic to your cmake command line.")
+ message(FATAL_ERROR "Unable to find assembler. To build without "
+ "optimizations, add -DAOM_TARGET_CPU=generic to your cmake command "
+ "line.")
endif ()
get_asm_obj_format("objformat")
set(AOM_AS_FLAGS -f ${objformat} ${AOM_AS_FLAGS})
@@ -134,26 +166,71 @@ elseif ("${AOM_TARGET_CPU}" MATCHES "arm")
string(STRIP "${AOM_AS_FLAGS}" AOM_AS_FLAGS)
endif ()
+if (CONFIG_ANALYZER)
+ find_package(wxWidgets REQUIRED adv base core)
+ include(${wxWidgets_USE_FILE})
+endif ()
+
+if (NOT MSVC AND CMAKE_C_COMPILER_ID MATCHES "GNU\|Clang")
+ set(CONFIG_GCC 1)
+endif ()
+
+if (CONFIG_GCOV)
+ message("--- Testing for CONFIG_GCOV support.")
+ require_linker_flag("-fprofile-arcs -ftest-coverage")
+ require_compiler_flag("-fprofile-arcs -ftest-coverage" YES)
+endif ()
+
+if (CONFIG_GPROF)
+ message("--- Testing for CONFIG_GPROF support.")
+ require_compiler_flag("-pg" YES)
+endif ()
+
+if ("${AOM_TARGET_SYSTEM}" MATCHES "Darwin\|Linux\|Windows")
+ set(CONFIG_OS_SUPPORT 1)
+endif ()
+
+################################################################################
+# Fix CONFIG_* dependencies. This must be done before including cpu.cmake to
+# ensure RTCD_CONFIG_* are properly set.
+fix_experiment_configs()
+
+# Test compiler support.
+aom_get_inline("INLINE")
+
+# TODO(tomfinegan): aom_ports_check is legacy; HAVE_AOM_PORTS is not used
+# anywhere in the aom sources. To be removed after parity with the legacy
+# build system stops being important.
+aom_check_source_compiles("aom_ports_check"
+ "#include \"${AOM_ROOT}/aom/aom_integer.h\""
+ HAVE_AOM_PORTS)
+aom_check_source_compiles("pthread_check" "#include <pthread.h>" HAVE_PTHREAD_H)
+aom_check_source_compiles("unistd_check" "#include <unistd.h>" HAVE_UNISTD_H)
+
+if (NOT MSVC)
+ aom_push_var(CMAKE_REQUIRED_LIBRARIES "m")
+ aom_check_c_compiles("fenv_check"
+ "#define _GNU_SOURCE
+ #include <fenv.h>
+ void unused(void) {
+ (void)unused;
+ (void)feenableexcept(FE_DIVBYZERO | FE_INVALID);
+ }" HAVE_FEXCEPT)
+ aom_pop_var(CMAKE_REQUIRED_LIBRARIES)
+endif()
+
include("${AOM_ROOT}/build/cmake/cpu.cmake")
if (ENABLE_CCACHE)
- find_program(CCACHE "ccache")
- if (NOT "${CCACHE}" STREQUAL "")
- set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE}")
- set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE}")
- else ()
- message("--- Cannot find ccache, ENABLE_CCACHE ignored.")
- endif ()
+ set_compiler_launcher(ENABLE_CCACHE ccache)
endif ()
if (ENABLE_DISTCC)
- find_program(DISTCC "distcc")
- if (NOT "${DISTCC}" STREQUAL "")
- set(CMAKE_C_COMPILER_LAUNCHER "${DISTCC}")
- set(CMAKE_CXX_COMPILER_LAUNCHER "${DISTCC}")
- else ()
- message("--- Cannot find distcc, ENABLE_DISTCC ignored.")
- endif ()
+ set_compiler_launcher(ENABLE_DISTCC distcc)
+endif ()
+
+if (ENABLE_GOMA)
+ set_compiler_launcher(ENABLE_GOMA gomacc)
endif ()
if (NOT CONFIG_AV1_DECODER AND NOT CONFIG_AV1_ENCODER)
@@ -175,8 +252,10 @@ else ()
add_compiler_flag_if_supported("-Wextra")
add_compiler_flag_if_supported("-Wfloat-conversion")
add_compiler_flag_if_supported("-Wimplicit-function-declaration")
+ add_compiler_flag_if_supported("-Wlogical-op")
add_compiler_flag_if_supported("-Wpointer-arith")
add_compiler_flag_if_supported("-Wsign-compare")
+ add_compiler_flag_if_supported("-Wstack-usage=320000")
add_compiler_flag_if_supported("-Wstring-conversion")
add_compiler_flag_if_supported("-Wtype-limits")
add_compiler_flag_if_supported("-Wuninitialized")
@@ -199,10 +278,6 @@ else ()
# to the existing configure/make build system.
add_compiler_flag_if_supported("-Wno-unused-function")
- if (CMAKE_C_COMPILER_ID MATCHES "GNU\|Clang")
- set(CONFIG_GCC 1)
- endif ()
-
if ("${CMAKE_BUILD_TYPE}" MATCHES "Rel")
add_compiler_flag_if_supported("-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0")
endif ()
@@ -210,86 +285,6 @@ else ()
add_compiler_flag_if_supported("-D_FILE_OFFSET_BITS=64")
endif ()
-if ("${AOM_TARGET_SYSTEM}" MATCHES "Darwin\|Linux\|Windows")
- set(CONFIG_OS_SUPPORT 1)
-endif ()
-
-# Test compiler support.
-aom_get_inline("INLINE")
-
-# TODO(tomfinegan): aom_ports_check is legacy; HAVE_AOM_PORTS is not used
-# anywhere in the aom sources. To be removed after parity with the legacy
-# build system stops being important.
-aom_check_source_compiles("aom_ports_check"
- "#include \"${AOM_ROOT}/aom/aom_integer.h\""
- HAVE_AOM_PORTS)
-aom_check_source_compiles("pthread_check" "#include <pthread.h>" HAVE_PTHREAD_H)
-aom_check_source_compiles("unistd_check" "#include <unistd.h>" HAVE_UNISTD_H)
-
-if (CONFIG_ANALYZER)
- find_package(wxWidgets REQUIRED adv base core)
- include(${wxWidgets_USE_FILE})
-
- if (NOT CONFIG_INSPECTION)
- set(CONFIG_INSPECTION 1)
- message(WARNING
- "--- Enabled CONFIG_INSPECTION, required for CONFIG_ANALYZER.")
- endif ()
-endif ()
-
-if (CONFIG_VAR_TX_NO_TX_MODE AND NOT CONFIG_VAR_TX)
- message(WARNING
- "--- CONFIG_VAR_TX_NO_TX_MODE requires CONFIG_VAR_TX, disabling.")
- set(CONFIG_VAR_TX_NO_TX_MODE 0)
-endif()
-
-if (CONFIG_DAALA_DCT4)
- if (NOT CONFIG_DCT_ONLY)
- message(WARNING
- "--- Enabled CONFIG_DCT_ONLY, needed for CONFIG_DAALA_DCT4.")
- set(CONFIG_DCT_ONLY 1)
- endif()
-endif()
-
-if (CONFIG_DAALA_DCT4 OR CONFIG_DAALA_DCT8)
- if (HAVE_MMX)
- message(WARNING
- "--- Disabled HAVE_MMX, incompatible with CONFIG_DAALA_DCTx.")
- set(HAVE_MMX 0)
- endif()
- if (CONFIG_RECT_TX)
- message(WARNING
- "--- Disabled CONFIG_RECT_TX, incompatible with CONFIG_DAALA_DCTx.")
- set(CONFIG_RECT_TX 0)
- endif()
- if (CONFIG_VAR_TX)
- message(WARNING
- "--- Disabled CONFIG_VAR_TX, incompatible with CONFIG_DAALA_DCTx.")
- set(CONFIG_VAR_TX 0)
- endif()
- if (CONFIG_LGT)
- message(WARNING
- "--- Disabled CONFIG_LGT, incompatible with CONFIG_DAALA_DCTx.")
- set(CONFIG_LGT 0)
- endif()
- if (NOT CONFIG_LOWBITDEPTH)
- message(WARNING
- "--- Enabled CONFIG_LOWBITDEPTH, needed for CONFIG_DAALA_DCTx.")
- set(CONFIG_LOWBITDEPTH 1)
- endif()
-endif()
-
-if (NOT MSVC)
- aom_push_var(CMAKE_REQUIRED_LIBRARIES "m")
- aom_check_c_compiles("fenv_check"
- "#define _GNU_SOURCE
- #include <fenv.h>
- void unused(void) {
- (void)feenableexcept(FE_DIVBYZERO | FE_INVALID);
- }" HAVE_FEXCEPT)
- aom_pop_var(CMAKE_REQUIRED_LIBRARIES)
-endif()
-
set(AOM_LIB_LINK_TYPE PUBLIC)
if (EMSCRIPTEN)
# Avoid CMake generation time errors resulting from collisions with the form
@@ -311,19 +306,8 @@ configure_file("${aom_config_h_template}" "${AOM_CONFIG_DIR}/aom_config.h")
# Read the current git hash.
find_package(Git)
-set(AOM_GIT_DESCRIPTION)
-set(AOM_GIT_HASH)
-if (GIT_FOUND)
- # TODO(tomfinegan): Add build rule so users don't have to re-run cmake to
- # create accurately versioned cmake builds.
- execute_process(COMMAND ${GIT_EXECUTABLE}
- --git-dir=${AOM_ROOT}/.git rev-parse HEAD
- OUTPUT_VARIABLE AOM_GIT_HASH)
- execute_process(COMMAND ${GIT_EXECUTABLE} --git-dir=${AOM_ROOT}/.git describe
- OUTPUT_VARIABLE AOM_GIT_DESCRIPTION ERROR_QUIET)
- # Consume the newline at the end of the git output.
- string(STRIP "${AOM_GIT_HASH}" AOM_GIT_HASH)
- string(STRIP "${AOM_GIT_DESCRIPTION}" AOM_GIT_DESCRIPTION)
+if (NOT GIT_FOUND)
+ message("--- Git missing, version will be read from CHANGELOG.")
endif ()
configure_file("${AOM_ROOT}/build/cmake/aom_config.c.cmake"
@@ -367,64 +351,26 @@ foreach(NUM RANGE ${AOM_RTCD_CUSTOM_COMMAND_COUNT})
OUTPUT_FILE ${AOM_RTCD_HEADER_FILE})
endforeach()
-function (add_rtcd_build_step config output source symbol)
- add_custom_command(
- OUTPUT ${output}
- COMMAND ${PERL_EXECUTABLE}
- ARGS "${AOM_ROOT}/build/make/rtcd.pl"
- --arch=${AOM_TARGET_CPU}
- --sym=${symbol}
- ${AOM_RTCD_FLAGS}
- --config=${AOM_CONFIG_DIR}/${AOM_TARGET_CPU}_rtcd_config.rtcd
- ${config}
- > ${output}
- DEPENDS ${config}
- COMMENT "Generating ${output}"
- WORKING_DIRECTORY ${AOM_CONFIG_DIR}
- VERBATIM)
- set_property(SOURCE ${source} PROPERTY OBJECT_DEPENDS ${output})
- set_property(SOURCE ${output} PROPERTY GENERATED)
-endfunction ()
-
# Generate aom_version.h.
-if ("${AOM_GIT_DESCRIPTION}" STREQUAL "")
- set(AOM_GIT_DESCRIPTION "${AOM_ROOT}/CHANGELOG")
-endif ()
execute_process(
- COMMAND ${PERL_EXECUTABLE} "${AOM_ROOT}/build/cmake/aom_version.pl"
- --version_data=${AOM_GIT_DESCRIPTION}
- --version_filename=${AOM_CONFIG_DIR}/aom_version.h)
+ COMMAND ${CMAKE_COMMAND}
+ -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
+ -DAOM_ROOT=${AOM_ROOT}
+ -DGIT_EXECUTABLE=${GIT_EXECUTABLE}
+ -DPERL_EXECUTABLE=${PERL_EXECUTABLE}
+ -P "${AOM_ROOT}/build/cmake/version.cmake")
-# Generate aom.pc (pkg-config file).
if (NOT MSVC)
- # Extract the version string from aom_version.h
- file(STRINGS "${AOM_CONFIG_DIR}/aom_version.h" aom_version
- REGEX "VERSION_STRING_NOSP")
- string(REPLACE "#define VERSION_STRING_NOSP \"v" "" aom_version
- "${aom_version}")
- string(REPLACE "\"" "" aom_version "${aom_version}")
-
- # Write pkg-config info.
- set(prefix "${CMAKE_INSTALL_PREFIX}")
- set(pkgconfig_file "${AOM_CONFIG_DIR}/aom.pc")
- string(TOLOWER ${CMAKE_PROJECT_NAME} pkg_name)
- file(WRITE "${pkgconfig_file}" "# libaom pkg-config.\n")
- file(APPEND "${pkgconfig_file}" "prefix=${prefix}\n")
- file(APPEND "${pkgconfig_file}" "exec_prefix=${prefix}/bin\n")
- file(APPEND "${pkgconfig_file}" "libdir=${prefix}/lib\n")
- file(APPEND "${pkgconfig_file}" "includedir=${prefix}/include\n\n")
- file(APPEND "${pkgconfig_file}" "Name: ${pkg_name}\n")
- file(APPEND "${pkgconfig_file}" "Description: AV1 codec library.\n")
- file(APPEND "${pkgconfig_file}" "Version: ${aom_version}\n")
- file(APPEND "${pkgconfig_file}" "Requires:\n")
- file(APPEND "${pkgconfig_file}" "Conflicts:\n")
- file(APPEND "${pkgconfig_file}" "Libs: -L${prefix}/lib -l${pkg_name} -lm\n")
- if (CONFIG_MULTITHREAD AND HAVE_PTHREAD_H)
- file(APPEND "${pkgconfig_file}" "Libs.private: -lm -lpthread\n")
- else ()
- file(APPEND "${pkgconfig_file}" "Libs.private: -lm\n")
- endif ()
- file(APPEND "${pkgconfig_file}" "Cflags: -I${prefix}/include\n")
+ # Generate aom.pc (pkg-config file).
+ execute_process(
+ COMMAND ${CMAKE_COMMAND}
+ -DAOM_CONFIG_DIR=${AOM_CONFIG_DIR}
+ -DAOM_ROOT=${AOM_ROOT}
+ -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+ -DCMAKE_PROJECT_NAME=${CMAKE_PROJECT_NAME}
+ -DCONFIG_MULTITHREAD=${CONFIG_MULTITHREAD}
+ -DHAVE_PTHREAD_H=${HAVE_PTHREAD_H}
+ -P "${AOM_ROOT}/build/cmake/pkg_config.cmake")
endif ()
endif () # AOM_BUILD_CMAKE_AOM_CONFIGURE_CMAKE_
diff --git a/third_party/aom/build/cmake/aom_experiment_deps.cmake b/third_party/aom/build/cmake/aom_experiment_deps.cmake
new file mode 100644
index 000000000..938af2386
--- /dev/null
+++ b/third_party/aom/build/cmake/aom_experiment_deps.cmake
@@ -0,0 +1,139 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+if (NOT AOM_BUILD_CMAKE_AOM_EXPERIMENT_DEPS_CMAKE_)
+set(AOM_BUILD_CMAKE_AOM_EXPERIMENT_DEPS_CMAKE_ 1)
+
+# Adjusts CONFIG_* CMake variables to address conflicts between active AV1
+# experiments.
+macro (fix_experiment_configs)
+ if (CONFIG_ANALYZER)
+ if (NOT CONFIG_INSPECTION)
+ change_config_and_warn(CONFIG_INSPECTION 1 CONFIG_ANALYZER)
+ endif ()
+ endif ()
+
+ if (CONFIG_VAR_TX_NO_TX_MODE AND NOT CONFIG_VAR_TX)
+ change_config_and_warn(CONFIG_VAR_TX 1 CONFIG_VAR_TX_NO_TX_MODE)
+ endif ()
+
+ if (CONFIG_CHROMA_2X2)
+ change_config_and_warn(CONFIG_CHROMA_SUB8X8 0 CONFIG_CHROMA_2X2)
+ endif ()
+
+ if (CONFIG_DAALA_TX)
+ set(CONFIG_DAALA_DCT4 1)
+ set(CONFIG_DAALA_DCT8 1)
+ set(CONFIG_DAALA_DCT16 1)
+ set(CONFIG_DAALA_DCT32 1)
+ set(CONFIG_DAALA_DCT64 1)
+ endif ()
+
+ if (CONFIG_DAALA_DCT64)
+ if (NOT CONFIG_TX64X64)
+ set(CONFIG_DAALA_DCT64 0)
+ message("--- DAALA_DCT64 requires TX64X64: disabled DAALA_DCT64")
+ endif ()
+ endif ()
+
+ if (CONFIG_DAALA_DCT4 OR CONFIG_DAALA_DCT8 OR CONFIG_DAALA_DCT16 OR
+ CONFIG_DAALA_DCT32 OR CONFIG_DAALA_DCT64)
+ if (CONFIG_LGT)
+ change_config_and_warn(CONFIG_LGT 0 CONFIG_DAALA_DCTx)
+ endif ()
+ if (NOT CONFIG_LOWBITDEPTH)
+ change_config_and_warn(CONFIG_LOWBITDEPTH 1 CONFIG_DAALA_DCTx)
+ endif ()
+ endif ()
+
+ if (CONFIG_TXK_SEL)
+ if (NOT CONFIG_LV_MAP)
+ change_config_and_warn(CONFIG_LV_MAP 1 CONFIG_TXK_SEL)
+ endif ()
+ endif ()
+
+ if (CONFIG_CTX1D)
+ if (NOT CONFIG_LV_MAP)
+ change_config_and_warn(CONFIG_LV_MAP 1 CONFIG_CTX1D)
+ endif ()
+ if (NOT CONFIG_EXT_TX)
+ change_config_and_warn(CONFIG_EXT_TX 1 CONFIG_CTX1D)
+ endif ()
+ endif ()
+
+ if (CONFIG_EXT_COMP_REFS)
+ if (NOT CONFIG_EXT_REFS)
+ change_config_and_warn(CONFIG_EXT_REFS 1 CONFIG_EXT_COMP_REFS)
+ endif ()
+ endif ()
+
+ if (CONFIG_STRIPED_LOOP_RESTORATION)
+ if (NOT CONFIG_LOOP_RESTORATION)
+ change_config_and_warn(CONFIG_LOOP_RESTORATION 1
+ CONFIG_STRIPED_LOOP_RESTORATION)
+ endif ()
+ endif ()
+
+ if (CONFIG_MFMV)
+ if (NOT CONFIG_FRAME_MARKER)
+ change_config_and_warn(CONFIG_FRAME_MARKER 1 CONFIG_MFMV)
+ endif ()
+ endif ()
+
+ if (CONFIG_NEW_MULTISYMBOL)
+ if (NOT CONFIG_RESTRICT_COMPRESSED_HDR)
+ change_config_and_warn(CONFIG_RESTRICT_COMPRESSED_HDR 1
+ CONFIG_NEW_MULTISYMBOL)
+ endif ()
+ endif ()
+
+ if (CONFIG_EXT_PARTITION_TYPES)
+ if (CONFIG_SUPERTX)
+ change_config_and_warn(CONFIG_SUPERTX 0
+ CONFIG_EXT_PARTITION_TYPES)
+ endif ()
+ endif ()
+
+ if (CONFIG_JNT_COMP)
+ if (NOT CONFIG_FRAME_MARKER)
+ change_config_and_warn(CONFIG_FRAME_MARKER 1 CONFIG_JNT_COMP)
+ endif ()
+ endif ()
+
+ if (CONFIG_AMVR)
+ change_config_and_warn(CONFIG_HASH_ME 1 CONFIG_AMVR)
+ endif ()
+
+ if (CONFIG_PVQ)
+ if (CONFIG_EXT_TX)
+ change_config_and_warn(CONFIG_EXT_TX 0 CONFIG_PVQ)
+ endif ()
+ if (CONFIG_HIGHBITDEPTH)
+ change_config_and_warn(CONFIG_HIGHBITDEPTH 0 CONFIG_PVQ)
+ endif ()
+ if (CONFIG_PALETTE_THROUGHPUT)
+ change_config_and_warn(CONFIG_PALETTE_THROUGHPUT 0 CONFIG_PVQ)
+ endif ()
+ if (CONFIG_RECT_TX)
+ change_config_and_warn(CONFIG_RECT_TX 0 CONFIG_PVQ)
+ endif ()
+ if (CONFIG_VAR_TX)
+ change_config_and_warn(CONFIG_VAR_TX 0 CONFIG_PVQ)
+ endif ()
+ endif ()
+
+ if (CONFIG_HORZONLY_FRAME_SUPERRES)
+ if (NOT CONFIG_FRAME_SUPERRES)
+      change_config_and_warn(CONFIG_FRAME_SUPERRES 1
+                             CONFIG_HORZONLY_FRAME_SUPERRES)
+ endif ()
+ endif ()
+endmacro ()
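+
+# Illustrative run: configuring with -DCONFIG_ANALYZER=1 while
+# CONFIG_INSPECTION is 0 makes the macro above call
+#   change_config_and_warn(CONFIG_INSPECTION 1 CONFIG_ANALYZER)
+# which flips CONFIG_INSPECTION on and warns:
+#   --- Enabled CONFIG_INSPECTION, required for CONFIG_ANALYZER.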
+
+endif () # AOM_BUILD_CMAKE_AOM_EXPERIMENT_DEPS_CMAKE_
diff --git a/third_party/aom/build/cmake/aom_optimization.cmake b/third_party/aom/build/cmake/aom_optimization.cmake
index 456798ceb..c58c3993e 100644
--- a/third_party/aom/build/cmake/aom_optimization.cmake
+++ b/third_party/aom/build/cmake/aom_optimization.cmake
@@ -238,4 +238,27 @@ function (test_nasm)
endif ()
endfunction ()
+# Adds a custom build command that generates an rtcd C include file using
+# build/make/rtcd.pl. $config is the input perl file, $output is the generated
+# C include file, $source is the C source file that depends on $output, and
+# $symbol is the symbol argument passed to rtcd.pl.
+function (add_rtcd_build_step config output source symbol)
+ add_custom_command(
+ OUTPUT ${output}
+ COMMAND ${PERL_EXECUTABLE}
+ ARGS "${AOM_ROOT}/build/make/rtcd.pl"
+ --arch=${AOM_TARGET_CPU}
+ --sym=${symbol}
+ ${AOM_RTCD_FLAGS}
+ --config=${AOM_CONFIG_DIR}/${AOM_TARGET_CPU}_rtcd_config.rtcd
+ ${config}
+ > ${output}
+ DEPENDS ${config}
+ COMMENT "Generating ${output}"
+ WORKING_DIRECTORY ${AOM_CONFIG_DIR}
+ VERBATIM)
+ set_property(SOURCE ${source} PROPERTY OBJECT_DEPENDS ${output})
+ set_property(SOURCE ${output} PROPERTY GENERATED)
+endfunction ()
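+
+# Sketch of the command this emits (argument values are illustrative):
+#   perl ${AOM_ROOT}/build/make/rtcd.pl --arch=x86_64 --sym=aom_dsp_rtcd
+#     ${AOM_RTCD_FLAGS} --config=${AOM_CONFIG_DIR}/x86_64_rtcd_config.rtcd
+#     ${config} > ${output}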
+
endif () # AOM_BUILD_CMAKE_AOM_OPTIMIZATION_CMAKE_
diff --git a/third_party/aom/build/cmake/compiler_flags.cmake b/third_party/aom/build/cmake/compiler_flags.cmake
index ee6d12bd3..aa34b83ae 100644
--- a/third_party/aom/build/cmake/compiler_flags.cmake
+++ b/third_party/aom/build/cmake/compiler_flags.cmake
@@ -13,53 +13,93 @@ set(AOM_BUILD_CMAKE_COMPILER_FLAGS_CMAKE_ 1)
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)
+include("${AOM_ROOT}/build/cmake/compiler_tests.cmake")
-# Strings used to cache failed C/CXX flags.
+# Strings used to cache flags.
+set(AOM_C_FLAGS)
+set(AOM_CXX_FLAGS)
+set(AOM_EXE_LINKER_FLAGS)
set(AOM_FAILED_C_FLAGS)
set(AOM_FAILED_CXX_FLAGS)
-# Checks C compiler for support of $c_flag. Adds $c_flag to $CMAKE_C_FLAGS when
-# the compile test passes. Caches $c_flag in $AOM_FAILED_C_FLAGS when the test
-# fails.
+# Sets the variable named by $out_is_present to YES in the caller's scope when
+# $flag is found in the string variable named by $flag_cache, and to NO
+# otherwise.
+function(is_flag_present flag_cache flag out_is_present)
+ string(FIND "${${flag_cache}}" "${flag}" flag_pos)
+ if (${flag_pos} EQUAL -1)
+ set(${out_is_present} NO PARENT_SCOPE)
+ else ()
+ set(${out_is_present} YES PARENT_SCOPE)
+ endif ()
+endfunction ()
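+
+# Example (illustrative): if ${AOM_C_FLAGS} contains "-Wall", then
+#   is_flag_present(AOM_C_FLAGS "-Wall" flag_ok)
+# sets flag_ok to YES in the caller's scope; otherwise it is set to NO.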
+
+# Appends $flag to the variable named by $flags when not already present. The
+# set() call uses FORCE to update the cache, which side-steps normal variable
+# scoping.
+function (append_flag flags flag)
+ string(FIND "${${flags}}" "${flag}" found)
+ if (${found} EQUAL -1)
+ set(${flags} "${${flags}} ${flag}" CACHE STRING "" FORCE)
+ endif ()
+endfunction ()
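+
+# Example (illustrative): append_flag(AOM_C_FLAGS "-Wall") appends the flag
+# only when "-Wall" is absent from ${AOM_C_FLAGS}, so repeated calls are
+# harmless.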
+
+# Checks C compiler for support of $c_flag. Adds $c_flag to all
+# $CMAKE_C_FLAGS_<CONFIG>s stored in AOM_C_CONFIGS when the compile test passes.
+# Caches $c_flag in $AOM_C_FLAGS or $AOM_FAILED_C_FLAGS depending on test
+# outcome.
function (add_c_flag_if_supported c_flag)
- unset(C_FLAG_FOUND CACHE)
- string(FIND "${CMAKE_C_FLAGS}" "${c_flag}" C_FLAG_FOUND)
- unset(C_FLAG_FAILED CACHE)
- string(FIND "${AOM_FAILED_C_FLAGS}" "${c_flag}" C_FLAG_FAILED)
-
- if (${C_FLAG_FOUND} EQUAL -1 AND ${C_FLAG_FAILED} EQUAL -1)
- unset(C_FLAG_SUPPORTED CACHE)
- message("Checking C compiler flag support for: " ${c_flag})
- check_c_compiler_flag("${c_flag}" C_FLAG_SUPPORTED)
- if (C_FLAG_SUPPORTED)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${c_flag}" CACHE STRING "" FORCE)
- else ()
- set(AOM_FAILED_C_FLAGS "${AOM_FAILED_C_FLAGS} ${c_flag}" CACHE STRING ""
- FORCE)
- endif ()
+ if (DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
+ return()
+ endif ()
+
+ is_flag_present(AOM_C_FLAGS "${c_flag}" flag_ok)
+ is_flag_present(AOM_FAILED_C_FLAGS "${c_flag}" flag_failed)
+ if (${flag_ok} OR ${flag_failed})
+ return ()
+ endif ()
+
+ unset(C_FLAG_SUPPORTED CACHE)
+ message("Checking C compiler flag support for: " ${c_flag})
+ check_c_compiler_flag("${c_flag}" C_FLAG_SUPPORTED)
+
+ if (${C_FLAG_SUPPORTED})
+ append_flag(AOM_C_FLAGS "${c_flag}")
+ foreach (config ${AOM_C_CONFIGS})
+ unset(C_FLAG_FOUND)
+ append_flag("${config}" "${c_flag}")
+ endforeach ()
+ else ()
+ append_flag(AOM_FAILED_C_FLAGS "${c_flag}")
endif ()
endfunction ()
-# Checks C++ compiler for support of $cxx_flag. Adds $cxx_flag to
-# $CMAKE_CXX_FLAGS when the compile test passes. Caches $c_flag in
-# $AOM_FAILED_CXX_FLAGS when the test fails.
+# Checks C++ compiler for support of $cxx_flag. Adds $cxx_flag to all
+# $CMAKE_CXX_FLAGS_<CONFIG>s stored in AOM_CXX_CONFIGS when the compile test
+# passes. Caches $cxx_flag in $AOM_CXX_FLAGS or $AOM_FAILED_CXX_FLAGS depending
+# on test outcome.
function (add_cxx_flag_if_supported cxx_flag)
- unset(CXX_FLAG_FOUND CACHE)
- string(FIND "${CMAKE_CXX_FLAGS}" "${cxx_flag}" CXX_FLAG_FOUND)
- unset(CXX_FLAG_FAILED CACHE)
- string(FIND "${AOM_FAILED_CXX_FLAGS}" "${cxx_flag}" CXX_FLAG_FAILED)
-
- if (${CXX_FLAG_FOUND} EQUAL -1 AND ${CXX_FLAG_FAILED} EQUAL -1)
- unset(CXX_FLAG_SUPPORTED CACHE)
- message("Checking CXX compiler flag support for: " ${cxx_flag})
- check_cxx_compiler_flag("${cxx_flag}" CXX_FLAG_SUPPORTED)
- if (CXX_FLAG_SUPPORTED)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${cxx_flag}" CACHE STRING ""
- FORCE)
- else()
- set(AOM_FAILED_CXX_FLAGS "${AOM_FAILED_CXX_FLAGS} ${cxx_flag}" CACHE
- STRING "" FORCE)
- endif ()
+ if (DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
+ return()
+ endif ()
+
+ is_flag_present(AOM_CXX_FLAGS "${cxx_flag}" flag_ok)
+ is_flag_present(AOM_FAILED_CXX_FLAGS "${cxx_flag}" flag_failed)
+ if (${flag_ok} OR ${flag_failed})
+ return ()
+ endif ()
+
+ unset(CXX_FLAG_SUPPORTED CACHE)
+ message("Checking C++ compiler flag support for: " ${cxx_flag})
+ check_cxx_compiler_flag("${cxx_flag}" CXX_FLAG_SUPPORTED)
+
+ if (${CXX_FLAG_SUPPORTED})
+ append_flag(AOM_CXX_FLAGS "${cxx_flag}")
+ foreach (config ${AOM_CXX_CONFIGS})
+ unset(CXX_FLAG_FOUND)
+ append_flag("${config}" "${cxx_flag}")
+ endforeach ()
+ else ()
+ append_flag(AOM_FAILED_CXX_FLAGS "${cxx_flag}")
endif ()
endfunction ()
@@ -73,47 +113,78 @@ endfunction ()
# Checks C compiler for support of $c_flag and terminates generation when
# support is not present.
function (require_c_flag c_flag update_c_flags)
- unset(C_FLAG_FOUND CACHE)
- string(FIND "${CMAKE_C_FLAGS}" "${c_flag}" C_FLAG_FOUND)
-
- if (${C_FLAG_FOUND} EQUAL -1)
- unset(HAVE_C_FLAG CACHE)
- message("Checking C compiler flag support for: " ${c_flag})
- check_c_compiler_flag("${c_flag}" HAVE_C_FLAG)
- if (NOT HAVE_C_FLAG)
- message(FATAL_ERROR
- "${PROJECT_NAME} requires support for C flag: ${c_flag}.")
- endif ()
- if (update_c_flags)
- set(CMAKE_C_FLAGS "${c_flag} ${CMAKE_C_FLAGS}" CACHE STRING "" FORCE)
- endif ()
+ if (DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
+ return()
+ endif ()
+
+ is_flag_present(AOM_C_FLAGS "${c_flag}" flag_ok)
+ if (${flag_ok})
+ return ()
+ endif ()
+
+ if (NOT "${AOM_EXE_LINKER_FLAGS}" STREQUAL "")
+ aom_push_var(CMAKE_EXE_LINKER_FLAGS "${AOM_EXE_LINKER_FLAGS}")
+ endif ()
+
+ unset(HAVE_C_FLAG CACHE)
+ message("Checking C compiler flag support for: " ${c_flag})
+ check_c_compiler_flag("${c_flag}" HAVE_C_FLAG)
+ if (NOT HAVE_C_FLAG)
+ message(FATAL_ERROR
+ "${PROJECT_NAME} requires support for C flag: ${c_flag}.")
+ endif ()
+
+ if (NOT "${AOM_EXE_LINKER_FLAGS}" STREQUAL "")
+ aom_pop_var(CMAKE_EXE_LINKER_FLAGS)
+ endif ()
+
+ append_flag(AOM_C_FLAGS "${c_flag}")
+ if (update_c_flags)
+ foreach (config ${AOM_C_CONFIGS})
+ set(${config} "${${config}} ${c_flag}" CACHE STRING "" FORCE)
+ endforeach ()
endif ()
endfunction ()
# Checks CXX compiler for support of $cxx_flag and terminates generation when
# support is not present.
function (require_cxx_flag cxx_flag update_cxx_flags)
- unset(CXX_FLAG_FOUND CACHE)
- string(FIND "${CMAKE_CXX_FLAGS}" "${cxx_flag}" CXX_FLAG_FOUND)
-
- if (${CXX_FLAG_FOUND} EQUAL -1)
- unset(HAVE_CXX_FLAG CACHE)
- message("Checking CXX compiler flag support for: " ${cxx_flag})
- check_cxx_compiler_flag("${cxx_flag}" HAVE_CXX_FLAG)
- if (NOT HAVE_CXX_FLAG)
- message(FATAL_ERROR
- "${PROJECT_NAME} requires support for CXX flag: ${cxx_flag}.")
- endif ()
- if (update_cxx_flags)
- set(CMAKE_CXX_FLAGS "${cxx_flag} ${CMAKE_CXX_FLAGS}" CACHE STRING ""
- FORCE)
- endif ()
+ if (DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
+ return()
+ endif ()
+
+ is_flag_present(AOM_CXX_FLAGS "${cxx_flag}" flag_ok)
+ if (${flag_ok})
+ return ()
+ endif ()
+
+ if (NOT "${AOM_EXE_LINKER_FLAGS}" STREQUAL "")
+ aom_push_var(CMAKE_EXE_LINKER_FLAGS "${AOM_EXE_LINKER_FLAGS}")
+ endif ()
+
+ unset(HAVE_CXX_FLAG CACHE)
+  message("Checking C++ compiler flag support for: " ${cxx_flag})
+ check_cxx_compiler_flag("${cxx_flag}" HAVE_CXX_FLAG)
+ if (NOT HAVE_CXX_FLAG)
+ message(FATAL_ERROR
+      "${PROJECT_NAME} requires support for C++ flag: ${cxx_flag}.")
+ endif ()
+
+ if (NOT "${AOM_EXE_LINKER_FLAGS}" STREQUAL "")
+ aom_pop_var(CMAKE_EXE_LINKER_FLAGS)
+ endif ()
+
+ append_flag(AOM_CXX_FLAGS "${cxx_flag}")
+ if (update_cxx_flags)
+ foreach (config ${AOM_CXX_CONFIGS})
+ set(${config} "${${config}} ${cxx_flag}" CACHE STRING "" FORCE)
+ endforeach ()
endif ()
endfunction ()
# Checks for support of $flag by both the C and CXX compilers. Terminates
# generation when support is not present in both compilers.
-function (require_flag flag update_cmake_flags)
+function (require_compiler_flag flag update_cmake_flags)
require_c_flag(${flag} ${update_cmake_flags})
require_cxx_flag(${flag} ${update_cmake_flags})
endfunction ()
@@ -137,7 +208,7 @@ endfunction ()
# Checks only non-MSVC targets for support of $flag by both the C and CXX
# compilers. Terminates generation when support is not present in both
# compilers.
-function (require_flag_nomsvc flag update_cmake_flags)
+function (require_compiler_flag_nomsvc flag update_cmake_flags)
require_c_flag_nomsvc(${flag} ${update_cmake_flags})
require_cxx_flag_nomsvc(${flag} ${update_cmake_flags})
endfunction ()
@@ -145,25 +216,29 @@ endfunction ()
# Adds $preproc_def to C compiler command line (as -D$preproc_def) if not
# already present.
function (add_c_preproc_definition preproc_def)
- unset(PREPROC_DEF_FOUND CACHE)
- string(FIND "${CMAKE_C_FLAGS}" "${preproc_def}" PREPROC_DEF_FOUND)
-
- if (${PREPROC_DEF_FOUND} EQUAL -1)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D${preproc_def}" CACHE STRING ""
- FORCE)
+ set(preproc_def "-D${preproc_def}")
+ is_flag_present(AOM_C_FLAGS "${preproc_def}" flag_cached)
+ if (${flag_cached})
+ return ()
endif ()
+
+ foreach (config ${AOM_C_CONFIGS})
+ set(${config} "${${config}} ${preproc_def}" CACHE STRING "" FORCE)
+ endforeach ()
endfunction ()
# Adds $preproc_def to CXX compiler command line (as -D$preproc_def) if not
# already present.
function (add_cxx_preproc_definition preproc_def)
- unset(PREPROC_DEF_FOUND CACHE)
- string(FIND "${CMAKE_CXX_FLAGS}" "${preproc_def}" PREPROC_DEF_FOUND)
-
- if (${PREPROC_DEF_FOUND} EQUAL -1)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D${preproc_def}" CACHE STRING ""
- FORCE)
+ set(preproc_def "-D${preproc_def}")
+ is_flag_present(AOM_CXX_FLAGS "${preproc_def}" flag_cached)
+ if (${flag_cached})
+ return ()
endif ()
+
+ foreach (config ${AOM_CXX_CONFIGS})
+ set(${config} "${${config}} ${preproc_def}" CACHE STRING "" FORCE)
+ endforeach ()
endfunction ()
# Adds $preproc_def to C and CXX compiler command line (as -D$preproc_def) if
@@ -175,32 +250,35 @@ endfunction ()
# Adds $flag to assembler command line.
function (append_as_flag flag)
- unset(AS_FLAG_FOUND CACHE)
- string(FIND "${AOM_AS_FLAGS}" "${flag}" AS_FLAG_FOUND)
-
- if (${AS_FLAG_FOUND} EQUAL -1)
- set(AOM_AS_FLAGS "${AOM_AS_FLAGS} ${flag}" CACHE STRING "" FORCE)
+ is_flag_present(AOM_AS_FLAGS "${flag}" flag_cached)
+ if (${flag_cached})
+ return ()
endif ()
+ append_flag(AOM_AS_FLAGS "${flag}")
endfunction ()
# Adds $flag to the C compiler command line.
function (append_c_flag flag)
- unset(C_FLAG_FOUND CACHE)
- string(FIND "${CMAKE_C_FLAGS}" "${flag}" C_FLAG_FOUND)
-
- if (${C_FLAG_FOUND} EQUAL -1)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" CACHE STRING "" FORCE)
+ is_flag_present(AOM_C_FLAGS "${flag}" flag_cached)
+ if (${flag_cached})
+ return ()
endif ()
+
+ foreach (config ${AOM_C_CONFIGS})
+ append_flag(${config} "${flag}")
+ endforeach ()
endfunction ()
# Adds $flag to the CXX compiler command line.
function (append_cxx_flag flag)
- unset(CXX_FLAG_FOUND CACHE)
- string(FIND "${CMAKE_CXX_FLAGS}" "${flag}" CXX_FLAG_FOUND)
-
- if (${CXX_FLAG_FOUND} EQUAL -1)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" CACHE STRING "" FORCE)
+ is_flag_present(AOM_CXX_FLAGS "${flag}" flag_cached)
+ if (${flag_cached})
+ return ()
endif ()
+
+ foreach (config ${AOM_CXX_CONFIGS})
+ append_flag(${config} "${flag}")
+ endforeach ()
endfunction ()
# Adds $flag to the C and CXX compiler command lines.
@@ -209,30 +287,29 @@ function (append_compiler_flag flag)
append_cxx_flag(${flag})
endfunction ()
-# Adds $flag to the executable linker command line.
+# Adds $flag to the executable linker command line when not present.
function (append_exe_linker_flag flag)
- unset(LINKER_FLAG_FOUND CACHE)
- string(FIND "${CMAKE_EXE_LINKER_FLAGS}" "${flag}" LINKER_FLAG_FOUND)
-
- if (${LINKER_FLAG_FOUND} EQUAL -1)
- set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${flag}" CACHE STRING
- "" FORCE)
+ is_flag_present(AOM_EXE_LINKER_FLAGS "${flag}" flag_cached)
+ if (${flag_cached})
+ return()
endif ()
+
+ append_flag(AOM_EXE_LINKER_FLAGS "${flag}")
+ foreach (config ${AOM_EXE_LINKER_CONFIGS})
+ append_flag(${config} "${flag}")
+ endforeach ()
endfunction ()
# Adds $flag to the link flags for $target.
-function (append_link_flag_to_target target flags)
+function (append_link_flag_to_target target flag)
unset(target_link_flags)
get_target_property(target_link_flags ${target} LINK_FLAGS)
if (target_link_flags)
- unset(link_flag_found)
- string(FIND "${target_link_flags}" "${flags}" link_flag_found)
-
- if (NOT ${link_flag_found} EQUAL -1)
+ is_flag_present(target_link_flags "${flag}" flag_found)
+ if (${flag_found})
return()
endif ()
-
-    set(target_link_flags "${target_link_flags} ${flags}")
+    set(target_link_flags "${target_link_flags} ${flag}")
else ()
-    set(target_link_flags "${flags}")
+    set(target_link_flags "${flag}")
@@ -241,4 +318,55 @@ function (append_link_flag_to_target target flags)
set_target_properties(${target} PROPERTIES LINK_FLAGS ${target_link_flags})
endfunction ()
+# Adds $flag to the executable linker flags, and confirms that C and CXX
+# targets still build; generation terminates when either check fails.
+function (require_linker_flag flag)
+ if (DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
+ return()
+ endif ()
+
+ append_exe_linker_flag(${flag})
+
+ unset(c_passed)
+ aom_check_c_compiles("LINKER_FLAG_C_TEST(${flag})" "" c_passed)
+ unset(cxx_passed)
+ aom_check_cxx_compiles("LINKER_FLAG_CXX_TEST(${flag})" "" cxx_passed)
+
+ if (NOT c_passed OR NOT cxx_passed)
+ message(FATAL_ERROR "Linker flag test for ${flag} failed.")
+ endif ()
+endfunction ()
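+
+# Example: sanitizers.cmake calls require_linker_flag("-fsanitize=${SANITIZE}")
+# and generation stops when the flag breaks the C or C++ test link.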
+
+# Appends flags in $AOM_EXTRA_<TYPE>_FLAGS variables to the flags used at build
+# time.
+function (set_user_flags)
+ # Linker flags are handled first because some C/CXX flags require that a
+ # linker flag is present at link time.
+ if (AOM_EXTRA_EXE_LINKER_FLAGS)
+ is_flag_present(AOM_EXE_LINKER_FLAGS "${AOM_EXTRA_EXE_LINKER_FLAGS}"
+ extra_present)
+ if (NOT ${extra_present})
+ require_linker_flag("${AOM_EXTRA_EXE_LINKER_FLAGS}")
+ endif ()
+ endif ()
+ if (AOM_EXTRA_AS_FLAGS)
+ # TODO(tomfinegan): assembler flag testing would be a good thing to have.
+ is_flag_present(AOM_AS_FLAGS "${AOM_EXTRA_AS_FLAGS}" extra_present)
+ if (NOT ${extra_present})
+ append_flag(AOM_AS_FLAGS "${AOM_EXTRA_AS_FLAGS}")
+ endif ()
+ endif ()
+ if (AOM_EXTRA_C_FLAGS)
+ is_flag_present(AOM_C_FLAGS "${AOM_EXTRA_C_FLAGS}" extra_present)
+ if (NOT ${extra_present})
+ require_c_flag("${AOM_EXTRA_C_FLAGS}" YES)
+ endif ()
+ endif ()
+ if (AOM_EXTRA_CXX_FLAGS)
+ is_flag_present(AOM_CXX_FLAGS "${AOM_EXTRA_CXX_FLAGS}" extra_present)
+ if (NOT ${extra_present})
+ require_cxx_flag("${AOM_EXTRA_CXX_FLAGS}" YES)
+ endif ()
+ endif ()
+endfunction ()
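+
+# Illustrative usage: configuring with
+#   cmake path/to/aom -DAOM_EXTRA_C_FLAGS="-fno-strict-aliasing"
+# routes the flag through require_c_flag() above, so an unsupported flag
+# terminates generation rather than being dropped silently.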
+
endif () # AOM_BUILD_CMAKE_COMPILER_FLAGS_CMAKE_
diff --git a/third_party/aom/build/cmake/compiler_tests.cmake b/third_party/aom/build/cmake/compiler_tests.cmake
index 89f0bc12c..8a73ec8ff 100644
--- a/third_party/aom/build/cmake/compiler_tests.cmake
+++ b/third_party/aom/build/cmake/compiler_tests.cmake
@@ -14,6 +14,34 @@ set(AOM_BUILD_CMAKE_COMPILER_TESTS_CMAKE_ 1)
include(CheckCSourceCompiles)
include(CheckCXXSourceCompiles)
+# CMake passes command line flags like this:
+#   $compiler $lang_flags $lang_flags_config ...
+# To ensure the flags tested here and elsewhere are obeyed, a list of the
+# active build configuration types is built, and flags are applied to the flag
+# strings of each configuration currently active for C and CXX builds, as
+# determined by reading $CMAKE_CONFIGURATION_TYPES and $CMAKE_BUILD_TYPE. A
+# non-empty $CMAKE_CONFIGURATION_TYPES means a multi-configuration generator
+# (currently MSVC or Xcode) is in use; for other generators $CMAKE_BUILD_TYPE
+# is used. In both cases AOM_<LANG>_CONFIGS is populated with the names of the
+# CMake string variables that contain flags for the currently available
+# configuration(s).
+unset(AOM_C_CONFIGS)
+unset(AOM_CXX_CONFIGS)
+list(LENGTH CMAKE_CONFIGURATION_TYPES num_configs)
+if (${num_configs} GREATER 0)
+ foreach (config ${CMAKE_CONFIGURATION_TYPES})
+ string(TOUPPER ${config} config)
+ list(APPEND AOM_C_CONFIGS "CMAKE_C_FLAGS_${config}")
+ list(APPEND AOM_CXX_CONFIGS "CMAKE_CXX_FLAGS_${config}")
+ list(APPEND AOM_EXE_LINKER_CONFIGS "CMAKE_EXE_LINKER_FLAGS_${config}")
+ endforeach ()
+else ()
+ string(TOUPPER ${CMAKE_BUILD_TYPE} config)
+ set(AOM_C_CONFIGS "CMAKE_C_FLAGS_${config}")
+ set(AOM_CXX_CONFIGS "CMAKE_CXX_FLAGS_${config}")
+ set(AOM_EXE_LINKER_CONFIGS "CMAKE_EXE_LINKER_FLAGS_${config}")
+endif ()
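+
+# For example, a single-configuration generator with CMAKE_BUILD_TYPE=Release
+# yields AOM_C_CONFIGS == "CMAKE_C_FLAGS_RELEASE", while the multi-config MSVC
+# generators yield one entry per configuration (DEBUG, RELEASE, and so on).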
+
# The basic main() function used in all compile tests.
set(AOM_C_MAIN "\nint main(void) { return 0; }")
set(AOM_CXX_MAIN "\nint main() { return 0; }")
@@ -25,8 +53,8 @@ set(AOM_CXX_PASSED_TESTS)
set(AOM_CXX_FAILED_TESTS)
function(aom_push_var var new_value)
- set(SAVED_${var} ${var} PARENT_SCOPE)
- set(${var} ${new_value} PARENT_SCOPE)
+ set(SAVED_${var} ${${var}} PARENT_SCOPE)
+ set(${var} "${${var}} ${new_value}" PARENT_SCOPE)
endfunction ()
function(aom_pop_var var)
@@ -40,6 +68,10 @@ endfunction ()
# The test is not run if the test name is found in either of the passed or
# failed test variables.
function(aom_check_c_compiles test_name test_source result_var)
+ if (DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
+ return()
+ endif ()
+
unset(C_TEST_PASSED CACHE)
unset(C_TEST_FAILED CACHE)
string(FIND "${AOM_C_PASSED_TESTS}" "${test_name}" C_TEST_PASSED)
@@ -71,6 +103,10 @@ endfunction ()
# The test is not run if the test name is found in either of the passed or
# failed test variables.
function(aom_check_cxx_compiles test_name test_source result_var)
+ if (DEBUG_CMAKE_DISABLE_COMPILER_TESTS)
+ return()
+ endif ()
+
unset(CXX_TEST_PASSED CACHE)
unset(CXX_TEST_FAILED CACHE)
string(FIND "${AOM_CXX_PASSED_TESTS}" "${test_name}" CXX_TEST_PASSED)
@@ -100,9 +136,9 @@ endfunction ()
# Convenience function that confirms $test_source compiles as C and C++.
# $result_var is set to 1 when both tests are successful, and 0 when one or both
# tests fail.
-# Note: This function is intended to be used to write to result variables that are
-# expanded via configure_file(). $result_var is set to 1 or 0 to allow direct
-# usage of the value in generated source files.
+# Note: This function is intended to be used to write to result variables that
+# are expanded via configure_file(). $result_var is set to 1 or 0 to allow
+# direct usage of the value in generated source files.
function(aom_check_source_compiles test_name test_source result_var)
unset(C_PASSED)
unset(CXX_PASSED)
diff --git a/third_party/aom/build/cmake/dist.cmake b/third_party/aom/build/cmake/dist.cmake
index 0de68a44d..ad1e069fe 100644
--- a/third_party/aom/build/cmake/dist.cmake
+++ b/third_party/aom/build/cmake/dist.cmake
@@ -17,8 +17,8 @@ function (listify_string in_string out_string)
set(${out_string} "${${out_string}}" PARENT_SCOPE)
endfunction ()
-set(REQUIRED_ARGS "AOM_ROOT" "AOM_CONFIG_DIR" "AOM_DIST_DIR" "AOM_DIST_EXAMPLES"
- "AOM_DIST_APPS" "AOM_DIST_INCLUDES" "AOM_DIST_LIBS" "ENABLE_DOCS")
+set(REQUIRED_ARGS "AOM_ROOT" "AOM_CONFIG_DIR" "AOM_DIST_DIR" "AOM_DIST_INCLUDES"
+ "AOM_DIST_LIBS" "ENABLE_DOCS")
foreach (arg ${REQUIRED_ARGS})
if ("${${arg}}" STREQUAL "")
@@ -30,15 +30,27 @@ if (ENABLE_DOCS)
file(INSTALL "${AOM_CONFIG_DIR}/docs" DESTINATION "${AOM_DIST_DIR}")
endif ()
-listify_string("${AOM_DIST_EXAMPLES}" "AOM_DIST_EXAMPLES")
-foreach (example ${AOM_DIST_EXAMPLES})
- file(INSTALL "${example}" DESTINATION "${AOM_DIST_DIR}/bin/examples")
-endforeach ()
+if (AOM_DIST_EXAMPLES)
+ listify_string("${AOM_DIST_EXAMPLES}" "AOM_DIST_EXAMPLES")
+ foreach (example ${AOM_DIST_EXAMPLES})
+ if (NOT "${example}" MATCHES "aomdec\|aomenc")
+ file(INSTALL "${example}" DESTINATION "${AOM_DIST_DIR}/bin/examples")
+ endif ()
+ endforeach ()
+endif ()
-listify_string("${AOM_DIST_APPS}" "AOM_DIST_APPS")
-foreach (app ${AOM_DIST_APPS})
- file(INSTALL "${app}" DESTINATION "${AOM_DIST_DIR}/bin")
-endforeach ()
+if (AOM_DIST_TOOLS)
+ foreach (tool ${AOM_DIST_TOOLS})
+ file(INSTALL "${tool}" DESTINATION "${AOM_DIST_DIR}/bin/tools")
+ endforeach ()
+endif ()
+
+if (AOM_DIST_APPS)
+ listify_string("${AOM_DIST_APPS}" "AOM_DIST_APPS")
+ foreach (app ${AOM_DIST_APPS})
+ file(INSTALL "${app}" DESTINATION "${AOM_DIST_DIR}/bin")
+ endforeach ()
+endif ()
listify_string("${AOM_DIST_INCLUDES}" "AOM_DIST_INCLUDES")
foreach (inc ${AOM_DIST_INCLUDES})
diff --git a/third_party/aom/build/cmake/pkg_config.cmake b/third_party/aom/build/cmake/pkg_config.cmake
new file mode 100644
index 000000000..aee375f43
--- /dev/null
+++ b/third_party/aom/build/cmake/pkg_config.cmake
@@ -0,0 +1,57 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+cmake_minimum_required(VERSION 3.5)
+
+set(REQUIRED_ARGS
+ "AOM_ROOT" "AOM_CONFIG_DIR" "CMAKE_INSTALL_PREFIX" "CMAKE_PROJECT_NAME"
+ "CONFIG_MULTITHREAD" "HAVE_PTHREAD_H")
+
+foreach (arg ${REQUIRED_ARGS})
+ if ("${${arg}}" STREQUAL "")
+ message(FATAL_ERROR "${arg} must not be empty.")
+ endif ()
+endforeach ()
+
+include("${AOM_ROOT}/build/cmake/util.cmake")
+
+extract_version_string("${AOM_CONFIG_DIR}/aom_version.h" aom_version)
+
+# Create a version string suitable for comparison using the RPM version compare
+# algorithm: strip out everything after the number.
+string(FIND "${aom_version}" "-" dash_pos)
+if (${dash_pos} EQUAL -1)
+ set(package_version "${aom_version}")
+else ()
+ string(SUBSTRING "${aom_version}" 0 ${dash_pos} package_version)
+endif ()
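+
+# Example: a git-derived version such as "1.0.0-123-gdeadbee" yields the
+# RPM-comparable package_version "1.0.0"; a bare "1.0.0" passes through
+# unchanged.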
+
+# Write pkg-config info.
+set(prefix "${CMAKE_INSTALL_PREFIX}")
+set(pkgconfig_file "${AOM_CONFIG_DIR}/aom.pc")
+string(TOLOWER ${CMAKE_PROJECT_NAME} pkg_name)
+file(WRITE "${pkgconfig_file}" "# libaom pkg-config.\n")
+file(APPEND "${pkgconfig_file}" "prefix=${prefix}\n")
+file(APPEND "${pkgconfig_file}" "exec_prefix=${prefix}/bin\n")
+file(APPEND "${pkgconfig_file}" "libdir=${prefix}/lib\n")
+file(APPEND "${pkgconfig_file}" "includedir=${prefix}/include\n\n")
+file(APPEND "${pkgconfig_file}" "Name: ${pkg_name}\n")
+file(APPEND "${pkgconfig_file}"
+ "Description: AV1 codec library v${aom_version}.\n")
+file(APPEND "${pkgconfig_file}" "Version: ${package_version}\n")
+file(APPEND "${pkgconfig_file}" "Requires:\n")
+file(APPEND "${pkgconfig_file}" "Conflicts:\n")
+file(APPEND "${pkgconfig_file}" "Libs: -L${prefix}/lib -l${pkg_name} -lm\n")
+if (CONFIG_MULTITHREAD AND HAVE_PTHREAD_H)
+ file(APPEND "${pkgconfig_file}" "Libs.private: -lm -lpthread\n")
+else ()
+ file(APPEND "${pkgconfig_file}" "Libs.private: -lm\n")
+endif ()
+file(APPEND "${pkgconfig_file}" "Cflags: -I${prefix}/include\n")
diff --git a/third_party/aom/build/cmake/sanitizers.cmake b/third_party/aom/build/cmake/sanitizers.cmake
new file mode 100644
index 000000000..2b9067868
--- /dev/null
+++ b/third_party/aom/build/cmake/sanitizers.cmake
@@ -0,0 +1,29 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+if (NOT AOM_BUILD_CMAKE_SANITIZERS_CMAKE_)
+set(AOM_BUILD_CMAKE_SANITIZERS_CMAKE_ 1)
+
+if (MSVC OR NOT SANITIZE)
+ return ()
+endif ()
+
+include("${AOM_ROOT}/build/cmake/compiler_flags.cmake")
+
+string(TOLOWER ${SANITIZE} SANITIZE)
+
+# Require the sanitizer requested.
+require_linker_flag("-fsanitize=${SANITIZE}")
+require_compiler_flag("-fsanitize=${SANITIZE}" YES)
+
+# Make callstacks accurate.
+require_compiler_flag("-fno-omit-frame-pointer -fno-optimize-sibling-calls" YES)
+
+endif() # AOM_BUILD_CMAKE_SANITIZERS_CMAKE_
diff --git a/third_party/aom/build/cmake/toolchains/mips32-linux-gcc.cmake b/third_party/aom/build/cmake/toolchains/mips32-linux-gcc.cmake
index 11406bd08..b452e368f 100644
--- a/third_party/aom/build/cmake/toolchains/mips32-linux-gcc.cmake
+++ b/third_party/aom/build/cmake/toolchains/mips32-linux-gcc.cmake
@@ -66,6 +66,10 @@ set(CMAKE_CXX_COMPILER_ARG1 "-EL ${MIPS_CXXFLAGS}")
set(CMAKE_SYSTEM_PROCESSOR "mips32")
# No runtime cpu detect for mips32-linux-gcc.
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "")
+if (CONFIG_RUNTIME_CPU_DETECT)
+ message("--- CONFIG_RUNTIME_CPU_DETECT not supported for mips32 targets.")
+endif ()
+
+set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "" FORCE)
endif () # AOM_BUILD_CMAKE_TOOLCHAINS_MIPS32_LINUX_GCC_CMAKE_
diff --git a/third_party/aom/build/cmake/toolchains/mips64-linux-gcc.cmake b/third_party/aom/build/cmake/toolchains/mips64-linux-gcc.cmake
index fffb495bf..0b63d778d 100644
--- a/third_party/aom/build/cmake/toolchains/mips64-linux-gcc.cmake
+++ b/third_party/aom/build/cmake/toolchains/mips64-linux-gcc.cmake
@@ -43,6 +43,10 @@ set(CMAKE_CXX_COMPILER_ARG1 "-EL ${MIPS_CXXFLAGS}")
set(CMAKE_SYSTEM_PROCESSOR "mips64")
# No runtime cpu detect for mips64-linux-gcc.
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "")
+if (CONFIG_RUNTIME_CPU_DETECT)
+ message("--- CONFIG_RUNTIME_CPU_DETECT not supported for mips64 targets.")
+endif ()
+
+set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE NUMBER "" FORCE)
endif () # AOM_BUILD_CMAKE_TOOLCHAINS_MIPS64_LINUX_GCC_CMAKE_
diff --git a/third_party/aom/build/cmake/toolchains/x86-linux.cmake b/third_party/aom/build/cmake/toolchains/x86-linux.cmake
index 077c8f325..42cc61467 100644
--- a/third_party/aom/build/cmake/toolchains/x86-linux.cmake
+++ b/third_party/aom/build/cmake/toolchains/x86-linux.cmake
@@ -8,7 +8,12 @@
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
+if (NOT AOM_BUILD_CMAKE_TOOLCHAINS_X86_LINUX_CMAKE_)
+set(AOM_BUILD_CMAKE_TOOLCHAINS_X86_LINUX_CMAKE_ 1)
+
set(CMAKE_SYSTEM_PROCESSOR "x86")
set(CMAKE_SYSTEM_NAME "Linux")
set(CMAKE_C_COMPILER_ARG1 "-m32")
set(CMAKE_CXX_COMPILER_ARG1 "-m32")
+
+endif () # AOM_BUILD_CMAKE_TOOLCHAINS_X86_LINUX_CMAKE_
diff --git a/third_party/aom/build/cmake/toolchains/x86-mingw-gcc.cmake b/third_party/aom/build/cmake/toolchains/x86-mingw-gcc.cmake
new file mode 100644
index 000000000..bdd3fa539
--- /dev/null
+++ b/third_party/aom/build/cmake/toolchains/x86-mingw-gcc.cmake
@@ -0,0 +1,29 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+if (NOT AOM_BUILD_CMAKE_TOOLCHAINS_X86_MINGW_GCC_CMAKE_)
+set(AOM_BUILD_CMAKE_TOOLCHAINS_X86_MINGW_GCC_CMAKE_ 1)
+
+set(CMAKE_SYSTEM_PROCESSOR "x86")
+set(CMAKE_SYSTEM_NAME "Windows")
+set(CMAKE_C_COMPILER_ARG1 "-m32")
+set(CMAKE_CXX_COMPILER_ARG1 "-m32")
+
+if ("${CROSS}" STREQUAL "")
+ set(CROSS i686-w64-mingw32-)
+endif ()
+
+set(CMAKE_C_COMPILER ${CROSS}gcc)
+set(CMAKE_CXX_COMPILER ${CROSS}g++)
+
+# Disable the use of gtest's CMake support.
+set(AOM_DISABLE_GTEST_CMAKE 1)
+
+endif () # AOM_BUILD_CMAKE_TOOLCHAINS_X86_MINGW_GCC_CMAKE_
diff --git a/third_party/aom/build/cmake/toolchains/x86_64-mingw-gcc.cmake b/third_party/aom/build/cmake/toolchains/x86_64-mingw-gcc.cmake
new file mode 100644
index 000000000..be94332b4
--- /dev/null
+++ b/third_party/aom/build/cmake/toolchains/x86_64-mingw-gcc.cmake
@@ -0,0 +1,27 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+if (NOT AOM_BUILD_CMAKE_TOOLCHAINS_X86_64_MINGW_GCC_CMAKE_)
+set(AOM_BUILD_CMAKE_TOOLCHAINS_X86_64_MINGW_GCC_CMAKE_ 1)
+
+set(CMAKE_SYSTEM_PROCESSOR "x86_64")
+set(CMAKE_SYSTEM_NAME "Windows")
+
+if ("${CROSS}" STREQUAL "")
+ set(CROSS x86_64-w64-mingw32-)
+endif ()
+
+set(CMAKE_C_COMPILER ${CROSS}gcc)
+set(CMAKE_CXX_COMPILER ${CROSS}g++)
+
+# Disable the use of gtest's CMake support.
+set(AOM_DISABLE_GTEST_CMAKE 1)
+
+endif () # AOM_BUILD_CMAKE_TOOLCHAINS_X86_64_MINGW_GCC_CMAKE_
diff --git a/third_party/aom/build/cmake/util.cmake b/third_party/aom/build/cmake/util.cmake
index 62841a63b..d6c432229 100644
--- a/third_party/aom/build/cmake/util.cmake
+++ b/third_party/aom/build/cmake/util.cmake
@@ -11,9 +11,12 @@
if (NOT AOM_BUILD_CMAKE_UTIL_CMAKE_)
set(AOM_BUILD_CMAKE_UTIL_CMAKE_ 1)
+# Creates dummy source file in $AOM_CONFIG_DIR named $basename.$extension and
+# returns the full path to the dummy source file via the $out_file_path
+# parameter.
function (create_dummy_source_file basename extension out_file_path)
set(dummy_source_file "${AOM_CONFIG_DIR}/${basename}.${extension}")
- file(WRITE "${dummy_source_file}"
+ file(WRITE "${dummy_source_file}"
"// Generated file. DO NOT EDIT!\n"
"// ${target_name} needs a ${extension} file to force link language, \n"
"// or to silence a harmless CMake warning: Ignore me.\n"
@@ -21,10 +24,61 @@ function (create_dummy_source_file basename extension out_file_path)
set(${out_file_path} ${dummy_source_file} PARENT_SCOPE)
endfunction ()
+# Convenience function for adding a dummy source file to $target_name using
+# $extension as the file extension. Wraps create_dummy_source_file().
function (add_dummy_source_file_to_target target_name extension)
create_dummy_source_file("${target_name}" "${extension}" "dummy_source_file")
target_sources(${target_name} PRIVATE ${dummy_source_file})
endfunction ()
+# Sets the value of the variable referenced by $feature to $value, and reports
+# the change to the user via call to message(WARNING ...). $cause is expected to
+# be a configuration variable that conflicts with $feature in some way.
+function (change_config_and_warn feature value cause)
+ set(${feature} ${value} PARENT_SCOPE)
+ if (${value} EQUAL 1)
+ set(verb "Enabled")
+ set(reason "required for")
+ else ()
+ set(verb "Disabled")
+ set(reason "incompatible with")
+ endif ()
+ set(warning_message "${verb} ${feature}, ${reason} ${cause}.")
+ message(WARNING "--- ${warning_message}")
+endfunction ()
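+
+# Example: change_config_and_warn(CONFIG_LGT 0 CONFIG_DAALA_DCTx) sets
+# CONFIG_LGT to 0 in the caller's scope and warns:
+#   --- Disabled CONFIG_LGT, incompatible with CONFIG_DAALA_DCTx.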
+
+# Extracts the version string from $version_file and returns it to the caller
+# via $version_string_out_var. To do this the VERSION_STRING_NOSP define is
+# located in $version_file, and everything but the string literal assigned to
+# it is removed. Quotes and the leading 'v' are stripped from the returned
+# string.
+function (extract_version_string version_file version_string_out_var)
+ file(STRINGS "${version_file}" aom_version REGEX "VERSION_STRING_NOSP")
+ string(REPLACE "#define VERSION_STRING_NOSP " "" aom_version
+ "${aom_version}")
+ string(REPLACE "\"" "" aom_version "${aom_version}")
+ string(REPLACE " " "" aom_version "${aom_version}")
+ string(FIND "${aom_version}" "v" v_pos)
+ if (${v_pos} EQUAL 0)
+ string(SUBSTRING "${aom_version}" 1 -1 aom_version)
+ endif ()
+ set("${version_string_out_var}" "${aom_version}" PARENT_SCOPE)
+endfunction ()
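+
+# Example: a $version_file line of
+#   #define VERSION_STRING_NOSP "v1.0.0-123-gdeadbee"
+# returns "1.0.0-123-gdeadbee" to the caller.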
+
+# Sets the CMake compiler launcher to $launcher_name when $launcher_name is
+# found in $PATH. Otherwise warns that build flag $launcher_flag is ignored
+# because $launcher_name was not found in $PATH.
+function (set_compiler_launcher launcher_flag launcher_name)
+ find_program(launcher_path "${launcher_name}")
+ if (launcher_path)
+ set(CMAKE_C_COMPILER_LAUNCHER "${launcher_path}" PARENT_SCOPE)
+ set(CMAKE_CXX_COMPILER_LAUNCHER "${launcher_path}" PARENT_SCOPE)
+ message("--- Using ${launcher_name} as compiler launcher.")
+ else ()
+ message(WARNING
+ "--- Cannot find ${launcher_name}, ${launcher_flag} ignored.")
+ endif ()
+endfunction ()
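+
+# Example: set_compiler_launcher(ENABLE_CCACHE ccache) sets ccache as the C and
+# C++ compiler launcher when it is found in $PATH, and otherwise warns that
+# ENABLE_CCACHE is ignored.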
+
endif() # AOM_BUILD_CMAKE_UTIL_CMAKE_
diff --git a/third_party/aom/build/cmake/version.cmake b/third_party/aom/build/cmake/version.cmake
new file mode 100644
index 000000000..c2b3bdb61
--- /dev/null
+++ b/third_party/aom/build/cmake/version.cmake
@@ -0,0 +1,55 @@
+##
+## Copyright (c) 2017, Alliance for Open Media. All rights reserved
+##
+## This source code is subject to the terms of the BSD 2 Clause License and
+## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+## was not distributed with this source code in the LICENSE file, you can
+## obtain it at www.aomedia.org/license/software. If the Alliance for Open
+## Media Patent License 1.0 was not distributed with this source code in the
+## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+##
+cmake_minimum_required(VERSION 3.5)
+
+set(REQUIRED_ARGS
+ "AOM_ROOT" "AOM_CONFIG_DIR" "GIT_EXECUTABLE" "PERL_EXECUTABLE")
+
+foreach (arg ${REQUIRED_ARGS})
+ if ("${${arg}}" STREQUAL "")
+ message(FATAL_ERROR "${arg} must not be empty.")
+ endif ()
+endforeach ()
+
+include("${AOM_ROOT}/build/cmake/util.cmake")
+
+# Generate the version string for this run.
+unset(aom_version)
+if (EXISTS "${GIT_EXECUTABLE}")
+ execute_process(COMMAND ${GIT_EXECUTABLE} --git-dir=${AOM_ROOT}/.git describe
+ OUTPUT_VARIABLE aom_version ERROR_QUIET)
+ string(STRIP "${aom_version}" aom_version)
+
+ # Remove the leading 'v' from the version string.
+ string(FIND "${aom_version}" "v" v_pos)
+ if (${v_pos} EQUAL 0)
+ string(SUBSTRING "${aom_version}" 1 -1 aom_version)
+ endif ()
+endif ()
+
+if ("${aom_version}" STREQUAL "")
+ set(aom_version "${AOM_ROOT}/CHANGELOG")
+endif ()
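+
+# At this point aom_version is either a stripped git description (for example
+# "1.0.0-123-gdeadbee") or the path to the CHANGELOG; version.pl accepts either
+# form via --version_data.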
+
+unset(last_aom_version)
+if (EXISTS "${AOM_CONFIG_DIR}/aom_version.h")
+ extract_version_string("${AOM_CONFIG_DIR}/aom_version.h" last_aom_version)
+endif ()
+
+if (NOT "${aom_version}" STREQUAL "${last_aom_version}")
+ # TODO(tomfinegan): Perl dependency is unnecessary. CMake can do everything
+ # that is done by version.pl on its own (if a bit more verbose...).
+ execute_process(
+ COMMAND ${PERL_EXECUTABLE} "${AOM_ROOT}/build/cmake/version.pl"
+ --version_data=${aom_version}
+      --version_filename=${AOM_CONFIG_DIR}/aom_version.h)
+endif ()
diff --git a/third_party/aom/build/cmake/aom_version.pl b/third_party/aom/build/cmake/version.pl
index 3412feebd..323e178f8 100755
--- a/third_party/aom/build/cmake/aom_version.pl
+++ b/third_party/aom/build/cmake/version.pl
@@ -14,6 +14,7 @@ use warnings;
use 5.010;
use Getopt::Long;
+my $git_desc;
my $version_data;
my $version_filename;
GetOptions('version_data=s' => \$version_data,
@@ -27,6 +28,7 @@ if (!defined $version_data || length($version_data) == 0 ||
# Determine if $version_data is a filename or a git tag/description.
my $version_string;
+chomp($version_data);
if (-r $version_data) {
# $version_data is the path to the CHANGELOG. Parse the most recent version.
my $changelog_filename = $version_data;
@@ -45,6 +47,7 @@ if (-r $version_data) {
# tagName OR tagName-commitsSinceTag-shortCommitHash
# In either case we want the first element of the array returned by split.
$version_string = (split("-", $version_data))[0];
+ $git_desc = $version_data;
}
if (substr($version_string, 0, 1) eq "v") {
@@ -80,7 +83,19 @@ my $lic_block = << "EOF";
EOF
select $version_file;
-print << "EOF";
+if (length($git_desc)) {
+ print << "EOF";
+$lic_block
+#define VERSION_MAJOR $version_major
+#define VERSION_MINOR $version_minor
+#define VERSION_PATCH $version_patch
+#define VERSION_EXTRA \"$version_extra\"
+#define VERSION_PACKED $version_packed
+#define VERSION_STRING_NOSP \"$git_desc\"
+#define VERSION_STRING \" $git_desc\"
+EOF
+} else {
+ print << "EOF";
$lic_block
#define VERSION_MAJOR $version_major
#define VERSION_MINOR $version_minor
@@ -90,4 +105,5 @@ $lic_block
#define VERSION_STRING_NOSP \"v$version_string\"
#define VERSION_STRING \" v$version_string\"
EOF
+}
close($version_file);
diff --git a/third_party/aom/configure b/third_party/aom/configure
index a88401431..50a5fb266 100755
--- a/third_party/aom/configure
+++ b/third_party/aom/configure
@@ -44,8 +44,6 @@ Advanced options:
${toggle_multithread} multithreaded encoding and decoding
${toggle_spatial_resampling} spatial sampling (scaling) support
${toggle_realtime_only} enable this option while building for real-time encoding
- ${toggle_onthefly_bitpacking} enable on-the-fly bitpacking in real-time encoding
- ${toggle_error_concealment} enable this option to get a decoder which is able to conceal losses
${toggle_coefficient_range_checking}
enable decoder to check if intermediate
transform coefficients are in valid range
@@ -157,6 +155,10 @@ for t in ${all_targets}; do
[ -f "${source_path}/${t}.mk" ] && enable_feature ${t}
done
+if ! diff --version >/dev/null; then
+ die "diff missing: Try installing diffutils via your package manager."
+fi
+
if ! perl --version >/dev/null; then
die "Perl is required to build"
fi
@@ -246,6 +248,7 @@ HAVE_LIST="
EXPERIMENT_LIST="
fp_mb_stats
cdef
+ cdef_singlepass
var_tx
rect_tx
rect_tx_ext
@@ -254,57 +257,53 @@ EXPERIMENT_LIST="
convolve_round
compound_round
ext_tx
- dpcm_intra
tx64x64
ext_intra
intra_interp
filter_intra
intra_edge
intrabc
- ext_inter
interintra
wedge
compound_segment
ext_refs
- altref2
- speed_refs
- gf_groups
- flex_refs
global_motion
new_quant
supertx
ans
loop_restoration
+ striped_loop_restoration
ext_partition
ext_partition_types
+ ext_partition_types_ab
unpoison_partition_ctx
ext_tile
motion_var
ncobmc
warped_motion
q_adapt_probs
- bitstream_debug
inter_stats_only
- alt_intra
- palette
palette_delta_encoding
rawbits
- ec_smallmul
+ kf_ctx
pvq
cfl
xiphrc
dct_only
+ daala_tx
daala_dct4
daala_dct8
+ daala_dct16
+ daala_dct32
+ daala_dct64
cb4x4
chroma_2x2
chroma_sub8x8
frame_size
- delta_q
ext_delta_q
adapt_scan
- filter_7bit
parallel_deblocking
+ deblock_13tap
loopfiltering_across_tiles
tempmv_signaling
rd_debug
@@ -314,13 +313,13 @@ EXPERIMENT_LIST="
masked_tx
dependent_horztiles
dist_8x8
- daala_dist
- tripred
palette_throughput
ref_adapt
lv_map
+ ctx1d
txk_sel
mv_compress
+ segment_zeromv
frame_superres
new_multisymbol
compound_singleref
@@ -329,15 +328,32 @@ EXPERIMENT_LIST="
ext_comp_refs
smooth_hv
var_refs
- rect_intra_pred
lgt
+ lgt_from_pred
sbl_symbol
ncobmc_adapt_weight
bgsprite
var_tx_no_tx_mode
mrc_tx
lpf_direct
- uv_lvl
+ loopfilter_level
+ no_frame_context_signaling
+ txmg
+ max_tile
+ hash_me
+ colorspace_headers
+ mfmv
+ frame_marker
+ jnt_comp
+ frame_sign_bias
+ ext_skip
+ obu
+ amvr
+ lpf_sb
+ opt_ref_mv
+ tmv
+ restrict_compressed_hdr
+ horzonly_frame_superres
"
CONFIG_LIST="
dependency_tracking
@@ -367,8 +383,6 @@ CONFIG_LIST="
static_msvcrt
spatial_resampling
realtime_only
- onthefly_bitpacking
- error_concealment
shared
static
small
@@ -381,12 +395,13 @@ CONFIG_LIST="
inspection
decode_perf_tests
encode_perf_tests
+ bitstream_debug
+ symbolrate
coefficient_range_checking
lowbitdepth
highbitdepth
experimental
size_limit
- colorspace_headers
${EXPERIMENT_LIST}
analyzer
"
@@ -427,8 +442,6 @@ CMDLINE_SELECT="
static_msvcrt
spatial_resampling
realtime_only
- onthefly_bitpacking
- error_concealment
shared
static
small
@@ -441,10 +454,12 @@ CMDLINE_SELECT="
decode_perf_tests
encode_perf_tests
coefficient_range_checking
+ bitstream_debug
+ symbolrate
lowbitdepth
- aom_highbitdepth
highbitdepth
experimental
+ adopted_experiments
colorspace_headers
"
@@ -491,48 +506,52 @@ post_process_cmdline() {
enabled ${c} && enable_feature ${c##*_}s
done
+ enable_feature cb4x4
+
# Enable adopted experiments by default
- soft_enable cb4x4
- soft_enable chroma_sub8x8
- soft_enable filter_7bit
- soft_enable reference_buffer
- soft_enable delta_q
- soft_enable rect_tx
- soft_enable global_motion
- soft_enable ext_tx
- soft_enable cdef
- soft_enable ext_intra
- soft_enable mv_compress
- soft_enable ext_refs
- soft_enable dual_filter
- soft_enable motion_var
- soft_enable warped_motion
- soft_enable ext_delta_q
- soft_enable loopfiltering_across_tiles
- soft_enable ec_smallmul
- soft_enable var_tx
- soft_enable ext_inter
- soft_enable wedge
- soft_enable compound_segment
- soft_enable interintra
- soft_enable one_sided_compound
- soft_enable smooth_hv
- soft_enable parallel_deblocking
- soft_enable rect_intra_pred
-
- # Backwards/jenkins compatibility with --enable-aom-highbitdepth
- enabled aom_highbitdepth && enable_feature highbitdepth
+ soft_enable adopted_experiments
+ if enabled adopted_experiments; then
+ soft_enable chroma_sub8x8
+ soft_enable rect_tx
+ soft_enable global_motion
+ soft_enable ext_tx
+ soft_enable cdef
+ soft_enable ext_intra
+ soft_enable intra_edge
+ soft_enable mv_compress
+ soft_enable ext_refs
+ soft_enable dual_filter
+ soft_enable motion_var
+ soft_enable warped_motion
+ soft_enable var_tx
+ soft_enable wedge
+ soft_enable compound_segment
+ soft_enable interintra
+ soft_enable one_sided_compound
+ soft_enable convolve_round
+ soft_enable aom_qm
+ soft_enable dist_8x8
+ soft_enable cdef_singlepass
+ soft_enable loop_restoration
+
+ # Provisional adopted
+ soft_enable reference_buffer
+ soft_enable loopfiltering_across_tiles
+ soft_enable palette_throughput
+ soft_enable smooth_hv
+ soft_enable tempmv_signaling
+ soft_enable ext_comp_refs
+ soft_enable ext_delta_q
+ soft_enable parallel_deblocking
+ fi
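+
+  # Note: adopted_experiments is listed in CMDLINE_SELECT, so configuring with
+  # --disable-adopted-experiments skips the entire soft_enable block above.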
# Enable low-bitdepth pixel pipeline by default
soft_enable lowbitdepth
- soft_enable palette
- soft_enable alt_intra
- soft_enable palette_throughput
- soft_enable tempmv_signaling
+ # Enable LBD/HBD txfm consistency tool
+ soft_enable txmg
# Fix up experiment dependencies
- enabled daala_dist && enable_feature dist_8x8
enabled pvq && disable_feature chroma_2x2
enabled pvq && disable_feature rect_tx
enabled pvq && disable_feature ext_tx
@@ -540,48 +559,54 @@ post_process_cmdline() {
enabled pvq && disable_feature highbitdepth
enabled pvq && disable_feature lgt
enabled pvq && disable_feature mrc_tx
- enabled palette_throughput && soft_enable palette
- enabled ext_delta_q && soft_enable delta_q
+ enabled lv_map && disable_feature mrc_tx
+ enabled supertx && disable_feature mrc_tx
+ enabled coef_interleave && disable_feature mrc_tx
+ enabled pvq && disable_feature palette_throughput
+ enabled mrc_tx && enable_feature ext_tx
+ enabled mrc_tx && enable_feature var_tx
enabled txk_sel && soft_enable lv_map
+ enabled ctx1d && soft_enable lv_map
+ enabled ctx1d && soft_enable ext_tx
enabled compound_round && soft_enable convolve_round
- enabled smooth_hv && soft_enable alt_intra
enabled intra_edge && enable_feature ext_intra
enabled chroma_2x2 && disable_feature chroma_sub8x8
- enabled dpcm_intra && enable_feature ext_tx
enabled chroma_sub8x8 && enable_feature cb4x4
- enabled compound_singleref && enable_feature ext_inter
- enabled warped_motion && disable_feature ncobmc_adapt_weight
enabled ncobmc_adapt_weight && enable_feature motion_var
enabled bgsprite && enable_feature global_motion
- enabled ext_comp_refs && enable_feature var_refs
- enabled ext_comp_refs && disable_feature one_sided_compound
- enabled altref2 && enable_feature ext_refs
+ enabled ext_comp_refs && enable_feature ext_refs
+ enabled ext_comp_refs && enable_feature one_sided_compound
enabled rect_tx_ext && enable_feature rect_tx
+ enabled lgt_from_pred && enable_feature ext_tx
+ enabled lgt_from_pred && disable_feature mrc_tx
+ enabled cfl && enable_feature smooth_hv
+ enabled cdef_singlepass && enable_feature cdef
+ enabled new_multisymbol && enable_feature restrict_compressed_hdr
+ enabled mfmv && enable_feature frame_marker
+ enabled jnt_comp && enable_feature frame_marker
+ enabled frame_sign_bias && enable_feature frame_marker
+ enabled txmg && enable_feature highbitdepth
+ enabled ext_skip && enable_feature frame_marker
+ enabled ext_skip && enable_feature ext_refs
+ enabled horzonly_frame_superres && enable_feature frame_superres
- if ! enabled delta_q && enabled ext_delta_q; then
- log_echo "ext_delta_q requires delta_q, so disabling ext_delta_q"
- disable_feature ext_delta_q
- fi
if enabled rawbits && enabled ans; then
log_echo "rawbits requires not ans, so disabling rawbits"
disable_feature rawbits
fi
- if enabled ec_smallmul && enabled ans; then
- log_echo "ec_smallmul requires not ans, so disabling ec_smallmul"
- disable_feature ec_smallmul
+ if enabled daala_tx; then
+ enable_feature daala_dct4
+ enable_feature daala_dct8
+ enable_feature daala_dct16
+ enable_feature daala_dct32
+ enable_feature daala_dct64
fi
- if enabled daala_dct4; then
- enable_feature dct_only
- disable_feature mmx
- disable_feature rect_tx
- disable_feature var_tx
- disable_feature lgt
- enable_feature lowbitdepth
+ if enabled daala_dct64 && ! enabled tx64x64; then
+ log_echo "daala_dct64 requires tx64x64, so disabling daala_dct64"
+ disable_feature daala_dct64
fi
- if enabled daala_dct8; then
- disable_feature mmx
- disable_feature rect_tx
- disable_feature var_tx
+ if enabled daala_dct4 || enabled daala_dct8 || enabled daala_dct16 ||
+ enabled daala_dct32 || enabled daala_dct64; then
disable_feature lgt
enable_feature lowbitdepth
fi
@@ -600,12 +625,33 @@ post_process_cmdline() {
log_echo "disabling supertx"
disable_feature supertx
fi
+ if ! enabled rect_tx; then
+ log_echo "ext_partition_types requires rect_tx;"
+ log_echo "enabling rect_tx;"
+ enable_feature rect_tx
+ fi
fi
# Enable accounting and inspection when building the analyzer
if enabled analyzer; then
soft_enable accounting
soft_enable inspection
fi
+ # Enable hash_me if amvr is enabled
+ if enabled amvr; then
+ log_echo "amvr requires hash_me"
+ enable_feature hash_me
+ fi
+
+ if enabled striped_loop_restoration && ! enabled loop_restoration ; then
+ log_echo "striped_loop_restoration requires loop_restoration"
+    log_echo "enabling loop_restoration"
+ enable_feature loop_restoration
+ fi
+ if enabled striped_loop_restoration && enabled frame_superres ; then
+ log_echo "striped_loop_restoration not compatible with frame_superres"
+ log_echo "disabling striped_loop_restoration"
+ disable_feature striped_loop_restoration
+ fi
}
process_targets() {
diff --git a/third_party/aom/examples.mk b/third_party/aom/examples.mk
index 2aa4c3592..329932670 100644
--- a/third_party/aom/examples.mk
+++ b/third_party/aom/examples.mk
@@ -199,6 +199,26 @@ set_maps.SRCS += video_writer.h video_writer.c
set_maps.SRCS += aom_ports/msvc.h
set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
set_maps.DESCRIPTION = Set active and ROI maps
+ifeq ($(CONFIG_EXT_TILE),yes)
+EXAMPLES-$(CONFIG_AV1_ENCODER) += lightfield_encoder.c
+lightfield_encoder.SRCS += ivfenc.h ivfenc.c
+lightfield_encoder.SRCS += tools_common.h tools_common.c
+lightfield_encoder.SRCS += video_common.h
+lightfield_encoder.SRCS += video_writer.h video_writer.c
+lightfield_encoder.SRCS += aom_ports/msvc.h
+lightfield_encoder.GUID = 73494FA6-4AF9-4763-8FBB-265C92402FD8
+lightfield_encoder.DESCRIPTION = Lightfield encoder loop
+EXAMPLES-$(CONFIG_AV1_DECODER) += lightfield_decoder.c
+lightfield_decoder.SRCS += ivfdec.h ivfdec.c
+lightfield_decoder.SRCS += tools_common.h tools_common.c
+lightfield_decoder.SRCS += video_common.h
+lightfield_decoder.SRCS += video_reader.h video_reader.c
+lightfield_decoder.SRCS += aom_ports/mem_ops.h
+lightfield_decoder.SRCS += aom_ports/mem_ops_aligned.h
+lightfield_decoder.SRCS += aom_ports/msvc.h
+lightfield_decoder.GUID = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
+lightfield_decoder.DESCRIPTION = Lightfield decoder loop
+endif
ifeq ($(CONFIG_AV1_ENCODER),yes)
ifeq ($(CONFIG_AV1_DECODER),yes)
EXAMPLES-$(CONFIG_AV1_ENCODER) += aom_cx_set_ref.c
diff --git a/third_party/aom/examples/aom_cx_set_ref.c b/third_party/aom/examples/aom_cx_set_ref.c
index ff24fa14a..456e81300 100644
--- a/third_party/aom/examples/aom_cx_set_ref.c
+++ b/third_party/aom/examples/aom_cx_set_ref.c
@@ -270,18 +270,18 @@ int main(int argc, char **argv) {
while (aom_img_read(&raw, infile)) {
if (limit && frame_in >= limit) break;
if (update_frame_num > 1 && frame_out + 1 == update_frame_num) {
- aom_ref_frame_t ref;
- ref.frame_type = AOM_LAST_FRAME;
+ av1_ref_frame_t ref;
+ ref.idx = 0;
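+          // idx selects which reference buffer to update; 0 is assumed here
+          // to correspond to the slot previously addressed as AOM_LAST_FRAME.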
ref.img = raw;
// Set reference frame in encoder.
- if (aom_codec_control(&ecodec, AOM_SET_REFERENCE, &ref))
+ if (aom_codec_control(&ecodec, AV1_SET_REFERENCE, &ref))
die_codec(&ecodec, "Failed to set reference frame");
printf(" <SET_REF>");
// If set_reference in decoder is commented out, the enc/dec mismatch
// would be seen.
if (test_decode) {
- if (aom_codec_control(&dcodec, AOM_SET_REFERENCE, &ref))
+ if (aom_codec_control(&dcodec, AV1_SET_REFERENCE, &ref))
die_codec(&dcodec, "Failed to set reference frame");
}
}
diff --git a/third_party/aom/examples/inspect.c b/third_party/aom/examples/inspect.c
index e5c28711f..74e770b84 100644
--- a/third_party/aom/examples/inspect.c
+++ b/third_party/aom/examples/inspect.c
@@ -149,6 +149,11 @@ const map_entry block_size_map[] = {
#if CONFIG_EXT_PARTITION
ENUM(BLOCK_64X128), ENUM(BLOCK_128X64), ENUM(BLOCK_128X128),
#endif
+ ENUM(BLOCK_4X16), ENUM(BLOCK_16X4), ENUM(BLOCK_8X32),
+ ENUM(BLOCK_32X8), ENUM(BLOCK_16X64), ENUM(BLOCK_64X16),
+#if CONFIG_EXT_PARTITION
+ ENUM(BLOCK_32X128), ENUM(BLOCK_128X32),
+#endif
LAST_ENUM
};
@@ -161,8 +166,12 @@ const map_entry tx_size_map[] = {
ENUM(TX_64X64),
#endif
ENUM(TX_4X8), ENUM(TX_8X4), ENUM(TX_8X16), ENUM(TX_16X8),
- ENUM(TX_16X32), ENUM(TX_32X16), ENUM(TX_4X16), ENUM(TX_16X4),
- ENUM(TX_8X32), ENUM(TX_32X8), LAST_ENUM
+ ENUM(TX_16X32), ENUM(TX_32X16),
+#if CONFIG_TX64X64
+ ENUM(TX_32X64), ENUM(TX_64X32),
+#endif // CONFIG_TX64X64
+ ENUM(TX_4X16), ENUM(TX_16X4), ENUM(TX_8X32), ENUM(TX_32X8),
+ LAST_ENUM
};
const map_entry tx_type_map[] = { ENUM(DCT_DCT),
@@ -185,52 +194,36 @@ const map_entry tx_type_map[] = { ENUM(DCT_DCT),
#endif
LAST_ENUM };
-const map_entry prediction_mode_map[] = { ENUM(DC_PRED),
- ENUM(V_PRED),
- ENUM(H_PRED),
- ENUM(D45_PRED),
- ENUM(D135_PRED),
- ENUM(D117_PRED),
- ENUM(D153_PRED),
- ENUM(D207_PRED),
- ENUM(D63_PRED),
-#if CONFIG_ALT_INTRA
- ENUM(SMOOTH_PRED),
+const map_entry prediction_mode_map[] = {
+ ENUM(DC_PRED), ENUM(V_PRED), ENUM(H_PRED),
+ ENUM(D45_PRED), ENUM(D135_PRED), ENUM(D117_PRED),
+ ENUM(D153_PRED), ENUM(D207_PRED), ENUM(D63_PRED),
+ ENUM(SMOOTH_PRED),
#if CONFIG_SMOOTH_HV
- ENUM(SMOOTH_V_PRED),
- ENUM(SMOOTH_H_PRED),
+ ENUM(SMOOTH_V_PRED), ENUM(SMOOTH_H_PRED),
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- ENUM(TM_PRED),
- ENUM(NEARESTMV),
- ENUM(NEARMV),
- ENUM(ZEROMV),
- ENUM(NEWMV),
-#if CONFIG_EXT_INTER
- ENUM(NEAREST_NEARESTMV),
- ENUM(NEAR_NEARMV),
- ENUM(NEAREST_NEWMV),
- ENUM(NEW_NEARESTMV),
- ENUM(NEAR_NEWMV),
- ENUM(NEW_NEARMV),
- ENUM(ZERO_ZEROMV),
- ENUM(NEW_NEWMV),
-#endif
- ENUM(INTRA_INVALID),
- LAST_ENUM };
+ ENUM(TM_PRED), ENUM(NEARESTMV), ENUM(NEARMV),
+ ENUM(ZEROMV), ENUM(NEWMV), ENUM(NEAREST_NEARESTMV),
+ ENUM(NEAR_NEARMV), ENUM(NEAREST_NEWMV), ENUM(NEW_NEARESTMV),
+ ENUM(NEAR_NEWMV), ENUM(NEW_NEARMV), ENUM(ZERO_ZEROMV),
+ ENUM(NEW_NEWMV), ENUM(INTRA_INVALID), LAST_ENUM
+};
#if CONFIG_CFL
const map_entry uv_prediction_mode_map[] = {
- ENUM(UV_DC_PRED), ENUM(UV_V_PRED), ENUM(UV_H_PRED),
- ENUM(UV_D45_PRED), ENUM(UV_D135_PRED), ENUM(UV_D117_PRED),
- ENUM(UV_D153_PRED), ENUM(UV_D207_PRED), ENUM(UV_D63_PRED),
-#if CONFIG_ALT_INTRA
- ENUM(UV_SMOOTH_PRED),
+ ENUM(UV_DC_PRED), ENUM(UV_V_PRED),
+ ENUM(UV_H_PRED), ENUM(UV_D45_PRED),
+ ENUM(UV_D135_PRED), ENUM(UV_D117_PRED),
+ ENUM(UV_D153_PRED), ENUM(UV_D207_PRED),
+ ENUM(UV_D63_PRED), ENUM(UV_SMOOTH_PRED),
#if CONFIG_SMOOTH_HV
ENUM(UV_SMOOTH_V_PRED), ENUM(UV_SMOOTH_H_PRED),
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
- ENUM(UV_TM_PRED), ENUM(UV_MODE_INVALID), LAST_ENUM
+ ENUM(UV_TM_PRED),
+#if CONFIG_CFL
+ ENUM(UV_CFL_PRED),
+#endif
+ ENUM(UV_MODE_INVALID), LAST_ENUM
};
#else
#define uv_prediction_mode_map prediction_mode_map
diff --git a/third_party/aom/examples/lightfield_decoder.c b/third_party/aom/examples/lightfield_decoder.c
new file mode 100644
index 000000000..8743df9bc
--- /dev/null
+++ b/third_party/aom/examples/lightfield_decoder.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+// Lightfield Decoder
+// ==================
+//
+// This is an example of a simple lightfield decoder. It builds upon the
+// simple_decoder.c example. It takes an input file containing the compressed
+// data (in webm format), treating it as a lightfield instead of a video, and
+// decodes a single lightfield tile. The lf_width and lf_height arguments
+// are the number of lightfield images in each dimension. The tile to decode
+// is specified by the tile_u, tile_v, tile_s, tile_t arguments: tile_u and
+// tile_v select the image; tile_s and tile_t select the tile within it.
+// After running the lightfield encoder, run the lightfield decoder to decode
+// a single tile:
+// examples/lightfield_decoder vase10x10.webm vase_tile.yuv 10 10 3 4 5 10 5
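+//
+// As a worked example using the command above: lf_blocksize 5 on a 10x10
+// lightfield gives ceil(10 / 5) = 2 reference blocks per dimension, so the
+// stream starts with 2 * 2 = 4 reference frames; tile_u = 3, tile_v = 4
+// select image 4 * 10 + 3 = 43 in raster order, whose reference block is
+// (3 / 5, 4 / 5) = (0, 0).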
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "aom/aom_decoder.h"
+#include "aom/aomdx.h"
+
+#include "../tools_common.h"
+#include "../video_reader.h"
+#include "./aom_config.h"
+
+static const char *exec_name;
+
+void usage_exit(void) {
+ fprintf(stderr,
+ "Usage: %s <infile> <outfile> <lf_width> <lf_height> <tile_u>"
+ " <tile_v> <tile_s> <tile_t> <lf_blocksize>\n",
+ exec_name);
+ exit(EXIT_FAILURE);
+}
+
+aom_image_t *aom_img_copy(aom_image_t *src, aom_image_t *dst) {
+ dst = aom_img_alloc(dst, src->fmt, src->d_w, src->d_h, 16);
+
+ int plane;
+
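+ // This copy assumes 4:2:0 subsampling: chroma planes are half width/height.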
+ for (plane = 0; plane < 3; ++plane) {
+ uint8_t *src_buf = src->planes[plane];
+ const int src_stride = src->stride[plane];
+ const int src_w = plane == 0 ? src->d_w : src->d_w >> 1;
+ const int src_h = plane == 0 ? src->d_h : src->d_h >> 1;
+
+ uint8_t *dst_buf = dst->planes[plane];
+ const int dst_stride = dst->stride[plane];
+ int y;
+
+ for (y = 0; y < src_h; ++y) {
+ memcpy(dst_buf, src_buf, src_w);
+ src_buf += src_stride;
+ dst_buf += dst_stride;
+ }
+ }
+ return dst;
+}
+
+int main(int argc, char **argv) {
+ int frame_cnt = 0;
+ FILE *outfile = NULL;
+ aom_codec_ctx_t codec;
+ AvxVideoReader *reader = NULL;
+ const AvxInterface *decoder = NULL;
+ const AvxVideoInfo *info = NULL;
+ const char *lf_width_arg;
+ const char *lf_height_arg;
+ const char *tile_u_arg;
+ const char *tile_v_arg;
+ const char *tile_s_arg;
+ const char *tile_t_arg;
+ const char *lf_blocksize_arg;
+ int lf_width, lf_height;
+ int tile_u, tile_v, tile_s, tile_t;
+ int lf_blocksize;
+ int u_blocks;
+ int v_blocks;
+
+ exec_name = argv[0];
+
+ if (argc != 10) die("Invalid number of arguments.");
+
+ reader = aom_video_reader_open(argv[1]);
+ if (!reader) die("Failed to open %s for reading.", argv[1]);
+
+ if (!(outfile = fopen(argv[2], "wb")))
+ die("Failed to open %s for writing.", argv[2]);
+
+ lf_width_arg = argv[3];
+ lf_height_arg = argv[4];
+ tile_u_arg = argv[5];
+ tile_v_arg = argv[6];
+ tile_s_arg = argv[7];
+ tile_t_arg = argv[8];
+ lf_blocksize_arg = argv[9];
+ lf_width = (int)strtol(lf_width_arg, NULL, 0);
+ lf_height = (int)strtol(lf_height_arg, NULL, 0);
+ tile_u = (int)strtol(tile_u_arg, NULL, 0);
+ tile_v = (int)strtol(tile_v_arg, NULL, 0);
+ tile_s = (int)strtol(tile_s_arg, NULL, 0);
+ tile_t = (int)strtol(tile_t_arg, NULL, 0);
+ lf_blocksize = (int)strtol(lf_blocksize_arg, NULL, 0);
+
+ info = aom_video_reader_get_info(reader);
+
+ decoder = get_aom_decoder_by_fourcc(info->codec_fourcc);
+ if (!decoder) die("Unknown input codec.");
+
+ printf("Using %s\n", aom_codec_iface_name(decoder->codec_interface()));
+
+ if (aom_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0))
+ die_codec(&codec, "Failed to initialize decoder.");
+
+ // How many reference images were encoded; these must be decoded first.
+ u_blocks = (lf_width + lf_blocksize - 1) / lf_blocksize;
+ v_blocks = (lf_height + lf_blocksize - 1) / lf_blocksize;
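+ // e.g. lf_width = lf_height = 10 with lf_blocksize = 5 (the values from the
+ // header comment) gives (10 + 5 - 1) / 5 = 2 blocks per dimension.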
+ aom_image_t *reference_images =
+ (aom_image_t *)malloc(u_blocks * v_blocks * sizeof(aom_image_t));
+ for (int bv = 0; bv < v_blocks; ++bv) {
+ for (int bu = 0; bu < u_blocks; ++bu) {
+ aom_video_reader_read_frame(reader);
+ aom_codec_iter_t iter = NULL;
+ aom_image_t *img = NULL;
+ size_t frame_size = 0;
+ const unsigned char *frame =
+ aom_video_reader_get_frame(reader, &frame_size);
+ if (aom_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 0))
+ die_codec(&codec, "Failed to decode frame.");
+
+ while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) {
+ aom_img_copy(img, &reference_images[bu + bv * u_blocks]);
+ char name[1024];
+ snprintf(name, sizeof(name), "ref_%d_%d.yuv", bu, bv);
+ printf("writing ref image to %s, %d, %d\n", name, img->d_w, img->d_h);
+ FILE *ref_file = fopen(name, "wb");
+ aom_img_write(img, ref_file);
+ fclose(ref_file);
+ ++frame_cnt;
+ }
+ }
+ }
+
+ int decode_frame_index = tile_v * lf_width + tile_u;
+ do {
+ aom_video_reader_read_frame(reader);
+ } while (frame_cnt++ != decode_frame_index);
+ size_t frame_size = 0;
+ const unsigned char *frame = aom_video_reader_get_frame(reader, &frame_size);
+
+ int ref_bu = tile_u / lf_blocksize;
+ int ref_bv = tile_v / lf_blocksize;
+ int ref_bi = ref_bu + ref_bv * u_blocks;
+ av1_ref_frame_t ref;
+ ref.idx = 0;
+ ref.img = reference_images[ref_bi];
+ // This is too slow for real lightfield rendering. This copies the
+ // reference image bytes. We need a way to just set a pointer
+ // in order to make this fast enough.
+ if (aom_codec_control(&codec, AV1_SET_REFERENCE, &ref)) {
+ die_codec(&codec, "Failed to set reference image.");
+ }
+ aom_codec_control_(&codec, AV1_SET_DECODE_TILE_ROW, tile_t);
+ aom_codec_control_(&codec, AV1_SET_DECODE_TILE_COL, tile_s);
+ aom_codec_err_t aom_status =
+ aom_codec_decode(&codec, frame, frame_size, NULL, 0);
+ if (aom_status) die_codec(&codec, "Failed to decode tile.");
+ aom_codec_iter_t iter = NULL;
+ aom_image_t *img = aom_codec_get_frame(&codec, &iter);
+ aom_img_write(img, outfile);
+
+ if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
+ aom_video_reader_close(reader);
+ fclose(outfile);
+
+ return EXIT_SUCCESS;
+}
diff --git a/third_party/aom/examples/lightfield_encoder.c b/third_party/aom/examples/lightfield_encoder.c
new file mode 100644
index 000000000..0a424db8c
--- /dev/null
+++ b/third_party/aom/examples/lightfield_encoder.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+// Lightfield Encoder
+// ==================
+//
+// This is an example of a simple lightfield encoder. It builds upon the
+// twopass_encoder.c example. It takes an input file in YV12 format,
+// treating it as a planar lightfield instead of a video. The img_width
+// and img_height arguments are the dimensions of the lightfield images,
+// while the lf_width and lf_height arguments are the number of
+// lightfield images in each dimension. The lf_blocksize determines the
+// number of reference images used for MCP. For example, 5 means that there
+// is a reference image for every 5x5 lightfield image block. All images
+// within a block will use the center image in that block as the reference
+// image for MCP.
+// Run "make test" to download lightfield test data: vase10x10.yuv.
+// Run the lightfield encoder to encode the whole lightfield:
+// examples/lightfield_encoder 1024 1024 vase10x10.yuv vase10x10.webm 10 10 5
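+//
+// As an illustrative sketch of the reference selection (numbers taken from
+// the command above): with lf_blocksize 5, block (bu, bv) spans images
+// u in [5 * bu, 5 * bu + 5) and v in [5 * bv, 5 * bv + 5), and its MCP
+// reference is the center image (5 * bu + 2, 5 * bv + 2), computed below as
+// block_u_min + u_block_size / 2.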
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "aom/aom_encoder.h"
+#include "aom/aomcx.h"
+#include "av1/common/enums.h"
+
+#include "../tools_common.h"
+#include "../video_writer.h"
+
+static const char *exec_name;
+static const unsigned int deadline = AOM_DL_GOOD_QUALITY;
+
+void usage_exit(void) {
+ fprintf(stderr,
+ "Usage: %s <img_width> <img_height> <infile> <outfile> "
+ "<lf_width> <lf_height> <lf_blocksize>\n",
+ exec_name);
+ exit(EXIT_FAILURE);
+}
+
+static aom_image_t *aom_img_copy(aom_image_t *src, aom_image_t *dst) {
+ dst = aom_img_alloc(dst, src->fmt, src->d_w, src->d_h, 16);
+
+ int plane;
+
+ for (plane = 0; plane < 3; ++plane) {
+ unsigned char *src_buf = src->planes[plane];
+ const int src_stride = src->stride[plane];
+ const int src_w = plane == 0 ? src->d_w : src->d_w >> 1;
+ const int src_h = plane == 0 ? src->d_h : src->d_h >> 1;
+
+ unsigned char *dst_buf = dst->planes[plane];
+ const int dst_stride = dst->stride[plane];
+ int y;
+
+ for (y = 0; y < src_h; ++y) {
+ memcpy(dst_buf, src_buf, src_w);
+ src_buf += src_stride;
+ dst_buf += dst_stride;
+ }
+ }
+ return dst;
+}
+
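+// Bytes per raw frame in the input file; pass0/pass1 use this to fseek
+// straight to image (u + v * lf_width). For 8-bit I420 input this is
+// w * h * 3 / 2 bytes, e.g. 1024 * 1024 * 3 / 2 = 1572864 for the
+// 1024x1024 example above.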
+static int aom_img_size_bytes(aom_image_t *img) {
+ int image_size_bytes = 0;
+ int plane;
+ for (plane = 0; plane < 3; ++plane) {
+ const int w = aom_img_plane_width(img, plane) *
+ ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
+ const int h = aom_img_plane_height(img, plane);
+ image_size_bytes += w * h;
+ }
+ return image_size_bytes;
+}
+
+static int get_frame_stats(aom_codec_ctx_t *ctx, const aom_image_t *img,
+ aom_codec_pts_t pts, unsigned int duration,
+ aom_enc_frame_flags_t flags, unsigned int dl,
+ aom_fixed_buf_t *stats) {
+ int got_pkts = 0;
+ aom_codec_iter_t iter = NULL;
+ const aom_codec_cx_pkt_t *pkt = NULL;
+ const aom_codec_err_t res =
+ aom_codec_encode(ctx, img, pts, duration, flags, dl);
+ if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to get frame stats.");
+
+ while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) {
+ got_pkts = 1;
+
+ if (pkt->kind == AOM_CODEC_STATS_PKT) {
+ const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf;
+ const size_t pkt_size = pkt->data.twopass_stats.sz;
+ stats->buf = realloc(stats->buf, stats->sz + pkt_size);
+ memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size);
+ stats->sz += pkt_size;
+ }
+ }
+
+ return got_pkts;
+}
+
+static int encode_frame(aom_codec_ctx_t *ctx, const aom_image_t *img,
+ aom_codec_pts_t pts, unsigned int duration,
+ aom_enc_frame_flags_t flags, unsigned int dl,
+ AvxVideoWriter *writer) {
+ int got_pkts = 0;
+ aom_codec_iter_t iter = NULL;
+ const aom_codec_cx_pkt_t *pkt = NULL;
+ const aom_codec_err_t res =
+ aom_codec_encode(ctx, img, pts, duration, flags, dl);
+ if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to encode frame.");
+
+ while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) {
+ got_pkts = 1;
+ if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) {
+ const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0;
+
+ if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf,
+ pkt->data.frame.sz,
+ pkt->data.frame.pts))
+ die_codec(ctx, "Failed to write compressed frame.");
+ printf(keyframe ? "K" : ".");
+ fflush(stdout);
+ }
+ }
+
+ return got_pkts;
+}
+
+static aom_fixed_buf_t pass0(aom_image_t *raw, FILE *infile,
+ const AvxInterface *encoder,
+ const aom_codec_enc_cfg_t *cfg, int lf_width,
+ int lf_height, int lf_blocksize) {
+ aom_codec_ctx_t codec;
+ int frame_count = 0;
+ int image_size_bytes = 0;
+ int u_blocks, v_blocks;
+ int bu, bv;
+ aom_fixed_buf_t stats = { NULL, 0 };
+
+ if (aom_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0))
+ die_codec(&codec, "Failed to initialize encoder");
+ if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 1))
+ die_codec(&codec, "Failed to set frame parallel decoding");
+ if (aom_codec_control(&codec, AOME_SET_ENABLEAUTOALTREF, 0))
+ die_codec(&codec, "Failed to turn off auto altref");
+ if (aom_codec_control(&codec, AV1E_SET_SINGLE_TILE_DECODING, 1))
+ die_codec(&codec, "Failed to turn on single tile decoding");
+
+ image_size_bytes = aom_img_size_bytes(raw);
+
+ // How many reference images we need to encode.
+ u_blocks = (lf_width + lf_blocksize - 1) / lf_blocksize;
+ v_blocks = (lf_height + lf_blocksize - 1) / lf_blocksize;
+ aom_image_t *reference_images =
+ (aom_image_t *)malloc(u_blocks * v_blocks * sizeof(aom_image_t));
+ for (bv = 0; bv < v_blocks; ++bv) {
+ for (bu = 0; bu < u_blocks; ++bu) {
+ const int block_u_min = bu * lf_blocksize;
+ const int block_v_min = bv * lf_blocksize;
+ int block_u_end = (bu + 1) * lf_blocksize;
+ int block_v_end = (bv + 1) * lf_blocksize;
+ int u_block_size, v_block_size;
+ int block_ref_u, block_ref_v;
+ struct av1_ref_frame ref_frame;
+
+ block_u_end = block_u_end < lf_width ? block_u_end : lf_width;
+ block_v_end = block_v_end < lf_height ? block_v_end : lf_height;
+ u_block_size = block_u_end - block_u_min;
+ v_block_size = block_v_end - block_v_min;
+ block_ref_u = block_u_min + u_block_size / 2;
+ block_ref_v = block_v_min + v_block_size / 2;
+ fseek(infile, (block_ref_u + block_ref_v * lf_width) * image_size_bytes,
+ SEEK_SET);
+ aom_img_read(raw, infile);
+ if (aom_codec_control(&codec, AOME_USE_REFERENCE,
+ AOM_LAST_FLAG | AOM_GOLD_FLAG | AOM_ALT_FLAG))
+ die_codec(&codec, "Failed to set reference flags");
+ // Reference frames can be encoded without tiles.
+ ++frame_count;
+ get_frame_stats(&codec, raw, frame_count, 1,
+ AOM_EFLAG_FORCE_GF | AOM_EFLAG_NO_UPD_ENTROPY, deadline,
+ &stats);
+ ref_frame.idx = 0;
+ aom_codec_control(&codec, AV1_GET_REFERENCE, &ref_frame);
+ aom_img_copy(&ref_frame.img, &reference_images[frame_count - 1]);
+ }
+ }
+ for (bv = 0; bv < v_blocks; ++bv) {
+ for (bu = 0; bu < u_blocks; ++bu) {
+ const int block_u_min = bu * lf_blocksize;
+ const int block_v_min = bv * lf_blocksize;
+ int block_u_end = (bu + 1) * lf_blocksize;
+ int block_v_end = (bv + 1) * lf_blocksize;
+ int u, v;
+ block_u_end = block_u_end < lf_width ? block_u_end : lf_width;
+ block_v_end = block_v_end < lf_height ? block_v_end : lf_height;
+ for (v = block_v_min; v < block_v_end; ++v) {
+ for (u = block_u_min; u < block_u_end; ++u) {
+ // This was a workaround for a bug in libvpx. I'm not sure if the same
+ // bug exists in the current version of av1. We need to call this;
+ // otherwise the default is to not use any reference frames. Then, if
+ // you don't have at least one AOM_EFLAG_NO_REF_* flag, all frames will
+ // be intra encoded. I'm not sure why the default is not to use any
+ // reference frames; it looks like something about the way I encode the
+ // reference frames above sets that as the default...
+ if (aom_codec_control(&codec, AOME_USE_REFERENCE,
+ AOM_LAST_FLAG | AOM_GOLD_FLAG | AOM_ALT_FLAG))
+ die_codec(&codec, "Failed to set reference flags");
+
+ // Set tile size to 64 pixels. The tile_columns and tile_rows controls
+ // in the tile coding are overloaded to represent tile_width and
+ // tile_height, which range from 1 to 64 in units of 64 pixels.
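+ // With the value 1 passed below, tiles are thus 1 * 64 = 64 pixels in
+ // each dimension.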
+ if (aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS, 1))
+ die_codec(&codec, "Failed to set tile width");
+ if (aom_codec_control(&codec, AV1E_SET_TILE_ROWS, 1))
+ die_codec(&codec, "Failed to set tile height");
+
+ av1_ref_frame_t ref;
+ ref.idx = 0;
+ ref.img = reference_images[bv * u_blocks + bu];
+ if (aom_codec_control(&codec, AV1_SET_REFERENCE, &ref))
+ die_codec(&codec, "Failed to set reference frame");
+
+ fseek(infile, (u + v * lf_width) * image_size_bytes, SEEK_SET);
+ aom_img_read(raw, infile);
+ ++frame_count;
+ get_frame_stats(&codec, raw, frame_count, 1,
+ AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
+ AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY |
+ AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF,
+ deadline, &stats);
+ }
+ }
+ }
+ }
+ // Flush encoder.
+ while (get_frame_stats(&codec, NULL, frame_count, 1, 0, deadline, &stats)) {
+ }
+
+ printf("Pass 0 complete. Processed %d frames.\n", frame_count);
+ if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
+
+ return stats;
+}
+
+static void pass1(aom_image_t *raw, FILE *infile, const char *outfile_name,
+ const AvxInterface *encoder, const aom_codec_enc_cfg_t *cfg,
+ int lf_width, int lf_height, int lf_blocksize) {
+ AvxVideoInfo info = { encoder->fourcc,
+ cfg->g_w,
+ cfg->g_h,
+ { cfg->g_timebase.num, cfg->g_timebase.den } };
+ AvxVideoWriter *writer = NULL;
+ aom_codec_ctx_t codec;
+ int frame_count = 0;
+ int image_size_bytes;
+ int bu, bv;
+ int u_blocks, v_blocks;
+
+ writer = aom_video_writer_open(outfile_name, kContainerIVF, &info);
+ if (!writer) die("Failed to open %s for writing", outfile_name);
+
+ if (aom_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0))
+ die_codec(&codec, "Failed to initialize encoder");
+ if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 1))
+ die_codec(&codec, "Failed to set frame parallel decoding");
+ if (aom_codec_control(&codec, AOME_SET_ENABLEAUTOALTREF, 0))
+ die_codec(&codec, "Failed to turn off auto altref");
+ if (aom_codec_control(&codec, AV1E_SET_SINGLE_TILE_DECODING, 1))
+ die_codec(&codec, "Failed to turn on single tile decoding");
+
+ image_size_bytes = aom_img_size_bytes(raw);
+ u_blocks = (lf_width + lf_blocksize - 1) / lf_blocksize;
+ v_blocks = (lf_height + lf_blocksize - 1) / lf_blocksize;
+ aom_image_t *reference_images =
+ (aom_image_t *)malloc(u_blocks * v_blocks * sizeof(aom_image_t));
+ // Encode reference images first.
+ printf("Encoding Reference Images\n");
+ for (bv = 0; bv < v_blocks; ++bv) {
+ for (bu = 0; bu < u_blocks; ++bu) {
+ const int block_u_min = bu * lf_blocksize;
+ const int block_v_min = bv * lf_blocksize;
+ int block_u_end = (bu + 1) * lf_blocksize;
+ int block_v_end = (bv + 1) * lf_blocksize;
+ int u_block_size, v_block_size;
+ int block_ref_u, block_ref_v;
+ struct av1_ref_frame ref_frame;
+
+ block_u_end = block_u_end < lf_width ? block_u_end : lf_width;
+ block_v_end = block_v_end < lf_height ? block_v_end : lf_height;
+ u_block_size = block_u_end - block_u_min;
+ v_block_size = block_v_end - block_v_min;
+ block_ref_u = block_u_min + u_block_size / 2;
+ block_ref_v = block_v_min + v_block_size / 2;
+ fseek(infile, (block_ref_u + block_ref_v * lf_width) * image_size_bytes,
+ SEEK_SET);
+ aom_img_read(raw, infile);
+ if (aom_codec_control(&codec, AOME_USE_REFERENCE,
+ AOM_LAST_FLAG | AOM_GOLD_FLAG | AOM_ALT_FLAG))
+ die_codec(&codec, "Failed to set reference flags");
+ // Reference frames may be encoded without tiles.
+ ++frame_count;
+ printf("Encoding reference image %d of %d\n", bv * u_blocks + bu,
+ u_blocks * v_blocks);
+ encode_frame(&codec, raw, frame_count, 1,
+ AOM_EFLAG_FORCE_GF | AOM_EFLAG_NO_UPD_ENTROPY, deadline,
+ writer);
+ ref_frame.idx = 0;
+ aom_codec_control(&codec, AV1_GET_REFERENCE, &ref_frame);
+ aom_img_copy(&ref_frame.img, &reference_images[frame_count - 1]);
+ }
+ }
+
+ for (bv = 0; bv < v_blocks; ++bv) {
+ for (bu = 0; bu < u_blocks; ++bu) {
+ const int block_u_min = bu * lf_blocksize;
+ const int block_v_min = bv * lf_blocksize;
+ int block_u_end = (bu + 1) * lf_blocksize;
+ int block_v_end = (bv + 1) * lf_blocksize;
+ int u, v;
+ block_u_end = block_u_end < lf_width ? block_u_end : lf_width;
+ block_v_end = block_v_end < lf_height ? block_v_end : lf_height;
+ for (v = block_v_min; v < block_v_end; ++v) {
+ for (u = block_u_min; u < block_u_end; ++u) {
+ // This was a workaround for a bug in libvpx. I'm not sure if the same
+ // bug exists in the current version of av1. We need to call this;
+ // otherwise the default is to not use any reference frames. Then, if
+ // you don't have at least one AOM_EFLAG_NO_REF_* flag, all frames will
+ // be intra encoded. I'm not sure why the default is not to use any
+ // reference frames; it looks like something about the way I encode the
+ // reference frames above sets that as the default...
+ if (aom_codec_control(&codec, AOME_USE_REFERENCE,
+ AOM_LAST_FLAG | AOM_GOLD_FLAG | AOM_ALT_FLAG))
+ die_codec(&codec, "Failed to set reference flags");
+
+ // Set tile size to 64 pixels. The tile_columns and tile_rows controls
+ // in the tile coding are overloaded to represent tile_width and
+ // tile_height, which range from 1 to 64 in units of 64 pixels.
+ if (aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS, 1))
+ die_codec(&codec, "Failed to set tile width");
+ if (aom_codec_control(&codec, AV1E_SET_TILE_ROWS, 1))
+ die_codec(&codec, "Failed to set tile height");
+
+ av1_ref_frame_t ref;
+ ref.idx = 0;
+ ref.img = reference_images[bv * u_blocks + bu];
+ if (aom_codec_control(&codec, AV1_SET_REFERENCE, &ref))
+ die_codec(&codec, "Failed to set reference frame");
+ fseek(infile, (u + v * lf_width) * image_size_bytes, SEEK_SET);
+ aom_img_read(raw, infile);
+ ++frame_count;
+
+ printf("Encoding image %d of %d\n",
+ frame_count - (u_blocks * v_blocks), lf_width * lf_height);
+ encode_frame(&codec, raw, frame_count, 1,
+ AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF |
+ AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY |
+ AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF,
+ deadline, writer);
+ }
+ }
+ }
+ }
+
+ // Flush encoder.
+ while (encode_frame(&codec, NULL, -1, 1, 0, deadline, writer)) {
+ }
+
+ if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");
+
+ aom_video_writer_close(writer);
+
+ printf("Pass 1 complete. Processed %d frames.\n", frame_count);
+}
+
+int main(int argc, char **argv) {
+ FILE *infile = NULL;
+ int w, h;
+ // The number of lightfield images in the u and v dimensions.
+ int lf_width, lf_height;
+ // Defines how many images refer to the same reference image for MCP.
+ // lf_blocksize X lf_blocksize images will all use the reference image
+ // in the middle of the block of images.
+ int lf_blocksize;
+ aom_codec_ctx_t codec;
+ aom_codec_enc_cfg_t cfg;
+ aom_image_t raw;
+ aom_codec_err_t res;
+ aom_fixed_buf_t stats;
+
+ const AvxInterface *encoder = NULL;
+ const int fps = 30;
+ const int bitrate = 200; // kbit/s
+ const char *const width_arg = argv[1];
+ const char *const height_arg = argv[2];
+ const char *const infile_arg = argv[3];
+ const char *const outfile_arg = argv[4];
+ const char *const lf_width_arg = argv[5];
+ const char *const lf_height_arg = argv[6];
+ const char *lf_blocksize_arg = argv[7];
+ exec_name = argv[0];
+
+ if (argc < 8) die("Invalid number of arguments");
+
+ encoder = get_aom_encoder_by_name("av1");
+ if (!encoder) die("Unsupported codec.");
+
+ w = (int)strtol(width_arg, NULL, 0);
+ h = (int)strtol(height_arg, NULL, 0);
+ lf_width = (int)strtol(lf_width_arg, NULL, 0);
+ lf_height = (int)strtol(lf_height_arg, NULL, 0);
+ lf_blocksize = (int)strtol(lf_blocksize_arg, NULL, 0);
+ lf_blocksize = lf_blocksize < lf_width ? lf_blocksize : lf_width;
+ lf_blocksize = lf_blocksize < lf_height ? lf_blocksize : lf_height;
+
+ if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0)
+ die("Invalid frame size: %dx%d", w, h);
+ if (lf_width <= 0 || lf_height <= 0)
+ die("Invalid lf_width and/or lf_height: %dx%d", lf_width, lf_height);
+ if (lf_blocksize <= 0) die("Invalid lf_blocksize: %d", lf_blocksize);
+
+ if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, w, h, 1))
+ die("Failed to allocate image: %dx%d", w, h);
+
+ printf("Using %s\n", aom_codec_iface_name(encoder->codec_interface()));
+
+ // Configuration
+ res = aom_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
+
+ if (res) die_codec(&codec, "Failed to get default codec config.");
+
+ cfg.g_w = w;
+ cfg.g_h = h;
+ cfg.g_timebase.num = 1;
+ cfg.g_timebase.den = fps;
+ cfg.rc_target_bitrate = bitrate;
+ cfg.g_error_resilient = AOM_ERROR_RESILIENT_DEFAULT;
+ // Need to set lag_in_frames to 1 or 0. Otherwise the frame flags get
+ // overridden after the first frame in encode_frame_to_data_rate() (see where
+ // get_frame_flags() is called).
+ cfg.g_lag_in_frames = 0;
+ cfg.kf_mode = AOM_KF_DISABLED;
+ cfg.large_scale_tile = 1;
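+ // Assumption: large_scale_tile selects the ext-tile coding path in which
+ // tiles can be decoded individually, matching the single tile decoding
+ // control enabled in pass0()/pass1().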
+
+ if (!(infile = fopen(infile_arg, "rb")))
+ die("Failed to open %s for reading", infile_arg);
+
+ // Pass 0
+ cfg.g_pass = AOM_RC_FIRST_PASS;
+ stats = pass0(&raw, infile, encoder, &cfg, lf_width, lf_height, lf_blocksize);
+
+ // Pass 1
+ rewind(infile);
+ cfg.g_pass = AOM_RC_LAST_PASS;
+ cfg.rc_twopass_stats_in = stats;
+ pass1(&raw, infile, outfile_arg, encoder, &cfg, lf_width, lf_height,
+ lf_blocksize);
+ free(stats.buf);
+
+ aom_img_free(&raw);
+ fclose(infile);
+
+ return EXIT_SUCCESS;
+}
diff --git a/third_party/aom/test/ans_test.cc b/third_party/aom/test/ans_test.cc
index a553a9e84..fd460f409 100644
--- a/third_party/aom/test/ans_test.cc
+++ b/third_party/aom/test/ans_test.cc
@@ -51,7 +51,8 @@ PvVec abs_encode_build_vals(int iters) {
bool check_rabs(const PvVec &pv_vec, uint8_t *buf) {
BufAnsCoder a;
- aom_buf_ans_alloc(&a, NULL, kBufAnsSize);
+ a.size = kBufAnsSize;
+ aom_buf_ans_alloc(&a, NULL);
buf_ans_write_init(&a, buf);
std::clock_t start = std::clock();
@@ -125,7 +126,8 @@ void rans_build_dec_tab(const struct rans_sym sym_tab[],
bool check_rans(const std::vector<int> &sym_vec, const rans_sym *const tab,
uint8_t *buf) {
BufAnsCoder a;
- aom_buf_ans_alloc(&a, NULL, kBufAnsSize);
+ a.size = kBufAnsSize;
+ aom_buf_ans_alloc(&a, NULL);
buf_ans_write_init(&a, buf);
aom_cdf_prob dec_tab[kRansSymbols];
rans_build_dec_tab(tab, dec_tab);
diff --git a/third_party/aom/test/aq_segment_test.cc b/third_party/aom/test/aq_segment_test.cc
index 026b0022b..57db0d0ff 100644
--- a/third_party/aom/test/aq_segment_test.cc
+++ b/third_party/aom/test/aq_segment_test.cc
@@ -90,7 +90,7 @@ TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ2) { DoTest(2); }
TEST_P(AqSegmentTestLarge, TestNoMisMatchAQ3) { DoTest(3); }
-#if CONFIG_DELTA_Q & !CONFIG_EXT_DELTA_Q
+#if !CONFIG_EXT_DELTA_Q
// Validate that this AQ mode (AQ=4, delta q)
// encodes and decodes without a mismatch.
TEST_P(AqSegmentTest, TestNoMisMatchAQ4) {
diff --git a/third_party/aom/test/av1_convolve_2d_test.cc b/third_party/aom/test/av1_convolve_2d_test.cc
index b066dd4f8..002ede403 100644
--- a/third_party/aom/test/av1_convolve_2d_test.cc
+++ b/third_party/aom/test/av1_convolve_2d_test.cc
@@ -22,14 +22,14 @@ using libaom_test::AV1HighbdConvolve2D::AV1HighbdConvolve2DTest;
namespace {
-TEST_P(AV1Convolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
+TEST_P(AV1Convolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(2)); }
INSTANTIATE_TEST_CASE_P(
SSE2, AV1Convolve2DTest,
libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sse2));
#if CONFIG_HIGHBITDEPTH && HAVE_SSSE3
-TEST_P(AV1HighbdConvolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(4)); }
+TEST_P(AV1HighbdConvolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdConvolve2DTest,
libaom_test::AV1HighbdConvolve2D::BuildParams(
diff --git a/third_party/aom/test/av1_convolve_2d_test_util.cc b/third_party/aom/test/av1_convolve_2d_test_util.cc
index 8cec216af..3b61f6bb7 100644
--- a/third_party/aom/test/av1_convolve_2d_test_util.cc
+++ b/third_party/aom/test/av1_convolve_2d_test_util.cc
@@ -23,9 +23,9 @@ namespace AV1Convolve2D {
::testing::internal::ParamGenerator<Convolve2DParam> BuildParams(
convolve_2d_func filter) {
const Convolve2DParam params[] = {
- make_tuple(4, 4, 20, filter), make_tuple(8, 8, 10, filter),
- make_tuple(64, 64, 1, filter), make_tuple(4, 16, 10, filter),
- make_tuple(32, 8, 5, filter),
+ make_tuple(4, 4, filter), make_tuple(8, 8, filter),
+ make_tuple(64, 64, filter), make_tuple(4, 16, filter),
+ make_tuple(32, 8, filter),
};
return ::testing::ValuesIn(params);
}
@@ -38,7 +38,6 @@ void AV1Convolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
const int w = 128, h = 128;
const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
- const int num_iters = GET_PARAM(2);
int i, j, k;
uint8_t *input = new uint8_t[h * w];
@@ -50,9 +49,6 @@ void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
- memset(output, 0, output_n * sizeof(CONV_BUF_TYPE));
- memset(output2, 0, output_n * sizeof(CONV_BUF_TYPE));
-
int hfilter, vfilter, subx, suby;
for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL; ++vfilter) {
@@ -60,13 +56,20 @@ void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
av1_get_interp_filter_params((InterpFilter)hfilter);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params((InterpFilter)vfilter);
+ const int do_average = rnd_.Rand8() & 1;
ConvolveParams conv_params1 =
- get_conv_params_no_round(0, 0, 0, output, MAX_SB_SIZE);
+ get_conv_params_no_round(0, do_average, 0, output, MAX_SB_SIZE);
ConvolveParams conv_params2 =
- get_conv_params_no_round(0, 0, 0, output2, MAX_SB_SIZE);
+ get_conv_params_no_round(0, do_average, 0, output2, MAX_SB_SIZE);
for (subx = 0; subx < 16; ++subx)
for (suby = 0; suby < 16; ++suby) {
+ // av1_convolve_2d is designed to accumulate two predicted blocks for
+ // compound mode, so we set num_iters to two here. A larger number may
+ // introduce overflow.
+ const int num_iters = 2;
+ memset(output, 0, output_n * sizeof(*output));
+ memset(output2, 0, output_n * sizeof(*output2));
for (i = 0; i < num_iters; ++i) {
// Choose random locations within the source block
int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
@@ -102,14 +105,14 @@ namespace AV1HighbdConvolve2D {
::testing::internal::ParamGenerator<HighbdConvolve2DParam> BuildParams(
highbd_convolve_2d_func filter) {
const HighbdConvolve2DParam params[] = {
- make_tuple(4, 4, 20, 8, filter), make_tuple(8, 8, 10, 8, filter),
- make_tuple(64, 64, 1, 8, filter), make_tuple(4, 16, 10, 8, filter),
- make_tuple(32, 8, 10, 8, filter), make_tuple(4, 4, 20, 10, filter),
- make_tuple(8, 8, 10, 10, filter), make_tuple(64, 64, 1, 10, filter),
- make_tuple(4, 16, 10, 10, filter), make_tuple(32, 8, 10, 10, filter),
- make_tuple(4, 4, 20, 12, filter), make_tuple(8, 8, 10, 12, filter),
- make_tuple(64, 64, 1, 12, filter), make_tuple(4, 16, 10, 12, filter),
- make_tuple(32, 8, 10, 12, filter),
+ make_tuple(4, 4, 8, filter), make_tuple(8, 8, 8, filter),
+ make_tuple(64, 64, 8, filter), make_tuple(4, 16, 8, filter),
+ make_tuple(32, 8, 8, filter), make_tuple(4, 4, 10, filter),
+ make_tuple(8, 8, 10, filter), make_tuple(64, 64, 10, filter),
+ make_tuple(4, 16, 10, filter), make_tuple(32, 8, 10, filter),
+ make_tuple(4, 4, 12, filter), make_tuple(8, 8, 12, filter),
+ make_tuple(64, 64, 12, filter), make_tuple(4, 16, 12, filter),
+ make_tuple(32, 8, 12, filter),
};
return ::testing::ValuesIn(params);
}
@@ -125,8 +128,7 @@ void AV1HighbdConvolve2DTest::RunCheckOutput(
highbd_convolve_2d_func test_impl) {
const int w = 128, h = 128;
const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
- const int num_iters = GET_PARAM(2);
- const int bd = GET_PARAM(3);
+ const int bd = GET_PARAM(2);
int i, j, k;
uint16_t *input = new uint16_t[h * w];
@@ -138,9 +140,6 @@ void AV1HighbdConvolve2DTest::RunCheckOutput(
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
- memset(output, 0, output_n * sizeof(CONV_BUF_TYPE));
- memset(output2, 0, output_n * sizeof(CONV_BUF_TYPE));
-
int hfilter, vfilter, subx, suby;
for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL; ++vfilter) {
@@ -155,6 +154,12 @@ void AV1HighbdConvolve2DTest::RunCheckOutput(
for (subx = 0; subx < 16; ++subx)
for (suby = 0; suby < 16; ++suby) {
+ // av1_convolve_2d is designed to accumulate two predicted blocks for
+ // compound mode, so we set num_iters to two here. A larger number may
+ // introduce overflow.
+ const int num_iters = 2;
+ memset(output, 0, output_n * sizeof(*output));
+ memset(output2, 0, output_n * sizeof(*output2));
for (i = 0; i < num_iters; ++i) {
// Choose random locations within the source block
int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
diff --git a/third_party/aom/test/av1_convolve_2d_test_util.h b/third_party/aom/test/av1_convolve_2d_test_util.h
index ed0eeb450..013126b4a 100644
--- a/third_party/aom/test/av1_convolve_2d_test_util.h
+++ b/third_party/aom/test/av1_convolve_2d_test_util.h
@@ -31,7 +31,7 @@ typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params);
-typedef std::tr1::tuple<int, int, int, convolve_2d_func> Convolve2DParam;
+typedef std::tr1::tuple<int, int, convolve_2d_func> Convolve2DParam;
::testing::internal::ParamGenerator<Convolve2DParam> BuildParams(
convolve_2d_func filter);
@@ -59,7 +59,7 @@ typedef void (*highbd_convolve_2d_func)(
InterpFilterParams *filter_params_y, const int subpel_x_q4,
const int subpel_y_q4, ConvolveParams *conv_params, int bd);
-typedef std::tr1::tuple<int, int, int, int, highbd_convolve_2d_func>
+typedef std::tr1::tuple<int, int, int, highbd_convolve_2d_func>
HighbdConvolve2DParam;
::testing::internal::ParamGenerator<HighbdConvolve2DParam> BuildParams(
diff --git a/third_party/aom/test/av1_convolve_scale_test.cc b/third_party/aom/test/av1_convolve_scale_test.cc
new file mode 100644
index 000000000..9d8be888d
--- /dev/null
+++ b/third_party/aom/test/av1_convolve_scale_test.cc
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <vector>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./av1_rtcd.h"
+#include "aom_ports/aom_timer.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+namespace {
+const int kTestIters = 10;
+const int kPerfIters = 1000;
+
+const int kVPad = 32;
+const int kHPad = 32;
+const int kXStepQn = 16;
+const int kYStepQn = 20;
+
+using std::tr1::tuple;
+using std::tr1::make_tuple;
+using libaom_test::ACMRandom;
+
+enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP };
+int NTapsToInt(NTaps ntaps) { return 8 + static_cast<int>(ntaps) * 2; }
+
+// A 16-bit filter with a configurable number of taps.
+class TestFilter {
+ public:
+ void set(NTaps ntaps, bool backwards);
+
+ InterpFilterParams params_;
+
+ private:
+ std::vector<int16_t> coeffs_;
+};
+
+void TestFilter::set(NTaps ntaps, bool backwards) {
+ const int n = NTapsToInt(ntaps);
+ assert(n >= 8 && n <= 12);
+
+ // The filter has n * SUBPEL_SHIFTS proper elements and an extra 8 bogus
+ // elements at the end so that convolutions can read off the end safely.
+ coeffs_.resize(n * SUBPEL_SHIFTS + 8);
+
+ // The coefficients are pretty much arbitrary, but convolutions shouldn't
+ // over or underflow. For the first filter (subpels = 0), we use an
+ // increasing or decreasing ramp (depending on the backwards parameter). We
+ // don't want any zero coefficients, so we make it have an x-intercept at -1
+ // or n. To ensure absence of under/overflow, we normalise the area under the
+ // ramp to be I = 1 << FILTER_BITS (so that convolving a constant function
+ // gives the identity).
+ //
+ // When increasing, the function has the form:
+ //
+ // f(x) = A * (x + 1)
+ //
+ // Summing and rearranging for A gives A = 2 * I / (n * (n + 1)). If the
+ // filter is reversed, we have the same A but with formula
+ //
+ // g(x) = A * (n - x)
+ const int I = 1 << FILTER_BITS;
+ const float A = 2.f * I / (n * (n + 1.f));
+ for (int i = 0; i < n; ++i) {
+ coeffs_[i] = static_cast<int16_t>(A * (backwards ? (n - i) : (i + 1)));
+ }
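+ // Quick numeric check (assuming FILTER_BITS == 7, so I == 128): for n == 8,
+ // A = 2 * 128 / (8 * 9) ~= 3.56, and the ramp sums to A * n * (n + 1) / 2
+ // == 128, so convolving a constant input reproduces it up to per-tap
+ // rounding.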
+
+ // For the other filters, make them slightly different by swapping two
+ // columns. Filter k will have the columns (k % n) and (7 * k) % n swapped.
+ const size_t filter_size = sizeof(coeffs_[0]) * n;
+ int16_t *const filter0 = &coeffs_[0];
+ for (int k = 1; k < SUBPEL_SHIFTS; ++k) {
+ int16_t *filterk = &coeffs_[k * n];
+ memcpy(filterk, filter0, filter_size);
+
+ const int idx0 = k % n;
+ const int idx1 = (7 * k) % n;
+
+ const int16_t tmp = filterk[idx0];
+ filterk[idx0] = filterk[idx1];
+ filterk[idx1] = tmp;
+ }
+
+ // Finally, write some rubbish at the end to make sure we don't use it.
+ for (int i = 0; i < 8; ++i) coeffs_[n * SUBPEL_SHIFTS + i] = 123 + i;
+
+ // Fill in params
+ params_.filter_ptr = &coeffs_[0];
+ params_.taps = n;
+ // These are ignored by the functions being tested. Set them to whatever.
+ params_.subpel_shifts = SUBPEL_SHIFTS;
+ params_.interp_filter = EIGHTTAP_REGULAR;
+}
+
+template <typename SrcPixel>
+class TestImage {
+ public:
+ TestImage(int w, int h, int bd) : w_(w), h_(h), bd_(bd) {
+ assert(bd < 16);
+ assert(bd <= 8 * static_cast<int>(sizeof(SrcPixel)));
+
+ // Pad width by 2*kHPad and then round up to the next multiple of 16
+ // to get src_stride_. Add another 16 for dst_stride_ (to make sure
+ // something goes wrong if we use the wrong one)
+ src_stride_ = (w_ + 2 * kHPad + 15) & ~15;
+ dst_stride_ = src_stride_ + 16;
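+ // e.g. for w_ == 128 with kHPad == 32: src_stride_ == (128 + 64 + 15) & ~15
+ // == 192 and dst_stride_ == 208.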
+
+ // Allocate image data
+ src_data_.resize(2 * src_block_size());
+ dst_data_.resize(2 * dst_block_size());
+ }
+
+ void Initialize(ACMRandom *rnd);
+ void Check() const;
+
+ int src_stride() const { return src_stride_; }
+ int dst_stride() const { return dst_stride_; }
+
+ int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); }
+ int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); }
+
+ const SrcPixel *GetSrcData(bool ref, bool borders) const {
+ const SrcPixel *block = &src_data_[ref ? 0 : src_block_size()];
+ return borders ? block : block + kHPad + src_stride_ * kVPad;
+ }
+
+ int32_t *GetDstData(bool ref, bool borders) {
+ int32_t *block = &dst_data_[ref ? 0 : dst_block_size()];
+ return borders ? block : block + kHPad + dst_stride_ * kVPad;
+ }
+
+ private:
+ int w_, h_, bd_;
+ int src_stride_, dst_stride_;
+
+ std::vector<SrcPixel> src_data_;
+ std::vector<int32_t> dst_data_;
+};
+
+template <typename Pixel>
+void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
+ if (!trash) {
+ memset(data, 0, sizeof(*data) * num_pixels);
+ return;
+ }
+ const Pixel mask = (1 << bd) - 1;
+ for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask;
+}
+
+template <typename Pixel>
+void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
+ bool trash_edges, Pixel *data) {
+ assert(rnd);
+ const Pixel mask = (1 << bd) - 1;
+
+ // Fill in the first buffer with random data
+ // Top border
+ FillEdge(rnd, stride * kVPad, bd, trash_edges, data);
+ for (int r = 0; r < h; ++r) {
+ Pixel *row_data = data + (kVPad + r) * stride;
+ // Left border, contents, right border
+ FillEdge(rnd, kHPad, bd, trash_edges, row_data);
+ for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask;
+ FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w);
+ }
+ // Bottom border
+ FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h));
+
+ const int bpp = sizeof(*data);
+ const int block_elts = stride * (h + 2 * kVPad);
+ const int block_size = bpp * block_elts;
+
+ // Now copy that to the second buffer
+ memcpy(data + block_elts, data, block_size);
+}
+
+template <typename SrcPixel>
+void TestImage<SrcPixel>::Initialize(ACMRandom *rnd) {
+ PrepBuffers(rnd, w_, h_, src_stride_, bd_, false, &src_data_[0]);
+ PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_data_[0]);
+}
+
+template <typename SrcPixel>
+void TestImage<SrcPixel>::Check() const {
+ // If memcmp returns 0, there's nothing to do.
+ const int num_pixels = dst_block_size();
+ const int32_t *ref_dst = &dst_data_[0];
+ const int32_t *tst_dst = &dst_data_[num_pixels];
+
+ if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return;
+
+ // Otherwise, iterate through the buffer looking for differences (including
+ // the edges)
+ const int stride = dst_stride_;
+ for (int r = 0; r < h_ + 2 * kVPad; ++r) {
+ for (int c = 0; c < w_ + 2 * kHPad; ++c) {
+ const int32_t ref_value = ref_dst[r * stride + c];
+ const int32_t tst_value = tst_dst[r * stride + c];
+
+ EXPECT_EQ(tst_value, ref_value)
+ << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
+ }
+ }
+}
+
+typedef tuple<int, int> BlockDimension;
+
+struct BaseParams {
+ BaseParams(BlockDimension dims, NTaps ntaps_x, NTaps ntaps_y, bool avg)
+ : dims(dims), ntaps_x(ntaps_x), ntaps_y(ntaps_y), avg(avg) {}
+
+ BlockDimension dims;
+ NTaps ntaps_x, ntaps_y;
+ bool avg;
+};
+
+template <typename SrcPixel>
+class ConvolveScaleTestBase : public ::testing::Test {
+ public:
+ ConvolveScaleTestBase() : image_(NULL) {}
+ virtual ~ConvolveScaleTestBase() { delete image_; }
+ virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ // Implemented by subclasses (SetUp depends on the parameters passed in
+ // and RunOne depends on the function to be tested; these can't be
+ // templated for low/high bit depths because they have different numbers
+ // of parameters).
+ virtual void SetUp() = 0;
+ virtual void RunOne(bool ref) = 0;
+
+ protected:
+ void SetParams(const BaseParams &params, int bd) {
+ width_ = std::tr1::get<0>(params.dims);
+ height_ = std::tr1::get<1>(params.dims);
+ ntaps_x_ = params.ntaps_x;
+ ntaps_y_ = params.ntaps_y;
+ bd_ = bd;
+ avg_ = params.avg;
+
+ filter_x_.set(ntaps_x_, false);
+ filter_y_.set(ntaps_y_, true);
+ convolve_params_ = get_conv_params_no_round(0, avg_ != false, 0, NULL, 0);
+
+ delete image_;
+ image_ = new TestImage<SrcPixel>(width_, height_, bd_);
+ }
+
+ void Run() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int i = 0; i < kTestIters; ++i) {
+ Prep(&rnd);
+ RunOne(true);
+ RunOne(false);
+ image_->Check();
+ }
+ }
+
+ void SpeedTest() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ Prep(&rnd);
+
+ aom_usec_timer ref_timer;
+ aom_usec_timer_start(&ref_timer);
+ for (int i = 0; i < kPerfIters; ++i) RunOne(true);
+ aom_usec_timer_mark(&ref_timer);
+ const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
+
+ aom_usec_timer tst_timer;
+ aom_usec_timer_start(&tst_timer);
+ for (int i = 0; i < kPerfIters; ++i) RunOne(false);
+ aom_usec_timer_mark(&tst_timer);
+ const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
+
+ std::cout << "[ ] C time = " << ref_time / 1000
+ << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
+
+ EXPECT_GT(ref_time, tst_time)
+ << "Error: ConvolveScaleSpeedTest, SIMD slower than C.\n"
+ << "C time: " << ref_time << " us\n"
+ << "SIMD time: " << tst_time << " us\n";
+ }
+
+ static int RandomSubpel(ACMRandom *rnd) {
+ const uint8_t subpel_mode = rnd->Rand8();
+ if ((subpel_mode & 7) == 0) {
+ return 0;
+ } else if ((subpel_mode & 7) == 1) {
+ return SCALE_SUBPEL_SHIFTS - 1;
+ } else {
+ return 1 + rnd->PseudoUniform(SCALE_SUBPEL_SHIFTS - 2);
+ }
+ }
+
+ void Prep(ACMRandom *rnd) {
+ assert(rnd);
+
+ // Choose subpel_x_ and subpel_y_. They should be less than
+ // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to "interesting"
+ // values: 0 and SCALE_SUBPEL_SHIFTS - 1
+ subpel_x_ = RandomSubpel(rnd);
+ subpel_y_ = RandomSubpel(rnd);
+
+ image_->Initialize(rnd);
+ }
+
+ int width_, height_, bd_;
+ NTaps ntaps_x_, ntaps_y_;
+ bool avg_;
+ int subpel_x_, subpel_y_;
+ TestFilter filter_x_, filter_y_;
+ TestImage<SrcPixel> *image_;
+ ConvolveParams convolve_params_;
+};
+
+typedef void (*LowbdConvolveFunc)(const uint8_t *src, int src_stride,
+ int32_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_qn, const int x_step_qn,
+ const int subpel_y_qn, const int y_step_qn,
+ ConvolveParams *conv_params);
+
+// Test parameter list:
+// <tst_fun, dims, ntaps_x, ntaps_y, avg>
+typedef tuple<LowbdConvolveFunc, BlockDimension, NTaps, NTaps, bool>
+ LowBDParams;
+
+class LowBDConvolveScaleTest
+ : public ConvolveScaleTestBase<uint8_t>,
+ public ::testing::WithParamInterface<LowBDParams> {
+ public:
+ virtual ~LowBDConvolveScaleTest() {}
+
+ void SetUp() {
+ tst_fun_ = GET_PARAM(0);
+
+ const BlockDimension &block = GET_PARAM(1);
+ const NTaps ntaps_x = GET_PARAM(2);
+ const NTaps ntaps_y = GET_PARAM(3);
+ const int bd = 8;
+ const bool avg = GET_PARAM(4);
+
+ SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
+ }
+
+ void RunOne(bool ref) {
+ const uint8_t *src = image_->GetSrcData(ref, false);
+ CONV_BUF_TYPE *dst = image_->GetDstData(ref, false);
+ const int src_stride = image_->src_stride();
+ const int dst_stride = image_->dst_stride();
+
+ if (ref) {
+ av1_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, height_,
+ &filter_x_.params_, &filter_y_.params_, subpel_x_,
+ kXStepQn, subpel_y_, kYStepQn, &convolve_params_);
+ } else {
+ tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
+ &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
+ subpel_y_, kYStepQn, &convolve_params_);
+ }
+ }
+
+ private:
+ LowbdConvolveFunc tst_fun_;
+};
+
+const BlockDimension kBlockDim[] = {
+ make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4),
+ make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8),
+ make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16),
+ make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32),
+ make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64),
+ make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
+};
+
+const NTaps kNTaps[] = { EIGHT_TAP, TEN_TAP, TWELVE_TAP };
+
+TEST_P(LowBDConvolveScaleTest, Check) { Run(); }
+TEST_P(LowBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }
+
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, LowBDConvolveScaleTest,
+ ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1),
+ ::testing::ValuesIn(kBlockDim),
+ ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
+ ::testing::Bool()));
+
+#if CONFIG_HIGHBITDEPTH
+typedef void (*HighbdConvolveFunc)(const uint16_t *src, int src_stride,
+ int32_t *dst, int dst_stride, int w, int h,
+ InterpFilterParams *filter_params_x,
+ InterpFilterParams *filter_params_y,
+ const int subpel_x_qn, const int x_step_qn,
+ const int subpel_y_qn, const int y_step_qn,
+ ConvolveParams *conv_params, int bd);
+
+// Test parameter list:
+// <tst_fun, dims, ntaps_x, ntaps_y, avg, bd>
+typedef tuple<HighbdConvolveFunc, BlockDimension, NTaps, NTaps, bool, int>
+ HighBDParams;
+
+class HighBDConvolveScaleTest
+ : public ConvolveScaleTestBase<uint16_t>,
+ public ::testing::WithParamInterface<HighBDParams> {
+ public:
+ virtual ~HighBDConvolveScaleTest() {}
+
+ void SetUp() {
+ tst_fun_ = GET_PARAM(0);
+
+ const BlockDimension &block = GET_PARAM(1);
+ const NTaps ntaps_x = GET_PARAM(2);
+ const NTaps ntaps_y = GET_PARAM(3);
+ const bool avg = GET_PARAM(4);
+ const int bd = GET_PARAM(5);
+
+ SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
+ }
+
+ void RunOne(bool ref) {
+ const uint16_t *src = image_->GetSrcData(ref, false);
+ CONV_BUF_TYPE *dst = image_->GetDstData(ref, false);
+ const int src_stride = image_->src_stride();
+ const int dst_stride = image_->dst_stride();
+
+ if (ref) {
+ av1_highbd_convolve_2d_scale_c(
+ src, src_stride, dst, dst_stride, width_, height_, &filter_x_.params_,
+ &filter_y_.params_, subpel_x_, kXStepQn, subpel_y_, kYStepQn,
+ &convolve_params_, bd_);
+ } else {
+ tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
+ &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
+ subpel_y_, kYStepQn, &convolve_params_, bd_);
+ }
+ }
+
+ private:
+ HighbdConvolveFunc tst_fun_;
+};
+
+const int kBDs[] = { 8, 10, 12 };
+
+TEST_P(HighBDConvolveScaleTest, Check) { Run(); }
+TEST_P(HighBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); }
+
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, HighBDConvolveScaleTest,
+ ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1),
+ ::testing::ValuesIn(kBlockDim),
+ ::testing::ValuesIn(kNTaps), ::testing::ValuesIn(kNTaps),
+ ::testing::Bool(), ::testing::ValuesIn(kBDs)));
+
+#endif // CONFIG_HIGHBITDEPTH
+} // namespace
diff --git a/third_party/aom/test/av1_convolve_test.cc b/third_party/aom/test/av1_convolve_test.cc
index 3947c7166..aaef7cfe0 100644
--- a/third_party/aom/test/av1_convolve_test.cc
+++ b/third_party/aom/test/av1_convolve_test.cc
@@ -269,16 +269,9 @@ INSTANTIATE_TEST_CASE_P(
#ifndef __clang_analyzer__
TEST(AV1ConvolveTest, av1_highbd_convolve) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
-#if CONFIG_DUAL_FILTER
- InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
- EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
+ InterpFilters interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter[0]);
-#else
- InterpFilter interp_filter = EIGHTTAP_REGULAR;
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter);
-#endif
+ av1_get_interp_filter_params(EIGHTTAP_REGULAR);
int filter_size = filter_params.taps;
int filter_center = filter_size / 2 - 1;
uint16_t src[12 * 12];
@@ -303,7 +296,7 @@ TEST(AV1ConvolveTest, av1_highbd_convolve) {
for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
av1_highbd_convolve(
CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
- src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, interp_filter,
+ src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, interp_filters,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
const int16_t *x_filter =
@@ -331,16 +324,9 @@ TEST(AV1ConvolveTest, av1_highbd_convolve) {
TEST(AV1ConvolveTest, av1_highbd_convolve_avg) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
-#if CONFIG_DUAL_FILTER
- InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
- EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
+ InterpFilters interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter[0]);
-#else
- InterpFilter interp_filter = EIGHTTAP_REGULAR;
- InterpFilterParams filter_params =
- av1_get_interp_filter_params(interp_filter);
-#endif
+ av1_get_interp_filter_params(EIGHTTAP_REGULAR);
int filter_size = filter_params.taps;
int filter_center = filter_size / 2 - 1;
uint16_t src0[12 * 12];
@@ -373,23 +359,23 @@ TEST(AV1ConvolveTest, av1_highbd_convolve_avg) {
avg = 0;
av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
- interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
+ interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 0;
av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
- interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
+ interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 0;
av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
- interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
+ interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
avg = 1;
av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
- interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
+ interp_filters, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, avg, bd);
EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
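
[Editor's note] Both tests above now take a single InterpFilters value from av1_broadcast_interp_filter(EIGHTTAP_REGULAR) instead of the old CONFIG_DUAL_FILTER array of four identical entries (or a lone scalar without that flag). Conceptually the helper packs the same filter for the vertical and horizontal directions into one word; a hedged sketch of that effect with an assumed bit layout (the authoritative definitions live in av1/common/filter.h):

    #include <cstdint>

    typedef uint32_t InterpFiltersSketch;  // stand-in for InterpFilters

    // Assumed packing: y-filter in the high half, x-filter in the low half.
    static InterpFiltersSketch make_filters(int y_filter, int x_filter) {
      return ((uint32_t)y_filter << 16) | (uint16_t)x_filter;
    }

    // Mirrors what av1_broadcast_interp_filter() achieves: one filter id
    // replicated for both directions, so callers pass a single value.
    static InterpFiltersSketch broadcast_filter(int filter) {
      return make_filters(filter, filter);
    }
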
diff --git a/third_party/aom/test/av1_dct_test.cc b/third_party/aom/test/av1_dct_test.cc
index 8ce7a79d4..fdaf9abb9 100644
--- a/third_party/aom/test/av1_dct_test.cc
+++ b/third_party/aom/test/av1_dct_test.cc
@@ -23,7 +23,8 @@
#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
#define AV1_DCT_GTEST
#include "av1/encoder/dct.c"
-#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8
+#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
+ CONFIG_DAALA_DCT32
#include "av1/common/daala_tx.c"
#endif
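
[Editor's note] The widened condition compiles daala_tx.c into the DCT test whenever any of the four Daala transform sizes is enabled; the !CONFIG_DAALA_DCT16 and !CONFIG_DAALA_DCT32 terms added to the guards in the transform tests below are the complementary exclusions, switching the SIMD hybrid-transform tables off for sizes the Daala path replaces. An illustrative pairing (the macro names are the real config flags; the 0/1 values are examples only):

    #define CONFIG_DAALA_DCT4 0   // example values, set by the build config
    #define CONFIG_DAALA_DCT8 0
    #define CONFIG_DAALA_DCT16 1
    #define CONFIG_DAALA_DCT32 0
    #define HAVE_SSE2 1

    #if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
        CONFIG_DAALA_DCT32
    // daala_tx.c is compiled into the test binary here.
    #endif

    #if HAVE_SSE2 && !CONFIG_DAALA_DCT16
    // 16x16 SSE2 hybrid-transform tests exist only when the default
    // (non-Daala) transform is in use for this size; skipped in this example.
    #endif
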
diff --git a/third_party/aom/test/av1_fht16x16_test.cc b/third_party/aom/test/av1_fht16x16_test.cc
index c0f6974c6..21235a837 100644
--- a/third_party/aom/test/av1_fht16x16_test.cc
+++ b/third_party/aom/test/av1_fht16x16_test.cc
@@ -28,7 +28,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using std::tr1::tuple;
using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht16x16Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht16x16Param;
void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -42,15 +42,15 @@ void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
#if CONFIG_HIGHBITDEPTH
typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
- int tx_type, int bd);
+ TX_TYPE tx_type, int bd);
typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd);
+ TX_TYPE tx_type, int bd);
// Target optimized function, tx_type, bit depth
-typedef tuple<HbdHtFunc, int, int> HighbdHt16x16Param;
+typedef tuple<HbdHtFunc, TX_TYPE, int> HighbdHt16x16Param;
void highbd_fht16x16_ref(const int16_t *in, int32_t *out, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
av1_fwd_txfm2d_16x16_c(in, out, stride, tx_type, bd);
}
#endif // CONFIG_HIGHBITDEPTH
@@ -128,7 +128,7 @@ class AV1HighbdTrans16x16HT
private:
HbdHtFunc fwd_txfm_;
HbdHtFunc fwd_txfm_ref_;
- int tx_type_;
+ TX_TYPE tx_type_;
int bit_depth_;
int mask_;
int num_coeffs_;
@@ -164,113 +164,113 @@ TEST_P(AV1HighbdTrans16x16HT, HighbdCoeffCheck) { RunBitexactCheck(); }
using std::tr1::make_tuple;
-#if HAVE_SSE2
+#if HAVE_SSE2 && !CONFIG_DAALA_DCT16
const Ht16x16Param kArrayHt16x16Param_sse2[] = {
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 0, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 1, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 2, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 3, AOM_BITS_8,
- 256),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, DCT_DCT,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, ADST_DCT,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, DCT_ADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, ADST_ADST,
+ AOM_BITS_8, 256),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 4, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 5, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 6, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 7, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 8, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 9, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 10, AOM_BITS_8,
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, FLIPADST_DCT,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, DCT_FLIPADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, FLIPADST_FLIPADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, ADST_FLIPADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, FLIPADST_ADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, IDTX, AOM_BITS_8,
256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 11, AOM_BITS_8,
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, V_DCT, AOM_BITS_8,
256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 12, AOM_BITS_8,
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, H_DCT, AOM_BITS_8,
256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 13, AOM_BITS_8,
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, V_ADST, AOM_BITS_8,
256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 14, AOM_BITS_8,
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, H_ADST, AOM_BITS_8,
256),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 15, AOM_BITS_8,
- 256)
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, V_FLIPADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, H_FLIPADST,
+ AOM_BITS_8, 256)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x16HT,
::testing::ValuesIn(kArrayHt16x16Param_sse2));
#endif // HAVE_SSE2
-#if HAVE_AVX2
+#if HAVE_AVX2 && !CONFIG_DAALA_DCT16
const Ht16x16Param kArrayHt16x16Param_avx2[] = {
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 0, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 1, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 2, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 3, AOM_BITS_8,
- 256),
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, DCT_DCT,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, ADST_DCT,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, DCT_ADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, ADST_ADST,
+ AOM_BITS_8, 256),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 4, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 5, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 6, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 7, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 8, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 9, AOM_BITS_8,
- 256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 10, AOM_BITS_8,
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, FLIPADST_DCT,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, DCT_FLIPADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, FLIPADST_FLIPADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, ADST_FLIPADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, FLIPADST_ADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, IDTX, AOM_BITS_8,
256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 11, AOM_BITS_8,
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, V_DCT, AOM_BITS_8,
256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 12, AOM_BITS_8,
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, H_DCT, AOM_BITS_8,
256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 13, AOM_BITS_8,
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, V_ADST, AOM_BITS_8,
256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 14, AOM_BITS_8,
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, H_ADST, AOM_BITS_8,
256),
- make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 15, AOM_BITS_8,
- 256)
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, V_FLIPADST,
+ AOM_BITS_8, 256),
+ make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, H_FLIPADST,
+ AOM_BITS_8, 256)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(AVX2, AV1Trans16x16HT,
::testing::ValuesIn(kArrayHt16x16Param_avx2));
#endif // HAVE_AVX2
-#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT16
const HighbdHt16x16Param kArrayHBDHt16x16Param_sse4_1[] = {
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 0, 10),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 0, 12),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 1, 10),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 1, 12),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 2, 10),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 2, 12),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 3, 10),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 3, 12),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_DCT, 10),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_DCT, 12),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_DCT, 10),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_DCT, 12),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_ADST, 10),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_ADST, 12),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_ADST, 10),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_ADST, 12),
#if CONFIG_EXT_TX
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 4, 10),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 4, 12),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 5, 10),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 5, 12),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 6, 10),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 6, 12),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 7, 10),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 7, 12),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 8, 10),
- make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, 8, 12),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_DCT, 10),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_DCT, 12),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_FLIPADST, 10),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, DCT_FLIPADST, 12),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_FLIPADST, 10),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_FLIPADST, 12),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_FLIPADST, 10),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, ADST_FLIPADST, 12),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_ADST, 10),
+ make_tuple(&av1_fwd_txfm2d_16x16_sse4_1, FLIPADST_ADST, 12),
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans16x16HT,
::testing::ValuesIn(kArrayHBDHt16x16Param_sse4_1));
-#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT16
} // namespace
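
[Editor's note] Every parameter table in this patch applies the same substitution: the bare integers 0-15 become TX_TYPE enumerators in declaration order, which both documents the tested transform and lets the compiler type-check the tuple slot (note the tuple typedefs changing from int to TX_TYPE). The mapping, read directly off the replacements above (the canonical enum is in av1/common/enums.h):

    // Sketch of the value/name correspondence implied by this patch.
    typedef enum {
      DCT_DCT,            // 0
      ADST_DCT,           // 1
      DCT_ADST,           // 2
      ADST_ADST,          // 3
      FLIPADST_DCT,       // 4  -- entries 4..15 exist under CONFIG_EXT_TX
      DCT_FLIPADST,       // 5
      FLIPADST_FLIPADST,  // 6
      ADST_FLIPADST,      // 7
      FLIPADST_ADST,      // 8
      IDTX,               // 9
      V_DCT,              // 10
      H_DCT,              // 11
      V_ADST,             // 12
      H_ADST,             // 13
      V_FLIPADST,         // 14
      H_FLIPADST,         // 15
    } TX_TYPE_SKETCH;  // hypothetical name, to avoid clashing with the real enum
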
diff --git a/third_party/aom/test/av1_fht16x32_test.cc b/third_party/aom/test/av1_fht16x32_test.cc
index 099a312e3..0b3928f64 100644
--- a/third_party/aom/test/av1_fht16x32_test.cc
+++ b/third_party/aom/test/av1_fht16x32_test.cc
@@ -28,7 +28,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using std::tr1::tuple;
using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht16x32Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht16x32Param;
void fht16x32_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -80,23 +80,34 @@ TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); }
using std::tr1::make_tuple;
const Ht16x32Param kArrayHt16x32Param_c[] = {
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 0, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 1, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 2, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 3, AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, DCT_DCT, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, ADST_DCT, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, DCT_ADST, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, ADST_ADST, AOM_BITS_8,
+ 512),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 4, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 5, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 6, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 7, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 8, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 9, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 10, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 11, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 12, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 13, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 14, AOM_BITS_8, 512),
- make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, 15, AOM_BITS_8, 512)
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, FLIPADST_DCT, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, DCT_FLIPADST, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, FLIPADST_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, ADST_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, FLIPADST_ADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, IDTX, AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, V_DCT, AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, H_DCT, AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, V_ADST, AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, H_ADST, AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, V_FLIPADST, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, H_FLIPADST, AOM_BITS_8,
+ 512)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(C, AV1Trans16x32HT,
@@ -104,39 +115,39 @@ INSTANTIATE_TEST_CASE_P(C, AV1Trans16x32HT,
#if HAVE_SSE2
const Ht16x32Param kArrayHt16x32Param_sse2[] = {
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 0, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 1, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 2, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 3, AOM_BITS_8,
- 512),
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, DCT_DCT,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, ADST_DCT,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, DCT_ADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, ADST_ADST,
+ AOM_BITS_8, 512),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 4, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 5, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 6, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 7, AOM_BITS_8,
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, FLIPADST_DCT,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, DCT_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, FLIPADST_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, ADST_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, FLIPADST_ADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, IDTX, AOM_BITS_8,
512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 8, AOM_BITS_8,
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, V_DCT, AOM_BITS_8,
512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 9, AOM_BITS_8,
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, H_DCT, AOM_BITS_8,
512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 10, AOM_BITS_8,
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, V_ADST, AOM_BITS_8,
512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 11, AOM_BITS_8,
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, H_ADST, AOM_BITS_8,
512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 12, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 13, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 14, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, 15, AOM_BITS_8,
- 512)
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, V_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, H_FLIPADST,
+ AOM_BITS_8, 512)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x32HT,
diff --git a/third_party/aom/test/av1_fht16x8_test.cc b/third_party/aom/test/av1_fht16x8_test.cc
index 8277e2865..3ee1a0830 100644
--- a/third_party/aom/test/av1_fht16x8_test.cc
+++ b/third_party/aom/test/av1_fht16x8_test.cc
@@ -28,7 +28,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using std::tr1::tuple;
using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht16x8Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht16x8Param;
void fht16x8_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -81,23 +81,31 @@ TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
using std::tr1::make_tuple;
const Ht16x8Param kArrayHt16x8Param_c[] = {
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 0, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 1, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 2, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 3, AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, DCT_DCT, AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, ADST_DCT, AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, DCT_ADST, AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, ADST_ADST, AOM_BITS_8,
+ 128),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 4, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 5, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 6, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 7, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 8, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 9, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 10, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 11, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 12, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 13, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 14, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, 15, AOM_BITS_8, 128)
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, FLIPADST_DCT, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, DCT_FLIPADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, FLIPADST_FLIPADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, ADST_FLIPADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, FLIPADST_ADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, IDTX, AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, V_DCT, AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, H_DCT, AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, V_ADST, AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, H_ADST, AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, V_FLIPADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_c, &av1_iht16x8_128_add_c, H_FLIPADST, AOM_BITS_8,
+ 128)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(C, AV1Trans16x8HT,
@@ -105,23 +113,39 @@ INSTANTIATE_TEST_CASE_P(C, AV1Trans16x8HT,
#if HAVE_SSE2
const Ht16x8Param kArrayHt16x8Param_sse2[] = {
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 0, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 1, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 2, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 3, AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, DCT_DCT, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, ADST_DCT, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, DCT_ADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, ADST_ADST,
+ AOM_BITS_8, 128),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 4, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 5, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 6, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 7, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 8, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 9, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 10, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 11, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 12, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 13, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 14, AOM_BITS_8, 128),
- make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, 15, AOM_BITS_8, 128)
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, FLIPADST_DCT,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, DCT_FLIPADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, FLIPADST_FLIPADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, ADST_FLIPADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, FLIPADST_ADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, IDTX, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, V_DCT, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, H_DCT, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, V_ADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, H_ADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, V_FLIPADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht16x8_sse2, &av1_iht16x8_128_add_sse2, H_FLIPADST,
+ AOM_BITS_8, 128)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x8HT,
diff --git a/third_party/aom/test/av1_fht32x16_test.cc b/third_party/aom/test/av1_fht32x16_test.cc
index 1c70fd4fc..cbce074e5 100644
--- a/third_party/aom/test/av1_fht32x16_test.cc
+++ b/third_party/aom/test/av1_fht32x16_test.cc
@@ -28,7 +28,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using std::tr1::tuple;
using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht32x16Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht32x16Param;
void fht32x16_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -80,23 +80,34 @@ TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); }
using std::tr1::make_tuple;
const Ht32x16Param kArrayHt32x16Param_c[] = {
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 0, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 1, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 2, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 3, AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, DCT_DCT, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, ADST_DCT, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, DCT_ADST, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, ADST_ADST, AOM_BITS_8,
+ 512),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 4, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 5, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 6, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 7, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 8, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 9, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 10, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 11, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 12, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 13, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 14, AOM_BITS_8, 512),
- make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, 15, AOM_BITS_8, 512)
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, FLIPADST_DCT, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, DCT_FLIPADST, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, FLIPADST_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, ADST_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, FLIPADST_ADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, IDTX, AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, V_DCT, AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, H_DCT, AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, V_ADST, AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, H_ADST, AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, V_FLIPADST, AOM_BITS_8,
+ 512),
+ make_tuple(&av1_fht32x16_c, &av1_iht32x16_512_add_c, H_FLIPADST, AOM_BITS_8,
+ 512)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(C, AV1Trans32x16HT,
@@ -104,39 +115,39 @@ INSTANTIATE_TEST_CASE_P(C, AV1Trans32x16HT,
#if HAVE_SSE2
const Ht32x16Param kArrayHt32x16Param_sse2[] = {
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 0, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 1, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 2, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 3, AOM_BITS_8,
- 512),
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, DCT_DCT,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, ADST_DCT,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, DCT_ADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, ADST_ADST,
+ AOM_BITS_8, 512),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 4, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 5, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 6, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 7, AOM_BITS_8,
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, FLIPADST_DCT,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, DCT_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, FLIPADST_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, ADST_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, FLIPADST_ADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, IDTX, AOM_BITS_8,
512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 8, AOM_BITS_8,
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, V_DCT, AOM_BITS_8,
512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 9, AOM_BITS_8,
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, H_DCT, AOM_BITS_8,
512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 10, AOM_BITS_8,
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, V_ADST, AOM_BITS_8,
512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 11, AOM_BITS_8,
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, H_ADST, AOM_BITS_8,
512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 12, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 13, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 14, AOM_BITS_8,
- 512),
- make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, 15, AOM_BITS_8,
- 512)
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, V_FLIPADST,
+ AOM_BITS_8, 512),
+ make_tuple(&av1_fht32x16_sse2, &av1_iht32x16_512_add_sse2, H_FLIPADST,
+ AOM_BITS_8, 512)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans32x16HT,
diff --git a/third_party/aom/test/av1_fht32x32_test.cc b/third_party/aom/test/av1_fht32x32_test.cc
index e96ffffc2..613bc9183 100644
--- a/third_party/aom/test/av1_fht32x32_test.cc
+++ b/third_party/aom/test/av1_fht32x32_test.cc
@@ -28,7 +28,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using std::tr1::tuple;
using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht32x32Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht32x32Param;
void fht32x32_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -37,20 +37,20 @@ void fht32x32_ref(const int16_t *in, tran_low_t *out, int stride,
#if CONFIG_HIGHBITDEPTH
typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
- int tx_type, int bd);
+ TX_TYPE tx_type, int bd);
typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd);
+ TX_TYPE tx_type, int bd);
// Target optimized function, tx_type, bit depth
-typedef tuple<HbdHtFunc, int, int> HighbdHt32x32Param;
+typedef tuple<HbdHtFunc, TX_TYPE, int> HighbdHt32x32Param;
void highbd_fht32x32_ref(const int16_t *in, int32_t *out, int stride,
- int tx_type, int bd) {
+ TX_TYPE tx_type, int bd) {
av1_fwd_txfm2d_32x32_c(in, out, stride, tx_type, bd);
}
#endif // CONFIG_HIGHBITDEPTH
-#if HAVE_SSE2 || HAVE_AVX2
+#if (HAVE_SSE2 || HAVE_AVX2) && !CONFIG_DAALA_DCT32
void dummy_inv_txfm(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param) {
(void)in;
@@ -129,7 +129,7 @@ class AV1HighbdTrans32x32HT
private:
HbdHtFunc fwd_txfm_;
HbdHtFunc fwd_txfm_ref_;
- int tx_type_;
+ TX_TYPE tx_type_;
int bit_depth_;
int mask_;
int num_coeffs_;
@@ -165,53 +165,63 @@ TEST_P(AV1HighbdTrans32x32HT, HighbdCoeffCheck) { RunBitexactCheck(); }
using std::tr1::make_tuple;
-#if HAVE_SSE2
+#if HAVE_SSE2 && !CONFIG_DAALA_DCT32
const Ht32x32Param kArrayHt32x32Param_sse2[] = {
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 0, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 1, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 2, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 3, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, DCT_DCT, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, ADST_DCT, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, DCT_ADST, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, ADST_ADST, AOM_BITS_8, 1024),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 4, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 5, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 6, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 7, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 8, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 9, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 10, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 11, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 12, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 13, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 14, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, 15, AOM_BITS_8, 1024)
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, FLIPADST_DCT, AOM_BITS_8,
+ 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, DCT_FLIPADST, AOM_BITS_8,
+ 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, FLIPADST_FLIPADST, AOM_BITS_8,
+ 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, ADST_FLIPADST, AOM_BITS_8,
+ 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, FLIPADST_ADST, AOM_BITS_8,
+ 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, IDTX, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, V_DCT, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, H_DCT, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, V_ADST, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, H_ADST, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, V_FLIPADST, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm, H_FLIPADST, AOM_BITS_8, 1024)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans32x32HT,
::testing::ValuesIn(kArrayHt32x32Param_sse2));
-#endif // HAVE_SSE2
+#endif // HAVE_SSE2 && !CONFIG_DAALA_DCT32
-#if HAVE_AVX2
+#if HAVE_AVX2 && !CONFIG_DAALA_DCT32
const Ht32x32Param kArrayHt32x32Param_avx2[] = {
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 0, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 1, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 2, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 3, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, DCT_DCT, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, ADST_DCT, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, DCT_ADST, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, ADST_ADST, AOM_BITS_8, 1024),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 4, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 5, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 6, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 7, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 8, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 9, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 10, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 11, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 12, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 13, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 14, AOM_BITS_8, 1024),
- make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 15, AOM_BITS_8, 1024)
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, FLIPADST_DCT, AOM_BITS_8,
+ 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, DCT_FLIPADST, AOM_BITS_8,
+ 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, FLIPADST_FLIPADST, AOM_BITS_8,
+ 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, ADST_FLIPADST, AOM_BITS_8,
+ 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, FLIPADST_ADST, AOM_BITS_8,
+ 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, IDTX, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, V_DCT, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, H_DCT, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, V_ADST, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, H_ADST, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, V_FLIPADST, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, H_FLIPADST, AOM_BITS_8, 1024)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(AVX2, AV1Trans32x32HT,
::testing::ValuesIn(kArrayHt32x32Param_avx2));
-#endif // HAVE_AVX2
+#endif // HAVE_AVX2 && !CONFIG_DAALA_DCT32
} // namespace
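
[Editor's note] The 32x32 SIMD tables above pair each forward transform with dummy_inv_txfm rather than a real inverse, since only the forward path is exercised there; the stub simply discards its arguments so it can fill the IhtFunc slot of the tuple. The pattern in isolation (types simplified; tran_low_t's real width depends on CONFIG_HIGHBITDEPTH):

    #include <cstdint>

    typedef int32_t tran_low_t_sketch;        // assumed high-bitdepth width
    struct TxfmParamSketch { int tx_type; };  // stand-in for the real TxfmParam

    // No-op inverse: lets a forward-only test satisfy a tuple signature
    // that expects both a forward and an inverse transform function.
    static void dummy_inv_txfm_sketch(const tran_low_t_sketch *in,
                                      uint8_t *out, int stride,
                                      const TxfmParamSketch *param) {
      (void)in;  // deliberately unused
      (void)out;
      (void)stride;
      (void)param;
    }
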
diff --git a/third_party/aom/test/av1_fht4x4_test.cc b/third_party/aom/test/av1_fht4x4_test.cc
index f49d7368d..1d4fc1352 100644
--- a/third_party/aom/test/av1_fht4x4_test.cc
+++ b/third_party/aom/test/av1_fht4x4_test.cc
@@ -28,7 +28,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using std::tr1::tuple;
using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht4x4Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht4x4Param;
void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -42,16 +42,16 @@ void iht4x4_ref(const tran_low_t *in, uint8_t *out, int stride,
#if CONFIG_HIGHBITDEPTH
typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
- int tx_type, int bd);
+ TX_TYPE tx_type, int bd);
typedef void (*HBDFhtFunc)(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd);
+ TX_TYPE tx_type, int bd);
// HighbdHt4x4Param argument list:
// <Target optimized function, tx_type, bit depth>
-typedef tuple<HBDFhtFunc, int, int> HighbdHt4x4Param;
+typedef tuple<HBDFhtFunc, TX_TYPE, int> HighbdHt4x4Param;
-void highbe_fht4x4_ref(const int16_t *in, int32_t *out, int stride, int tx_type,
- int bd) {
+void highbe_fht4x4_ref(const int16_t *in, int32_t *out, int stride,
+ TX_TYPE tx_type, int bd) {
av1_fwd_txfm2d_4x4_c(in, out, stride, tx_type, bd);
}
#endif // CONFIG_HIGHBITDEPTH
@@ -131,7 +131,7 @@ class AV1HighbdTrans4x4HT : public ::testing::TestWithParam<HighbdHt4x4Param> {
private:
HBDFhtFunc fwd_txfm_;
HBDFhtFunc fwd_txfm_ref_;
- int tx_type_;
+ TX_TYPE tx_type_;
int bit_depth_;
int mask_;
int num_coeffs_;
@@ -167,58 +167,69 @@ TEST_P(AV1HighbdTrans4x4HT, HighbdCoeffCheck) { RunBitexactCheck(); }
using std::tr1::make_tuple;
-#if HAVE_SSE2
+#if HAVE_SSE2 && !CONFIG_DAALA_DCT4
const Ht4x4Param kArrayHt4x4Param_sse2[] = {
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 0, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 1, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 2, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 3, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_DCT, AOM_BITS_8,
+ 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_DCT, AOM_BITS_8,
+ 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_ADST, AOM_BITS_8,
+ 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_ADST, AOM_BITS_8,
+ 16),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 4, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 5, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 6, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 7, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 8, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 9, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 10, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 11, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 12, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 13, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 14, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 15, AOM_BITS_8, 16)
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_DCT,
+ AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_FLIPADST,
+ AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_FLIPADST,
+ AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_FLIPADST,
+ AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_ADST,
+ AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, IDTX, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_DCT, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_DCT, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_ADST, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_ADST, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_FLIPADST, AOM_BITS_8,
+ 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_FLIPADST, AOM_BITS_8,
+ 16)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans4x4HT,
::testing::ValuesIn(kArrayHt4x4Param_sse2));
#endif // HAVE_SSE2
-#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT4
const HighbdHt4x4Param kArrayHighbdHt4x4Param[] = {
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 0, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 0, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 1, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 1, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 2, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 2, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 3, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 3, 12),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_DCT, 10),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_DCT, 12),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_DCT, 10),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_DCT, 12),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_ADST, 10),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_ADST, 12),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_ADST, 10),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_ADST, 12),
#if CONFIG_EXT_TX
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 4, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 4, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 5, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 5, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 6, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 6, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 7, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 7, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 8, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, 8, 12),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_DCT, 10),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_DCT, 12),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_FLIPADST, 10),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_FLIPADST, 12),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_FLIPADST, 10),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_FLIPADST, 12),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_FLIPADST, 10),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_FLIPADST, 12),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_ADST, 10),
+ make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_ADST, 12),
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans4x4HT,
::testing::ValuesIn(kArrayHighbdHt4x4Param));
-#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT4
} // namespace
diff --git a/third_party/aom/test/av1_fht4x8_test.cc b/third_party/aom/test/av1_fht4x8_test.cc
index e447d8e2e..f9d2120e0 100644
--- a/third_party/aom/test/av1_fht4x8_test.cc
+++ b/third_party/aom/test/av1_fht4x8_test.cc
@@ -28,7 +28,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using std::tr1::tuple;
using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht4x8Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht4x8Param;
void fht4x8_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -81,23 +81,26 @@ TEST_P(AV1Trans4x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple;
const Ht4x8Param kArrayHt4x8Param_c[] = {
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 0, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 1, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 2, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 3, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, DCT_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, ADST_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, DCT_ADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, ADST_ADST, AOM_BITS_8, 32),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 4, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 5, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 6, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 7, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 8, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 9, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 10, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 11, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 12, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 13, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 14, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, 15, AOM_BITS_8, 32)
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, FLIPADST_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, DCT_FLIPADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, FLIPADST_FLIPADST, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, ADST_FLIPADST, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, FLIPADST_ADST, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, IDTX, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, V_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, H_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, V_ADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, H_ADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, V_FLIPADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, H_FLIPADST, AOM_BITS_8, 32)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(C, AV1Trans4x8HT,
@@ -105,23 +108,34 @@ INSTANTIATE_TEST_CASE_P(C, AV1Trans4x8HT,
#if HAVE_SSE2
const Ht4x8Param kArrayHt4x8Param_sse2[] = {
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 0, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 1, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 2, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 3, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, DCT_DCT, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, ADST_DCT, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, DCT_ADST, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, ADST_ADST, AOM_BITS_8,
+ 32),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 4, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 5, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 6, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 7, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 8, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 9, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 10, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 11, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 12, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 13, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 14, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, 15, AOM_BITS_8, 32)
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, FLIPADST_DCT,
+ AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, DCT_FLIPADST,
+ AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, FLIPADST_FLIPADST,
+ AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, ADST_FLIPADST,
+ AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, FLIPADST_ADST,
+ AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, IDTX, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, V_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, H_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, V_ADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, H_ADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, V_FLIPADST, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, H_FLIPADST, AOM_BITS_8,
+ 32)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans4x8HT,
diff --git a/third_party/aom/test/av1_fht64x64_test.cc b/third_party/aom/test/av1_fht64x64_test.cc
index 61ea9f1f3..f2a03e7ee 100644
--- a/third_party/aom/test/av1_fht64x64_test.cc
+++ b/third_party/aom/test/av1_fht64x64_test.cc
@@ -29,7 +29,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using std::tr1::tuple;
using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht64x64Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht64x64Param;
void fht64x64_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -82,23 +82,38 @@ TEST_P(AV1Trans64x64HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); }
using std::tr1::make_tuple;
const Ht64x64Param kArrayHt64x64Param_c[] = {
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 0, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 1, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 2, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 3, AOM_BITS_8, 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, DCT_DCT, AOM_BITS_8,
+ 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, ADST_DCT, AOM_BITS_8,
+ 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, DCT_ADST, AOM_BITS_8,
+ 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, ADST_ADST, AOM_BITS_8,
+ 4096),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 4, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 5, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 6, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 7, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 8, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 9, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 10, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 11, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 12, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 13, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 14, AOM_BITS_8, 4096),
- make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, 15, AOM_BITS_8, 4096)
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, FLIPADST_DCT,
+ AOM_BITS_8, 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, DCT_FLIPADST,
+ AOM_BITS_8, 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, FLIPADST_FLIPADST,
+ AOM_BITS_8, 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, ADST_FLIPADST,
+ AOM_BITS_8, 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, FLIPADST_ADST,
+ AOM_BITS_8, 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, IDTX, AOM_BITS_8, 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, V_DCT, AOM_BITS_8,
+ 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, H_DCT, AOM_BITS_8,
+ 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, V_ADST, AOM_BITS_8,
+ 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, H_ADST, AOM_BITS_8,
+ 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, V_FLIPADST, AOM_BITS_8,
+ 4096),
+ make_tuple(&av1_fht64x64_c, &av1_iht64x64_4096_add_c, H_FLIPADST, AOM_BITS_8,
+ 4096)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(C, AV1Trans64x64HT,
diff --git a/third_party/aom/test/av1_fht8x16_test.cc b/third_party/aom/test/av1_fht8x16_test.cc
index 11f085885..689cb0b90 100644
--- a/third_party/aom/test/av1_fht8x16_test.cc
+++ b/third_party/aom/test/av1_fht8x16_test.cc
@@ -27,7 +27,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using std::tr1::tuple;
using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht8x16Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht8x16Param;
void fht8x16_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -80,23 +80,31 @@ TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
using std::tr1::make_tuple;
const Ht8x16Param kArrayHt8x16Param_c[] = {
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 0, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 1, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 2, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 3, AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, DCT_DCT, AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, ADST_DCT, AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, DCT_ADST, AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, ADST_ADST, AOM_BITS_8,
+ 128),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 4, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 5, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 6, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 7, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 8, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 9, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 10, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 11, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 12, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 13, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 14, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, 15, AOM_BITS_8, 128)
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, FLIPADST_DCT, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, DCT_FLIPADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, FLIPADST_FLIPADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, ADST_FLIPADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, FLIPADST_ADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, IDTX, AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, V_DCT, AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, H_DCT, AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, V_ADST, AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, H_ADST, AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, V_FLIPADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_c, &av1_iht8x16_128_add_c, H_FLIPADST, AOM_BITS_8,
+ 128)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(C, AV1Trans8x16HT,
@@ -104,23 +112,39 @@ INSTANTIATE_TEST_CASE_P(C, AV1Trans8x16HT,
#if HAVE_SSE2
const Ht8x16Param kArrayHt8x16Param_sse2[] = {
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 0, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 1, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 2, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 3, AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, DCT_DCT, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, ADST_DCT, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, DCT_ADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, ADST_ADST,
+ AOM_BITS_8, 128),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 4, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 5, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 6, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 7, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 8, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 9, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 10, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 11, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 12, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 13, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 14, AOM_BITS_8, 128),
- make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, 15, AOM_BITS_8, 128)
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, FLIPADST_DCT,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, DCT_FLIPADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, FLIPADST_FLIPADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, ADST_FLIPADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, FLIPADST_ADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, IDTX, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, V_DCT, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, H_DCT, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, V_ADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, H_ADST, AOM_BITS_8,
+ 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, V_FLIPADST,
+ AOM_BITS_8, 128),
+ make_tuple(&av1_fht8x16_sse2, &av1_iht8x16_128_add_sse2, H_FLIPADST,
+ AOM_BITS_8, 128)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x16HT,
diff --git a/third_party/aom/test/av1_fht8x4_test.cc b/third_party/aom/test/av1_fht8x4_test.cc
index c797421af..e50a69457 100644
--- a/third_party/aom/test/av1_fht8x4_test.cc
+++ b/third_party/aom/test/av1_fht8x4_test.cc
@@ -27,7 +27,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using std::tr1::tuple;
using libaom_test::FhtFunc;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht8x4Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht8x4Param;
void fht8x4_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -80,23 +80,26 @@ TEST_P(AV1Trans8x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple;
const Ht8x4Param kArrayHt8x4Param_c[] = {
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 0, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 1, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 2, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 3, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, DCT_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, ADST_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, DCT_ADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, ADST_ADST, AOM_BITS_8, 32),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 4, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 5, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 6, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 7, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 8, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 9, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 10, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 11, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 12, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 13, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 14, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, 15, AOM_BITS_8, 32)
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, FLIPADST_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, DCT_FLIPADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, FLIPADST_FLIPADST, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, ADST_FLIPADST, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, FLIPADST_ADST, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, IDTX, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, V_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, H_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, V_ADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, H_ADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, V_FLIPADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_c, &av1_iht8x4_32_add_c, H_FLIPADST, AOM_BITS_8, 32)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(C, AV1Trans8x4HT,
@@ -104,23 +107,34 @@ INSTANTIATE_TEST_CASE_P(C, AV1Trans8x4HT,
#if HAVE_SSE2
const Ht8x4Param kArrayHt8x4Param_sse2[] = {
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 0, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 1, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 2, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 3, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, DCT_DCT, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, ADST_DCT, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, DCT_ADST, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, ADST_ADST, AOM_BITS_8,
+ 32),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 4, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 5, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 6, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 7, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 8, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 9, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 10, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 11, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 12, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 13, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 14, AOM_BITS_8, 32),
- make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, 15, AOM_BITS_8, 32)
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, FLIPADST_DCT,
+ AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, DCT_FLIPADST,
+ AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, FLIPADST_FLIPADST,
+ AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, ADST_FLIPADST,
+ AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, FLIPADST_ADST,
+ AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, IDTX, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, V_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, H_DCT, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, V_ADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, H_ADST, AOM_BITS_8, 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, V_FLIPADST, AOM_BITS_8,
+ 32),
+ make_tuple(&av1_fht8x4_sse2, &av1_iht8x4_32_add_sse2, H_FLIPADST, AOM_BITS_8,
+ 32)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x4HT,
diff --git a/third_party/aom/test/av1_fht8x8_test.cc b/third_party/aom/test/av1_fht8x8_test.cc
index 259557cfb..499fcc338 100644
--- a/third_party/aom/test/av1_fht8x8_test.cc
+++ b/third_party/aom/test/av1_fht8x8_test.cc
@@ -29,7 +29,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
using libaom_test::FhtFunc;
using std::tr1::tuple;
-typedef tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht8x8Param;
+typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht8x8Param;
void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam *txfm_param) {
@@ -43,14 +43,14 @@ void iht8x8_ref(const tran_low_t *in, uint8_t *out, int stride,
#if CONFIG_HIGHBITDEPTH
typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
- int tx_type, int bd);
+ TX_TYPE tx_type, int bd);
typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd);
+ TX_TYPE tx_type, int bd);
// Target optimized function, tx_type, bit depth
-typedef tuple<HbdHtFunc, int, int> HighbdHt8x8Param;
+typedef tuple<HbdHtFunc, TX_TYPE, int> HighbdHt8x8Param;
-void highbd_fht8x8_ref(const int16_t *in, int32_t *out, int stride, int tx_type,
- int bd) {
+void highbd_fht8x8_ref(const int16_t *in, int32_t *out, int stride,
+ TX_TYPE tx_type, int bd) {
av1_fwd_txfm2d_8x8_c(in, out, stride, tx_type, bd);
}
#endif // CONFIG_HIGHBITDEPTH
@@ -130,7 +130,7 @@ class AV1HighbdTrans8x8HT : public ::testing::TestWithParam<HighbdHt8x8Param> {
private:
HbdHtFunc fwd_txfm_;
HbdHtFunc fwd_txfm_ref_;
- int tx_type_;
+ TX_TYPE tx_type_;
int bit_depth_;
int mask_;
int num_coeffs_;
@@ -167,56 +167,67 @@ TEST_P(AV1HighbdTrans8x8HT, HighbdCoeffCheck) { RunBitexactCheck(); }
using std::tr1::make_tuple;
-#if HAVE_SSE2
+#if HAVE_SSE2 && !CONFIG_DAALA_DCT8
const Ht8x8Param kArrayHt8x8Param_sse2[] = {
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 0, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 1, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 2, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 3, AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, DCT_DCT, AOM_BITS_8,
+ 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, ADST_DCT, AOM_BITS_8,
+ 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, DCT_ADST, AOM_BITS_8,
+ 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, ADST_ADST, AOM_BITS_8,
+ 64),
#if CONFIG_EXT_TX
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 4, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 5, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 6, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 7, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 8, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 9, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 10, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 11, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 12, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 13, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 14, AOM_BITS_8, 64),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 15, AOM_BITS_8, 64)
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, FLIPADST_DCT,
+ AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, DCT_FLIPADST,
+ AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, FLIPADST_FLIPADST,
+ AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, ADST_FLIPADST,
+ AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, FLIPADST_ADST,
+ AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, IDTX, AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, V_DCT, AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, H_DCT, AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, V_ADST, AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, H_ADST, AOM_BITS_8, 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, V_FLIPADST, AOM_BITS_8,
+ 64),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, H_FLIPADST, AOM_BITS_8,
+ 64)
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans8x8HT,
::testing::ValuesIn(kArrayHt8x8Param_sse2));
#endif // HAVE_SSE2
-#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT8
const HighbdHt8x8Param kArrayHBDHt8x8Param_sse4_1[] = {
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 0, 10),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 0, 12),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 1, 10),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 1, 12),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 2, 10),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 2, 12),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 3, 10),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 3, 12),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_DCT, 10),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_DCT, 12),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_DCT, 10),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_DCT, 12),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_ADST, 10),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_ADST, 12),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_ADST, 10),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_ADST, 12),
#if CONFIG_EXT_TX
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 4, 10),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 4, 12),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 5, 10),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 5, 12),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 6, 10),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 6, 12),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 7, 10),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 7, 12),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 8, 10),
- make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, 8, 12),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_DCT, 10),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_DCT, 12),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_FLIPADST, 10),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, DCT_FLIPADST, 12),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_FLIPADST, 10),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_FLIPADST, 12),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_FLIPADST, 10),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, ADST_FLIPADST, 12),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_ADST, 10),
+ make_tuple(&av1_fwd_txfm2d_8x8_sse4_1, FLIPADST_ADST, 12),
#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans8x8HT,
::testing::ValuesIn(kArrayHBDHt8x8Param_sse4_1));
-#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT8
} // namespace
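Two things happen in the 8x8 file beyond the enum migration. First, the SIMD guards gain !CONFIG_DAALA_DCT8: with the Daala DCT standing in for the default 8x8 transform, the C reference output changes, so the SSE2/SSE4.1 bit-exactness instantiations must drop out of the build entirely. Second, the highbd function-pointer typedefs make the same TX_TYPE switch at the signature level, so every registered kernel is checked at compile time. A minimal sketch of that second idea, with hypothetical names (SketchHbdTxType, sketch_fwd):

#include <stdint.h>

typedef enum { SKETCH_HBD_DCT = 0, SKETCH_HBD_ADST = 1 } SketchHbdTxType;

// The pointer type carries the enum, so the kernel table below only accepts
// functions with this exact signature; a kernel still taking `int tx_type`
// is flagged at compile time instead of converting silently.
typedef void (*SketchFwdFunc)(const int16_t *in, int32_t *out, int stride,
                              SketchHbdTxType tx_type, int bd);

static void sketch_fwd(const int16_t *in, int32_t *out, int stride,
                       SketchHbdTxType tx_type, int bd) {
  (void)in; (void)out; (void)stride; (void)tx_type; (void)bd;  // stub body
}

static const SketchFwdFunc kSketchKernels[] = { &sketch_fwd };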
diff --git a/third_party/aom/test/av1_fwd_txfm1d_test.cc b/third_party/aom/test/av1_fwd_txfm1d_test.cc
index b10e84d2c..9deef3c95 100644
--- a/third_party/aom/test/av1_fwd_txfm1d_test.cc
+++ b/third_party/aom/test/av1_fwd_txfm1d_test.cc
@@ -51,11 +51,6 @@ TEST(av1_fwd_txfm1d, round_shift) {
EXPECT_EQ(round_shift(-8, 2), -2);
}
-TEST(av1_fwd_txfm1d, get_max_bit) {
- int max_bit = get_max_bit(8);
- EXPECT_EQ(max_bit, 3);
-}
-
TEST(av1_fwd_txfm1d, cospi_arr_data) {
for (int i = 0; i < 7; i++) {
for (int j = 0; j < 64; j++) {
@@ -65,31 +60,6 @@ TEST(av1_fwd_txfm1d, cospi_arr_data) {
}
}
-TEST(av1_fwd_txfm1d, clamp_block) {
- int16_t block[5][5] = { { 7, -5, 6, -3, 9 },
- { 7, -5, 6, -3, 9 },
- { 7, -5, 6, -3, 9 },
- { 7, -5, 6, -3, 9 },
- { 7, -5, 6, -3, 9 } };
-
- int16_t ref_block[5][5] = { { 7, -5, 6, -3, 9 },
- { 7, -5, 6, -3, 9 },
- { 7, -4, 2, -3, 9 },
- { 7, -4, 2, -3, 9 },
- { 7, -4, 2, -3, 9 } };
-
- int row = 2;
- int col = 1;
- int block_size = 3;
- int stride = 5;
- clamp_block(block[row] + col, block_size, block_size, stride, -4, 2);
- for (int r = 0; r < stride; r++) {
- for (int c = 0; c < stride; c++) {
- EXPECT_EQ(block[r][c], ref_block[r][c]);
- }
- }
-}
-
TEST(av1_fwd_txfm1d, accuracy) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
for (int si = 0; si < txfm_size_num; ++si) {
diff --git a/third_party/aom/test/av1_fwd_txfm2d_test.cc b/third_party/aom/test/av1_fwd_txfm2d_test.cc
index af3c8ff44..adf9a803c 100644
--- a/third_party/aom/test/av1_fwd_txfm2d_test.cc
+++ b/third_party/aom/test/av1_fwd_txfm2d_test.cc
@@ -177,5 +177,31 @@ const AV1FwdTxfm2dParam av1_fwd_txfm2d_param_c[] = {
INSTANTIATE_TEST_CASE_P(C, AV1FwdTxfm2d,
::testing::ValuesIn(av1_fwd_txfm2d_param_c));
+TEST(AV1FwdTxfm2d, CfgTest) {
+ for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) {
+ int bd = libaom_test::bd_arr[bd_idx];
+ int8_t low_range = libaom_test::low_range_arr[bd_idx];
+ int8_t high_range = libaom_test::high_range_arr[bd_idx];
+ // TODO(angiebird): include rect txfm in this test
+ for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+ TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(
+ static_cast<TX_TYPE>(tx_type), static_cast<TX_SIZE>(tx_size));
+ int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
+ int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
+ av1_gen_fwd_stage_range(stage_range_col, stage_range_row, &cfg, bd);
+ const TXFM_1D_CFG *col_cfg = cfg.col_cfg;
+ const TXFM_1D_CFG *row_cfg = cfg.row_cfg;
+ libaom_test::txfm_stage_range_check(stage_range_col, col_cfg->stage_num,
+ col_cfg->cos_bit, low_range,
+ high_range);
+ libaom_test::txfm_stage_range_check(stage_range_row, row_cfg->stage_num,
+ row_cfg->cos_bit, low_range,
+ high_range);
+ }
+ }
+ }
+}
+
#endif // CONFIG_HIGHBITDEPTH
} // namespace
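The added CfgTest is an exhaustive sweep: for each bit depth it walks every (TX_SIZE, TX_TYPE) pair, generates the forward stage ranges, and bounds-checks them, so a bad entry in any transform config table fails deterministically rather than only under the random inputs of the accuracy tests. The loop shape in isolation, with placeholder enums and a stub check (names are illustrative):

enum CfgTxSize { CFG_4X4, CFG_8X8, CFG_TX_SIZES };
enum CfgTxType { CFG_DCT_DCT, CFG_ADST_ADST, CFG_TX_TYPES };

static void check_cfg(CfgTxSize s, CfgTxType t) { (void)s; (void)t; }

static void sweep_all_cfgs(void) {
  for (int s = 0; s < CFG_TX_SIZES; ++s)
    for (int t = 0; t < CFG_TX_TYPES; ++t)
      // int loop indices need an explicit cast back to the enum type,
      // exactly as the test does with static_cast<TX_TYPE>/<TX_SIZE>.
      check_cfg((CfgTxSize)s, (CfgTxType)t);
}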
diff --git a/third_party/aom/test/av1_highbd_iht_test.cc b/third_party/aom/test/av1_highbd_iht_test.cc
index 3b263638f..45df5ed84 100644
--- a/third_party/aom/test/av1_highbd_iht_test.cc
+++ b/third_party/aom/test/av1_highbd_iht_test.cc
@@ -26,10 +26,10 @@ using std::tr1::tuple;
using libaom_test::ACMRandom;
typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd);
+ TX_TYPE tx_type, int bd);
typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
- int tx_type, int bd);
+ TX_TYPE tx_type, int bd);
// Test parameter argument list:
// <transform reference function,
@@ -38,7 +38,7 @@ typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
// num_coeffs,
// tx_type,
// bit_depth>
-typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, int, int> IHbdHtParam;
+typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, TX_TYPE, int> IHbdHtParam;
class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
public:
@@ -97,7 +97,7 @@ class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
IHbdHtFunc inv_txfm_;
IHbdHtFunc inv_txfm_ref_;
int num_coeffs_;
- int tx_type_;
+ TX_TYPE tx_type_;
int bit_depth_;
int16_t *input_;
@@ -135,21 +135,26 @@ TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) { RunBitexactCheck(); }
using std::tr1::make_tuple;
-#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#if HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && \
+ !(CONFIG_DAALA_DCT4 && CONFIG_DAALA_DCT8 && CONFIG_DAALA_DCT16)
+#if !CONFIG_DAALA_DCT4
#define PARAM_LIST_4X4 \
&av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \
&av1_inv_txfm2d_add_4x4_c, 16
-
+#endif
+#if !CONFIG_DAALA_DCT8
#define PARAM_LIST_8X8 \
&av1_fwd_txfm2d_8x8_c, &av1_inv_txfm2d_add_8x8_sse4_1, \
&av1_inv_txfm2d_add_8x8_c, 64
-
+#endif
+#if !CONFIG_DAALA_DCT16
#define PARAM_LIST_16X16 \
&av1_fwd_txfm2d_16x16_c, &av1_inv_txfm2d_add_16x16_sse4_1, \
&av1_inv_txfm2d_add_16x16_c, 256
-
+#endif
const IHbdHtParam kArrayIhtParam[] = {
- // 16x16
+// 16x16
+#if !CONFIG_DAALA_DCT16
make_tuple(PARAM_LIST_16X16, DCT_DCT, 10),
make_tuple(PARAM_LIST_16X16, DCT_DCT, 12),
make_tuple(PARAM_LIST_16X16, ADST_DCT, 10),
@@ -170,7 +175,9 @@ const IHbdHtParam kArrayIhtParam[] = {
make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 10),
make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 12),
#endif
- // 8x8
+#endif
+// 8x8
+#if !CONFIG_DAALA_DCT8
make_tuple(PARAM_LIST_8X8, DCT_DCT, 10),
make_tuple(PARAM_LIST_8X8, DCT_DCT, 12),
make_tuple(PARAM_LIST_8X8, ADST_DCT, 10),
@@ -191,7 +198,9 @@ const IHbdHtParam kArrayIhtParam[] = {
make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 10),
make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 12),
#endif
- // 4x4
+#endif
+// 4x4
+#if !CONFIG_DAALA_DCT4
make_tuple(PARAM_LIST_4X4, DCT_DCT, 10),
make_tuple(PARAM_LIST_4X4, DCT_DCT, 12),
make_tuple(PARAM_LIST_4X4, ADST_DCT, 10),
@@ -212,13 +221,15 @@ const IHbdHtParam kArrayIhtParam[] = {
make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 10),
make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 12),
#endif
+#endif
};
INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdInvHTNxN,
::testing::ValuesIn(kArrayIhtParam));
-#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH
+#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH &&
+ // !(CONFIG_DAALA_DCT4 && CONFIG_DAALA_DCT8 && CONFIG_DAALA_DCT16)
-#if HAVE_AVX2 && CONFIG_HIGHBITDEPTH
+#if HAVE_AVX2 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT32
#define PARAM_LIST_32X32 \
&av1_fwd_txfm2d_32x32_c, &av1_inv_txfm2d_add_32x32_avx2, \
&av1_inv_txfm2d_add_32x32_c, 1024
diff --git a/third_party/aom/test/av1_inv_txfm1d_test.cc b/third_party/aom/test/av1_inv_txfm1d_test.cc
index b871105eb..b44c04116 100644
--- a/third_party/aom/test/av1_inv_txfm1d_test.cc
+++ b/third_party/aom/test/av1_inv_txfm1d_test.cc
@@ -9,6 +9,8 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#include <math.h>
+
#include "test/av1_txfm_test.h"
#include "test/util.h"
#include "av1/common/av1_fwd_txfm1d.h"
@@ -45,6 +47,68 @@ const TxfmFunc inv_txfm_func_ls[][2] = {
const int8_t cos_bit[12] = { 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 };
const int8_t range_bit[12] = { 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 };
+void reference_idct_1d_int(const int32_t *in, int32_t *out, int size) {
+ double input[64];
+ for (int i = 0; i < size; ++i) input[i] = in[i];
+
+ double output[64];
+ libaom_test::reference_idct_1d(input, output, size);
+
+ for (int i = 0; i < size; ++i)
+ out[i] = static_cast<int32_t>(round(output[i]));
+}
+
+void random_matrix(int32_t *dst, int len, ACMRandom *rnd) {
+ const int bits = 16;
+ const int maxVal = (1 << (bits - 1)) - 1;
+ const int minVal = -(1 << (bits - 1));
+ for (int i = 0; i < len; ++i) {
+ if (rnd->Rand8() % 10)
+ dst[i] = minVal + rnd->Rand16() % (1 << bits);
+ else
+ dst[i] = rnd->Rand8() % 2 ? minVal : maxVal;
+ }
+}
+
+TEST(av1_inv_txfm1d, InvAccuracyCheck) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = 20000;
+ const int max_error[] = { 6, 10, 19, 28 };
+ for (int k = 0; k < count_test_block; ++k) {
+ // choose a random transform to test
+ const int txfm_type = rnd.Rand8() % NELEMENTS(inv_txfm_func_ls);
+ const int txfm_size = txfm_size_ls[txfm_type];
+ const TxfmFunc txfm_func = inv_txfm_func_ls[txfm_type][0];
+
+ int32_t input[64];
+ random_matrix(input, txfm_size, &rnd);
+
+ int32_t ref_output[64];
+ reference_idct_1d_int(input, ref_output, txfm_size);
+
+ int32_t output[64];
+ txfm_func(input, output, cos_bit, range_bit);
+
+ for (int i = 0; i < txfm_size; ++i) {
+ EXPECT_LE(abs(output[i] - ref_output[i]), max_error[txfm_type]);
+ }
+ }
+}
+
+static INLINE int get_max_bit(int x) {
+ int max_bit = -1;
+ while (x) {
+ x = x >> 1;
+ max_bit++;
+ }
+ return max_bit;
+}
+
+TEST(av1_inv_txfm1d, get_max_bit) {
+ int max_bit = get_max_bit(8);
+ EXPECT_EQ(max_bit, 3);
+}
+
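get_max_bit, moved into this file along with its test, returns the index of the highest set bit: floor(log2(x)) for x > 0, and -1 for x == 0 since the loop never runs. For example, x = 8 shifts through 4, 2, 1, 0 — four iterations — taking max_bit from its -1 start to 3, matching the EXPECT_EQ above. An equivalent formulation as a sketch, under the assumption x > 0:

#include <math.h>
// Equivalent to the shift loop for positive x (sketch only).
static int get_max_bit_via_log2(int x) {
  return (int)floor(log2((double)x));  // get_max_bit(8) == 3 == log2(8)
}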
TEST(av1_inv_txfm1d, round_trip) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
for (int si = 0; si < NELEMENTS(fwd_txfm_func_ls); ++si) {
diff --git a/third_party/aom/test/av1_inv_txfm2d_test.cc b/third_party/aom/test/av1_inv_txfm2d_test.cc
index 5185c1ca8..bccbdeebf 100644
--- a/third_party/aom/test/av1_inv_txfm2d_test.cc
+++ b/third_party/aom/test/av1_inv_txfm2d_test.cc
@@ -40,11 +40,12 @@ class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> {
tx_size_ = GET_PARAM(1);
max_error_ = GET_PARAM(2);
max_avg_error_ = GET_PARAM(3);
- txfm1d_size_ = libaom_test::get_txfm1d_size(tx_size_);
- txfm2d_size_ = txfm1d_size_ * txfm1d_size_;
}
void RunRoundtripCheck() {
+ int tx_w = tx_size_wide[tx_size_];
+ int tx_h = tx_size_high[tx_size_];
+ int txfm2d_size = tx_w * tx_h;
const Fwd_Txfm2d_Func fwd_txfm_func =
libaom_test::fwd_txfm_func_ls[tx_size_];
const Inv_Txfm2d_Func inv_txfm_func =
@@ -56,9 +57,9 @@ class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> {
for (int ci = 0; ci < count; ci++) {
int16_t expected[64 * 64] = { 0 };
- ASSERT_LT(txfm2d_size_, NELEMENTS(expected));
+ ASSERT_LT(txfm2d_size, NELEMENTS(expected));
- for (int ni = 0; ni < txfm2d_size_; ++ni) {
+ for (int ni = 0; ni < txfm2d_size; ++ni) {
if (ci == 0) {
int extreme_input = input_base - 1;
expected[ni] = extreme_input; // extreme case
@@ -68,25 +69,26 @@ class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> {
}
int32_t coeffs[64 * 64] = { 0 };
- ASSERT_LT(txfm2d_size_, NELEMENTS(coeffs));
- fwd_txfm_func(expected, coeffs, txfm1d_size_, tx_type_, bd);
+ ASSERT_LT(txfm2d_size, NELEMENTS(coeffs));
+ fwd_txfm_func(expected, coeffs, tx_w, tx_type_, bd);
uint16_t actual[64 * 64] = { 0 };
- ASSERT_LT(txfm2d_size_, NELEMENTS(actual));
- inv_txfm_func(coeffs, actual, txfm1d_size_, tx_type_, bd);
+ ASSERT_LT(txfm2d_size, NELEMENTS(actual));
+ inv_txfm_func(coeffs, actual, tx_w, tx_type_, bd);
- for (int ni = 0; ni < txfm2d_size_; ++ni) {
+ for (int ni = 0; ni < txfm2d_size; ++ni) {
EXPECT_GE(max_error_, abs(expected[ni] - actual[ni]));
}
avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>(
- expected, actual, txfm2d_size_);
+ expected, actual, txfm2d_size);
}
avg_abs_error /= count;
// max_abs_avg_error comes from upper bound of
// printf("txfm1d_size: %d accuracy_avg_abs_error: %f\n",
// txfm1d_size_, avg_abs_error);
- EXPECT_GE(max_avg_error_, avg_abs_error);
+ EXPECT_GE(max_avg_error_, avg_abs_error)
+ << " tx_w: " << tx_w << " tx_h " << tx_h << " tx_type: " << tx_type_;
}
private:
@@ -94,14 +96,53 @@ class AV1InvTxfm2d : public ::testing::TestWithParam<AV1InvTxfm2dParam> {
double max_avg_error_;
TX_TYPE tx_type_;
TX_SIZE tx_size_;
- int txfm1d_size_;
- int txfm2d_size_;
};
TEST_P(AV1InvTxfm2d, RunRoundtripCheck) { RunRoundtripCheck(); }
const AV1InvTxfm2dParam av1_inv_txfm2d_param[] = {
#if CONFIG_EXT_TX
+#if CONFIG_RECT_TX
+ AV1InvTxfm2dParam(DCT_DCT, TX_4X8, 2, 0.007),
+ AV1InvTxfm2dParam(ADST_DCT, TX_4X8, 2, 0.012),
+ AV1InvTxfm2dParam(DCT_ADST, TX_4X8, 2, 0.012),
+ AV1InvTxfm2dParam(ADST_ADST, TX_4X8, 2, 0.012),
+ AV1InvTxfm2dParam(FLIPADST_DCT, TX_4X8, 2, 0.012),
+ AV1InvTxfm2dParam(DCT_FLIPADST, TX_4X8, 2, 0.012),
+ AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_4X8, 2, 0.012),
+ AV1InvTxfm2dParam(ADST_FLIPADST, TX_4X8, 2, 0.012),
+ AV1InvTxfm2dParam(FLIPADST_ADST, TX_4X8, 2, 0.012),
+
+ AV1InvTxfm2dParam(DCT_DCT, TX_8X4, 2, 0.007),
+ AV1InvTxfm2dParam(ADST_DCT, TX_8X4, 2, 0.012),
+ AV1InvTxfm2dParam(DCT_ADST, TX_8X4, 2, 0.012),
+ AV1InvTxfm2dParam(ADST_ADST, TX_8X4, 2, 0.012),
+ AV1InvTxfm2dParam(FLIPADST_DCT, TX_8X4, 2, 0.007),
+ AV1InvTxfm2dParam(DCT_FLIPADST, TX_8X4, 2, 0.012),
+ AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_8X4, 2, 0.012),
+ AV1InvTxfm2dParam(ADST_FLIPADST, TX_8X4, 2, 0.012),
+ AV1InvTxfm2dParam(FLIPADST_ADST, TX_8X4, 2, 0.012),
+
+ AV1InvTxfm2dParam(DCT_DCT, TX_8X16, 2, 0.025),
+ AV1InvTxfm2dParam(ADST_DCT, TX_8X16, 2, 0.020),
+ AV1InvTxfm2dParam(DCT_ADST, TX_8X16, 2, 0.027),
+ AV1InvTxfm2dParam(ADST_ADST, TX_8X16, 2, 0.023),
+ AV1InvTxfm2dParam(FLIPADST_DCT, TX_8X16, 2, 0.020),
+ AV1InvTxfm2dParam(DCT_FLIPADST, TX_8X16, 2, 0.027),
+ AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_8X16, 2, 0.032),
+ AV1InvTxfm2dParam(ADST_FLIPADST, TX_8X16, 2, 0.023),
+ AV1InvTxfm2dParam(FLIPADST_ADST, TX_8X16, 2, 0.023),
+
+ AV1InvTxfm2dParam(DCT_DCT, TX_16X8, 2, 0.007),
+ AV1InvTxfm2dParam(ADST_DCT, TX_16X8, 2, 0.012),
+ AV1InvTxfm2dParam(DCT_ADST, TX_16X8, 2, 0.024),
+ AV1InvTxfm2dParam(ADST_ADST, TX_16X8, 2, 0.033),
+ AV1InvTxfm2dParam(FLIPADST_DCT, TX_16X8, 2, 0.015),
+ AV1InvTxfm2dParam(DCT_FLIPADST, TX_16X8, 2, 0.032),
+ AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_16X8, 2, 0.032),
+ AV1InvTxfm2dParam(ADST_FLIPADST, TX_16X8, 2, 0.033),
+ AV1InvTxfm2dParam(FLIPADST_ADST, TX_16X8, 2, 0.032),
+#endif
AV1InvTxfm2dParam(FLIPADST_DCT, TX_4X4, 2, 0.002),
AV1InvTxfm2dParam(DCT_FLIPADST, TX_4X4, 2, 0.002),
AV1InvTxfm2dParam(FLIPADST_FLIPADST, TX_4X4, 2, 0.002),
@@ -144,6 +185,32 @@ const AV1InvTxfm2dParam av1_inv_txfm2d_param[] = {
INSTANTIATE_TEST_CASE_P(C, AV1InvTxfm2d,
::testing::ValuesIn(av1_inv_txfm2d_param));
+TEST(AV1InvTxfm2d, CfgTest) {
+ for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) {
+ int bd = libaom_test::bd_arr[bd_idx];
+ int8_t low_range = libaom_test::low_range_arr[bd_idx];
+ int8_t high_range = libaom_test::high_range_arr[bd_idx];
+ // TODO(angiebird): include rect txfm in this test
+ for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
+ for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+ TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_cfg(
+ static_cast<TX_TYPE>(tx_type), static_cast<TX_SIZE>(tx_size));
+ int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
+ int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
+ av1_gen_inv_stage_range(stage_range_col, stage_range_row, &cfg,
+ fwd_shift_sum[tx_size], bd);
+ const TXFM_1D_CFG *col_cfg = cfg.col_cfg;
+ const TXFM_1D_CFG *row_cfg = cfg.row_cfg;
+ libaom_test::txfm_stage_range_check(stage_range_col, col_cfg->stage_num,
+ col_cfg->cos_bit, low_range,
+ high_range);
+ libaom_test::txfm_stage_range_check(stage_range_row, row_cfg->stage_num,
+ row_cfg->cos_bit, low_range,
+ high_range);
+ }
+ }
+ }
+}
#endif // CONFIG_HIGHBITDEPTH
} // namespace
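The roundtrip rewrite above drops the square-only txfm1d_size_/txfm2d_size_ members because rectangular sizes break the n-by-n assumption: the coefficient count becomes tx_w * tx_h, and the stride handed to the 2-D transform functions is the block width, not a square side. A small sketch of the addressing, with TX_16X8 chosen as an arbitrary example:

#include <stdint.h>
// Sketch: row-major addressing of a rectangular transform block,
// assuming stride == tx_w as the updated test passes it.
static void touch_rect_block(void) {
  const int tx_w = 16, tx_h = 8;            // e.g. TX_16X8
  const int txfm2d_size = tx_w * tx_h;      // 128 coefficients, not 16 * 16
  static int16_t block[64 * 64];
  for (int r = 0; r < tx_h; ++r)
    for (int c = 0; c < tx_w; ++c)
      block[r * tx_w + c] = 0;              // element (r, c) at row stride tx_w
  (void)txfm2d_size;
}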
diff --git a/third_party/aom/test/av1_inv_txfm_test.cc b/third_party/aom/test/av1_inv_txfm_test.cc
index 34d45e08b..873e80685 100644
--- a/third_party/aom/test/av1_inv_txfm_test.cc
+++ b/third_party/aom/test/av1_inv_txfm_test.cc
@@ -18,6 +18,7 @@
#include "./av1_rtcd.h"
#include "./aom_dsp_rtcd.h"
#include "test/acm_random.h"
+#include "test/av1_txfm_test.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
@@ -29,19 +30,6 @@
using libaom_test::ACMRandom;
namespace {
-const double kInvSqrt2 = 0.707106781186547524400844362104;
-
-void reference_idct_1d(const double *in, double *out, int size) {
- for (int n = 0; n < size; ++n) {
- out[n] = 0;
- for (int k = 0; k < size; ++k) {
- if (k == 0)
- out[n] += kInvSqrt2 * in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
- else
- out[n] += in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
- }
- }
-}
typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
@@ -65,7 +53,7 @@ class TransTestBase {
}
inv_txfm_(input, output);
- reference_idct_1d(ref_input, ref_output, txfm_size_);
+ libaom_test::reference_idct_1d(ref_input, ref_output, txfm_size_);
for (int ni = 0; ni < txfm_size_; ++ni) {
EXPECT_LE(
diff --git a/third_party/aom/test/av1_quantize_test.cc b/third_party/aom/test/av1_quantize_test.cc
index 239b041b2..36ac8c4ad 100644
--- a/third_party/aom/test/av1_quantize_test.cc
+++ b/third_party/aom/test/av1_quantize_test.cc
@@ -99,8 +99,8 @@ class AV1QuantizeTest : public ::testing::TestWithParam<QuantizeFuncParams> {
for (int j = 0; j < count; ++j) {
err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
(ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
- EXPECT_EQ(ref_qcoeff_ptr[j], qcoeff_ptr[j]) << "qcoeff error: i = " << i
- << " j = " << j << "\n";
+ EXPECT_EQ(ref_qcoeff_ptr[j], qcoeff_ptr[j])
+ << "qcoeff error: i = " << i << " j = " << j << "\n";
EXPECT_EQ(ref_dqcoeff_ptr[j], dqcoeff_ptr[j])
<< "dqcoeff error: i = " << i << " j = " << j << "\n";
}
@@ -195,7 +195,6 @@ TEST_P(AV1QuantizeTest, BitExactCheck) { RunQuantizeTest(); }
TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); }
#if HAVE_SSE4_1
-#if !CONFIG_AOM_QM
const QuantizeFuncParams qfps[4] = {
QuantizeFuncParams(av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
16),
@@ -208,6 +207,5 @@ const QuantizeFuncParams qfps[4] = {
};
INSTANTIATE_TEST_CASE_P(SSE4_1, AV1QuantizeTest, ::testing::ValuesIn(qfps));
-#endif // !CONFIG_AOM_QM
#endif // HAVE_SSE4_1
} // namespace
diff --git a/third_party/aom/test/av1_txfm_test.cc b/third_party/aom/test/av1_txfm_test.cc
index 1e473b304..4545de100 100644
--- a/third_party/aom/test/av1_txfm_test.cc
+++ b/third_party/aom/test/av1_txfm_test.cc
@@ -66,16 +66,32 @@ void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1) {
double invSqrt2 = 1 / pow(2, 0.5);
+double dct_matrix(double n, double k, int size) {
+ return cos(M_PI * (2 * n + 1) * k / (2 * size));
+}
+
void reference_dct_1d(const double *in, double *out, int size) {
for (int k = 0; k < size; ++k) {
out[k] = 0;
for (int n = 0; n < size; ++n) {
- out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (2 * size));
+ out[k] += in[n] * dct_matrix(n, k, size);
}
if (k == 0) out[k] = out[k] * invSqrt2;
}
}
+void reference_idct_1d(const double *in, double *out, int size) {
+ for (int k = 0; k < size; ++k) {
+ out[k] = 0;
+ for (int n = 0; n < size; ++n) {
+ if (n == 0)
+ out[k] += invSqrt2 * in[n] * dct_matrix(k, n, size);
+ else
+ out[k] += in[n] * dct_matrix(k, n, size);
+ }
+ }
+}
+
void reference_adst_1d(const double *in, double *out, int size) {
for (int k = 0; k < size; ++k) {
out[k] = 0;
@@ -161,4 +177,20 @@ template void fliplr<double>(double *dest, int stride, int length);
template void flipud<double>(double *dest, int stride, int length);
template void fliplrud<double>(double *dest, int stride, int length);
+int bd_arr[BD_NUM] = { 8, 10, 12 };
+int8_t low_range_arr[BD_NUM] = { 16, 32, 32 };
+int8_t high_range_arr[BD_NUM] = { 32, 32, 32 };
+
+void txfm_stage_range_check(const int8_t *stage_range, int stage_num,
+ const int8_t *cos_bit, int low_range,
+ int high_range) {
+ for (int i = 0; i < stage_num; ++i) {
+ EXPECT_LE(stage_range[i], low_range);
+ }
+ for (int i = 0; i < stage_num - 1; ++i) {
+ // make sure there is no overflow while doing half_btf()
+ EXPECT_LE(stage_range[i] + cos_bit[i], high_range);
+ EXPECT_LE(stage_range[i + 1] + cos_bit[i], high_range);
+ }
+}
} // namespace libaom_test
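The refactor above pulls the shared cosine kernel into dct_matrix, making it explicit that reference_dct_1d and reference_idct_1d form a DCT-II/DCT-III pair. With \(C(n,k) = \cos\!\big(\pi(2n+1)k/(2N)\big)\) and the DC scale \(s_0 = 1/\sqrt{2}\), \(s_k = 1\) for \(k > 0\), the two functions compute

\[
X_k = s_k \sum_{n=0}^{N-1} x_n\, C(n,k), \qquad
\hat{x}_n = \sum_{k=0}^{N-1} s_k\, X_k\, C(n,k),
\]

and by the orthogonality relation \(\sum_{k} s_k^2\, C(m,k)\, C(n,k) = \tfrac{N}{2}\,\delta_{mn}\) the composition gives \(\hat{x}_n = \tfrac{N}{2}\, x_n\): the pair is inverse up to a fixed scale of \(N/2\).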
diff --git a/third_party/aom/test/av1_txfm_test.h b/third_party/aom/test/av1_txfm_test.h
index d46f0bba7..3e64e36ad 100644
--- a/third_party/aom/test/av1_txfm_test.h
+++ b/third_party/aom/test/av1_txfm_test.h
@@ -40,6 +40,7 @@ int get_txfm1d_size(TX_SIZE tx_size);
void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1);
void reference_dct_1d(const double *in, double *out, int size);
+void reference_idct_1d(const double *in, double *out, int size);
void reference_adst_1d(const double *in, double *out, int size);
@@ -70,31 +71,79 @@ void fliplrud(Type *dest, int stride, int length);
typedef void (*TxfmFunc)(const int32_t *in, int32_t *out, const int8_t *cos_bit,
const int8_t *range_bit);
-typedef void (*Fwd_Txfm2d_Func)(const int16_t *, int32_t *, int, int, int);
-typedef void (*Inv_Txfm2d_Func)(const int32_t *, uint16_t *, int, int, int);
+typedef void (*Fwd_Txfm2d_Func)(const int16_t *, int32_t *, int, TX_TYPE, int);
+typedef void (*Inv_Txfm2d_Func)(const int32_t *, uint16_t *, int, TX_TYPE, int);
static const int bd = 10;
static const int input_base = (1 << bd);
#if CONFIG_HIGHBITDEPTH
#if CONFIG_AV1_ENCODER
-static const Fwd_Txfm2d_Func fwd_txfm_func_ls[TX_SIZES] = {
+
+static const Fwd_Txfm2d_Func fwd_txfm_func_ls[TX_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2
NULL,
#endif
- av1_fwd_txfm2d_4x4_c, av1_fwd_txfm2d_8x8_c, av1_fwd_txfm2d_16x16_c,
- av1_fwd_txfm2d_32x32_c
+ av1_fwd_txfm2d_4x4_c,
+ av1_fwd_txfm2d_8x8_c,
+ av1_fwd_txfm2d_16x16_c,
+ av1_fwd_txfm2d_32x32_c,
+#if CONFIG_TX64X64
+ av1_fwd_txfm2d_64x64_c,
+#endif // CONFIG_TX64X64
+ av1_fwd_txfm2d_4x8_c,
+ av1_fwd_txfm2d_8x4_c,
+ av1_fwd_txfm2d_8x16_c,
+ av1_fwd_txfm2d_16x8_c,
+ av1_fwd_txfm2d_16x32_c,
+ av1_fwd_txfm2d_32x16_c,
+#if CONFIG_TX64X64
+ av1_fwd_txfm2d_32x64_c,
+ av1_fwd_txfm2d_64x32_c,
+#endif // CONFIG_TX64X64
+ NULL,
+ NULL,
+ NULL,
+ NULL,
};
#endif
-static const Inv_Txfm2d_Func inv_txfm_func_ls[TX_SIZES] = {
+static const Inv_Txfm2d_Func inv_txfm_func_ls[TX_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2
NULL,
#endif
- av1_inv_txfm2d_add_4x4_c, av1_inv_txfm2d_add_8x8_c,
- av1_inv_txfm2d_add_16x16_c, av1_inv_txfm2d_add_32x32_c
+ av1_inv_txfm2d_add_4x4_c,
+ av1_inv_txfm2d_add_8x8_c,
+ av1_inv_txfm2d_add_16x16_c,
+ av1_inv_txfm2d_add_32x32_c,
+#if CONFIG_TX64X64
+ av1_inv_txfm2d_add_64x64_c,
+#endif // CONFIG_TX64X64
+ av1_inv_txfm2d_add_4x8_c,
+ av1_inv_txfm2d_add_8x4_c,
+ av1_inv_txfm2d_add_8x16_c,
+ av1_inv_txfm2d_add_16x8_c,
+ av1_inv_txfm2d_add_16x32_c,
+ av1_inv_txfm2d_add_32x16_c,
+#if CONFIG_TX64X64
+ av1_inv_txfm2d_add_32x64_c,
+ av1_inv_txfm2d_add_64x32_c,
+#endif // CONFIG_TX64X64
+ NULL,
+ NULL,
+ NULL,
+ NULL,
};
#endif // CONFIG_HIGHBITDEPTH
+#define BD_NUM 3
+
+extern int bd_arr[];
+extern int8_t low_range_arr[];
+extern int8_t high_range_arr[];
+
+void txfm_stage_range_check(const int8_t *stage_range, int stage_num,
+ const int8_t *cos_bit, int low_range,
+ int high_range);
} // namespace libaom_test
#endif // AV1_TXFM_TEST_H_
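The new txfm_stage_range_check encodes two invariants: every stage's value range (in bits) stays at or below low_range, and range plus that stage's cos_bit stays at or below high_range, because each butterfly multiplies a stage value by a cosine weight of roughly cos_bit bits before the rounding shift — the "no overflow while doing half_btf()" comment in the .cc hunk. A sketch of the half-butterfly this protects, with the widening written out so the bound is visible (half_btf_sketch is illustrative, not libaom's half_btf):

#include <stdint.h>
// If |a|, |b| < 2^range and the weights are below 2^cos_bit, the products
// need roughly range + cos_bit bits, so the high_range bound keeps the
// accumulation within 32 bits on the narrow (SIMD-style) paths.
static int32_t half_btf_sketch(int32_t w0, int32_t a, int32_t w1, int32_t b,
                               int cos_bit) {
  const int64_t sum = (int64_t)w0 * a + (int64_t)w1 * b;      // widened here
  return (int32_t)((sum + (1 << (cos_bit - 1))) >> cos_bit);  // round, shift
}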
diff --git a/third_party/aom/test/boolcoder_test.cc b/third_party/aom/test/boolcoder_test.cc
index 7abe1b1b6..916a54427 100644
--- a/third_party/aom/test/boolcoder_test.cc
+++ b/third_party/aom/test/boolcoder_test.cc
@@ -86,11 +86,7 @@ TEST(AV1, TestBitIO) {
}
}
-#if CONFIG_EC_SMALLMUL
#define FRAC_DIFF_TOTAL_ERROR 0.16
-#else
-#define FRAC_DIFF_TOTAL_ERROR 0.07
-#endif
TEST(AV1, TestTell) {
const int kBufferSize = 10000;
@@ -116,8 +112,8 @@ TEST(AV1, TestTell) {
aom_read(&br, p, NULL);
uint32_t tell = aom_reader_tell(&br);
uint32_t tell_frac = aom_reader_tell_frac(&br);
- GTEST_ASSERT_GE(tell, last_tell) << "tell: " << tell
- << ", last_tell: " << last_tell;
+ GTEST_ASSERT_GE(tell, last_tell)
+ << "tell: " << tell << ", last_tell: " << last_tell;
GTEST_ASSERT_GE(tell_frac, last_tell_frac)
<< "tell_frac: " << tell_frac
<< ", last_tell_frac: " << last_tell_frac;
diff --git a/third_party/aom/test/cdef_test.cc b/third_party/aom/test/cdef_test.cc
new file mode 100644
index 000000000..b6250b6e9
--- /dev/null
+++ b/third_party/aom/test/cdef_test.cc
@@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+*/
+
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+
+#include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/common/cdef_block.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+using libaom_test::ACMRandom;
+
+namespace {
+
+typedef std::tr1::tuple<cdef_filter_block_func, cdef_filter_block_func, int>
+ cdef_dir_param_t;
+
+class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> {
+ public:
+ virtual ~CDEFBlockTest() {}
+ virtual void SetUp() {
+ cdef = GET_PARAM(0);
+ ref_cdef = GET_PARAM(1);
+ bsize = GET_PARAM(2);
+ }
+
+ virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+ int bsize;
+ cdef_filter_block_func cdef;
+ cdef_filter_block_func ref_cdef;
+};
+
+typedef CDEFBlockTest CDEFSpeedTest;
+
+void test_cdef(int bsize, int iterations, cdef_filter_block_func cdef,
+ cdef_filter_block_func ref_cdef) {
+ const int size = 8;
+ const int ysize = size + 2 * CDEF_VBORDER;
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
+ DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
+ DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
+ memset(ref_d, 0, sizeof(ref_d));
+ memset(d, 0, sizeof(d));
+
+ int error = 0, pristrength = 0, secstrength, dir;
+ int boundary, pridamping, secdamping, depth, bits, level, count,
+ errdepth = 0, errpristrength = 0, errsecstrength = 0, errboundary = 0,
+ errpridamping = 0, errsecdamping = 0;
+ unsigned int pos = 0;
+
+ for (boundary = 0; boundary < 16; boundary++) {
+ for (depth = 8; depth <= 12; depth += 2) {
+ const unsigned int max_pos = size * size >> (depth == 8);
+ for (pridamping = 3 + depth - 8;
+ pridamping < 7 - 3 * !!boundary + depth - 8; pridamping++) {
+ for (secdamping = 3 + depth - 8;
+ secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) {
+ for (count = 0; count < iterations; count++) {
+ for (level = 0; level < (1 << depth) && !error;
+ level += (2 + 6 * !!boundary) << (depth - 8)) {
+ for (bits = 1; bits <= depth && !error;
+ bits += 1 + 3 * !!boundary) {
+ for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
+ s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
+ (1 << depth) - 1);
+ if (boundary) {
+ if (boundary & 1) { // Left
+ for (int i = 0; i < ysize; i++)
+ for (int j = 0; j < CDEF_HBORDER; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+ }
+ if (boundary & 2) { // Right
+ for (int i = 0; i < ysize; i++)
+ for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+ }
+ if (boundary & 4) { // Above
+ for (int i = 0; i < CDEF_VBORDER; i++)
+ for (int j = 0; j < CDEF_BSTRIDE; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+ }
+ if (boundary & 8) { // Below
+ for (int i = CDEF_VBORDER + size; i < ysize; i++)
+ for (int j = 0; j < CDEF_BSTRIDE; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
+ }
+ }
+ for (dir = 0; dir < 8; dir++) {
+ for (pristrength = 0;
+ pristrength <= 19 << (depth - 8) && !error;
+ pristrength += (1 + 4 * !!boundary) << (depth - 8)) {
+ if (pristrength == 16) pristrength = 19;
+ for (secstrength = 0;
+ secstrength <= 4 << (depth - 8) && !error;
+ secstrength += 1 << (depth - 8)) {
+ if (secstrength == 3 << (depth - 8)) continue;
+ ref_cdef(depth == 8 ? (uint8_t *)ref_d : 0, ref_d, size,
+ s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
+ pristrength, secstrength, dir, pridamping,
+ secdamping, bsize, (1 << depth) - 1);
+ // If cdef and ref_cdef are the same, we're just testing
+ // speed
+ if (cdef != ref_cdef)
+ ASM_REGISTER_STATE_CHECK(
+ cdef(depth == 8 ? (uint8_t *)d : 0, d, size,
+ s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
+ pristrength, secstrength, dir, pridamping,
+ secdamping, bsize, (1 << depth) - 1));
+ if (ref_cdef != cdef) {
+ for (pos = 0; pos < max_pos && !error; pos++) {
+ error = ref_d[pos] != d[pos];
+ errdepth = depth;
+ errpristrength = pristrength;
+ errsecstrength = secstrength;
+ errboundary = boundary;
+ errpridamping = pridamping;
+ errsecdamping = secdamping;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ pos--;
+ EXPECT_EQ(0, error) << "Error: CDEFBlockTest, SIMD and C mismatch."
+ << std::endl
+ << "First error at " << pos % size << "," << pos / size
+ << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
+ << ") " << std::endl
+ << "pristrength: " << errpristrength << std::endl
+ << "pridamping: " << errpridamping << std::endl
+ << "secstrength: " << errsecstrength << std::endl
+ << "secdamping: " << errsecdamping << std::endl
+ << "depth: " << errdepth << std::endl
+ << "size: " << bsize << std::endl
+ << "boundary: " << errboundary << std::endl
+ << std::endl;
+}
+
+void test_cdef_speed(int bsize, int iterations, cdef_filter_block_func cdef,
+ cdef_filter_block_func ref_cdef) {
+ aom_usec_timer ref_timer;
+ aom_usec_timer timer;
+
+ aom_usec_timer_start(&ref_timer);
+ test_cdef(bsize, iterations, ref_cdef, ref_cdef);
+ aom_usec_timer_mark(&ref_timer);
+ int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+
+ aom_usec_timer_start(&timer);
+ test_cdef(bsize, iterations, cdef, cdef);
+ aom_usec_timer_mark(&timer);
+ int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+
+#if 0
+ std::cout << "[ ] C time = " << ref_elapsed_time / 1000
+ << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
+#endif
+
+ EXPECT_GT(ref_elapsed_time, elapsed_time)
+ << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl
+ << "C time: " << ref_elapsed_time << " us" << std::endl
+ << "SIMD time: " << elapsed_time << " us" << std::endl;
+}
+
+typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var,
+ int coeff_shift);
+
+typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t;
+
+class CDEFFindDirTest : public ::testing::TestWithParam<find_dir_param_t> {
+ public:
+ virtual ~CDEFFindDirTest() {}
+ virtual void SetUp() {
+ finddir = GET_PARAM(0);
+ ref_finddir = GET_PARAM(1);
+ }
+
+ virtual void TearDown() { libaom_test::ClearSystemState(); }
+
+ protected:
+ find_dir_t finddir;
+ find_dir_t ref_finddir;
+};
+
+typedef CDEFFindDirTest CDEFFindDirSpeedTest;
+
+void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
+ int coeff_shift),
+ int (*ref_finddir)(const uint16_t *img, int stride,
+ int32_t *var, int coeff_shift)) {
+ const int size = 8;
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ DECLARE_ALIGNED(16, uint16_t, s[size * size]);
+
+ int error = 0;
+ int depth, bits, level, count, errdepth = 0;
+ int ref_res = 0, res = 0;
+ int32_t ref_var = 0, var = 0;
+
+ for (depth = 8; depth <= 12 && !error; depth += 2) {
+ for (count = 0; count < 512 && !error; count++) {
+ for (level = 0; level < (1 << depth) && !error;
+ level += 1 << (depth - 8)) {
+ for (bits = 1; bits <= depth && !error; bits++) {
+ for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
+ s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
+ (1 << depth) - 1);
+ for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
+ ref_res = ref_finddir(s, size, &ref_var, depth - 8);
+ if (finddir != ref_finddir)
+ ASM_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
+ if (ref_finddir != finddir) {
+ if (res != ref_res || var != ref_var) error = 1;
+ errdepth = depth;
+ }
+ }
+ }
+ }
+ }
+
+ EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch."
+ << std::endl
+ << "return: " << res << " : " << ref_res << std::endl
+ << "var: " << var << " : " << ref_var << std::endl
+ << "depth: " << errdepth << std::endl
+ << std::endl;
+}
+
+void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
+ int32_t *var, int coeff_shift),
+ int (*ref_finddir)(const uint16_t *img, int stride,
+ int32_t *var, int coeff_shift)) {
+ aom_usec_timer ref_timer;
+ aom_usec_timer timer;
+
+ aom_usec_timer_start(&ref_timer);
+ test_finddir(ref_finddir, ref_finddir);
+ aom_usec_timer_mark(&ref_timer);
+ int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
+
+ aom_usec_timer_start(&timer);
+ test_finddir(finddir, finddir);
+ aom_usec_timer_mark(&timer);
+ int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
+
+#if 0
+ std::cout << "[ ] C time = " << ref_elapsed_time / 1000
+ << " ms, SIMD time = " << elapsed_time / 1000 << " ms" << std::endl;
+#endif
+
+ EXPECT_GT(ref_elapsed_time, elapsed_time)
+ << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl
+ << "C time: " << ref_elapsed_time << " us" << std::endl
+ << "SIMD time: " << elapsed_time << " us" << std::endl;
+}
+
+TEST_P(CDEFBlockTest, TestSIMDNoMismatch) {
+ test_cdef(bsize, 1, cdef, ref_cdef);
+}
+
+TEST_P(CDEFSpeedTest, DISABLED_TestSpeed) {
+ test_cdef_speed(bsize, 4, cdef, ref_cdef);
+}
+
+TEST_P(CDEFFindDirTest, TestSIMDNoMismatch) {
+ test_finddir(finddir, ref_finddir);
+}
+
+TEST_P(CDEFFindDirSpeedTest, DISABLED_TestSpeed) {
+ test_finddir_speed(finddir, ref_finddir);
+}
+
+using std::tr1::make_tuple;
+
+// VS compiling for 32-bit targets does not support vector types in
+// structs as arguments, which makes the v256 type of the intrinsics
+// hard to support, so optimizations for this target are disabled.
+#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+ SSE2, CDEFBlockTest,
+ ::testing::Values(
+ make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_4X4),
+ make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_8X8)));
+INSTANTIATE_TEST_CASE_P(SSE2, CDEFFindDirTest,
+ ::testing::Values(make_tuple(&cdef_find_dir_sse2,
+ &cdef_find_dir_c)));
+#endif
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+ SSSE3, CDEFBlockTest,
+ ::testing::Values(
+ make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_4X4),
+ make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_8X8)));
+INSTANTIATE_TEST_CASE_P(SSSE3, CDEFFindDirTest,
+ ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
+ &cdef_find_dir_c)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, CDEFBlockTest,
+ ::testing::Values(make_tuple(&cdef_filter_block_sse4_1,
+ &cdef_filter_block_c, BLOCK_4X4),
+ make_tuple(&cdef_filter_block_sse4_1,
+ &cdef_filter_block_c, BLOCK_8X8)));
+INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFFindDirTest,
+ ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
+ &cdef_find_dir_c)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+ AVX2, CDEFBlockTest,
+ ::testing::Values(
+ make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_4X4),
+ make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_8X8)));
+INSTANTIATE_TEST_CASE_P(AVX2, CDEFFindDirTest,
+ ::testing::Values(make_tuple(&cdef_find_dir_avx2,
+ &cdef_find_dir_c)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+ NEON, CDEFBlockTest,
+ ::testing::Values(
+ make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_4X4),
+ make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_8X8)));
+INSTANTIATE_TEST_CASE_P(NEON, CDEFFindDirTest,
+ ::testing::Values(make_tuple(&cdef_find_dir_neon,
+ &cdef_find_dir_c)));
+#endif
+
+// Test speed for all supported architectures
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+ SSE2, CDEFSpeedTest,
+ ::testing::Values(
+ make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_4X4),
+ make_tuple(&cdef_filter_block_sse2, &cdef_filter_block_c, BLOCK_8X8)));
+INSTANTIATE_TEST_CASE_P(SSE2, CDEFFindDirSpeedTest,
+ ::testing::Values(make_tuple(&cdef_find_dir_sse2,
+ &cdef_find_dir_c)));
+#endif
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+ SSSE3, CDEFSpeedTest,
+ ::testing::Values(
+ make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_4X4),
+ make_tuple(&cdef_filter_block_ssse3, &cdef_filter_block_c, BLOCK_8X8)));
+INSTANTIATE_TEST_CASE_P(SSSE3, CDEFFindDirSpeedTest,
+ ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
+ &cdef_find_dir_c)));
+#endif
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, CDEFSpeedTest,
+ ::testing::Values(make_tuple(&cdef_filter_block_sse4_1,
+ &cdef_filter_block_c, BLOCK_4X4),
+ make_tuple(&cdef_filter_block_sse4_1,
+ &cdef_filter_block_c, BLOCK_8X8)));
+INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFFindDirSpeedTest,
+ ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
+ &cdef_find_dir_c)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_CASE_P(
+ AVX2, CDEFSpeedTest,
+ ::testing::Values(
+ make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_4X4),
+ make_tuple(&cdef_filter_block_avx2, &cdef_filter_block_c, BLOCK_8X8)));
+INSTANTIATE_TEST_CASE_P(AVX2, CDEFFindDirSpeedTest,
+ ::testing::Values(make_tuple(&cdef_find_dir_avx2,
+ &cdef_find_dir_c)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+ NEON, CDEFSpeedTest,
+ ::testing::Values(
+ make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_4X4),
+ make_tuple(&cdef_filter_block_neon, &cdef_filter_block_c, BLOCK_8X8)));
+INSTANTIATE_TEST_CASE_P(NEON, CDEFFindDirSpeedTest,
+ ::testing::Values(make_tuple(&cdef_find_dir_neon,
+ &cdef_find_dir_c)));
+#endif
+
+#endif // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
+} // namespace
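Worth decoding the densest knob in the new cdef_test.cc: boundary is a 4-bit mask — bit 0 left, bit 1 right, bit 2 above, bit 3 below — and each set bit paints that border of the padded source with CDEF_VERY_LARGE so the filter's edge handling is exercised in all 16 combinations. The encoding in isolation (a sketch; the enum constants are local, not libaom's):

// Sketch of the 4-bit boundary mask enumeration used by test_cdef.
enum { PAD_LEFT = 1, PAD_RIGHT = 2, PAD_ABOVE = 4, PAD_BELOW = 8 };

static void enumerate_boundaries(void) {
  for (int boundary = 0; boundary < 16; ++boundary) {
    const int left = (boundary & PAD_LEFT) != 0;
    const int right = (boundary & PAD_RIGHT) != 0;
    const int above = (boundary & PAD_ABOVE) != 0;
    const int below = (boundary & PAD_BELOW) != 0;
    // boundary == 0 is the pure-interior case; boundary == 15 pads all
    // four borders with the sentinel at once.
    (void)left; (void)right; (void)above; (void)below;
  }
}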
diff --git a/third_party/aom/test/clpf_test.cc b/third_party/aom/test/clpf_test.cc
index 2c0f8cf7f..ecb042876 100644
--- a/third_party/aom/test/clpf_test.cc
+++ b/third_party/aom/test/clpf_test.cc
@@ -17,7 +17,7 @@
#include "./aom_config.h"
#include "./av1_rtcd.h"
#include "aom_ports/aom_timer.h"
-#include "av1/common/od_dering.h"
+#include "av1/common/cdef_block.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
@@ -128,22 +128,22 @@ void test_clpf(int w, int h, unsigned int depth, unsigned int iterations,
if (boundary & 1) { // Left
for (int i = 0; i < size; i++)
for (int j = 0; j < xpos; j++)
- s[i * size + j] = OD_DERING_VERY_LARGE;
+ s[i * size + j] = CDEF_VERY_LARGE;
}
if (boundary & 2) { // Right
for (int i = 0; i < size; i++)
for (int j = xpos + w; j < size; j++)
- s[i * size + j] = OD_DERING_VERY_LARGE;
+ s[i * size + j] = CDEF_VERY_LARGE;
}
if (boundary & 4) { // Above
for (int i = 0; i < ypos; i++)
for (int j = 0; j < size; j++)
- s[i * size + j] = OD_DERING_VERY_LARGE;
+ s[i * size + j] = CDEF_VERY_LARGE;
}
if (boundary & 8) { // Below
for (int i = ypos + h; i < size; i++)
for (int j = 0; j < size; j++)
- s[i * size + j] = OD_DERING_VERY_LARGE;
+ s[i * size + j] = CDEF_VERY_LARGE;
}
}
for (strength = depth - 8; strength < depth - 5 && !error;
diff --git a/third_party/aom/test/coding_path_sync.cc b/third_party/aom/test/coding_path_sync.cc
index 68ee160bf..5b6409d03 100644
--- a/third_party/aom/test/coding_path_sync.cc
+++ b/third_party/aom/test/coding_path_sync.cc
@@ -15,8 +15,6 @@
#include "./aom_config.h"
-#if CONFIG_AV1_ENCODER && CONFIG_AV1_DECODER
-
#include "aom_ports/mem.h" // ROUND_POWER_OF_TWO
#include "aom/aomcx.h"
#include "aom/aomdx.h"
@@ -26,9 +24,9 @@
using libaom_test::ACMRandom;
namespace {
-struct CompressedSource {
- explicit CompressedSource(int seed) : rnd_(seed) {
- frame_count_ = 0;
+class CompressedSource {
+ public:
+ explicit CompressedSource(int seed) : rnd_(seed), frame_count_(0) {
aom_codec_iface_t *algo = &aom_codec_av1_cx_algo;
aom_codec_enc_cfg_t cfg;
@@ -39,8 +37,15 @@ struct CompressedSource {
cfg.rc_end_usage = AOM_CQ;
cfg.rc_max_quantizer = max_q;
cfg.rc_min_quantizer = max_q;
- cfg.g_w = WIDTH;
- cfg.g_h = HEIGHT;
+
+ // choose the picture size
+ {
+ width_ = rnd_.PseudoUniform(kWidth - 8) + 8;
+ height_ = rnd_.PseudoUniform(kHeight - 8) + 8;
+ }
+
+ cfg.g_w = width_;
+ cfg.g_h = height_;
cfg.g_lag_in_frames = 0;
aom_codec_enc_init(&enc_, algo, &cfg, 0);
@@ -48,8 +53,8 @@ struct CompressedSource {
~CompressedSource() { aom_codec_destroy(&enc_); }
- const aom_codec_cx_pkt_t *readFrame() {
- uint8_t buf[WIDTH * HEIGHT * 3 / 2] = { 0 };
+ const aom_codec_cx_pkt_t *ReadFrame() {
+ uint8_t buf[kWidth * kHeight * 3 / 2] = { 0 };
// render a regular pattern
const int period = rnd_.Rand8() % 32 + 1;
@@ -57,52 +62,63 @@ struct CompressedSource {
const int val_a = rnd_.Rand8();
const int val_b = rnd_.Rand8();
+
for (int i = 0; i < (int)sizeof buf; ++i)
buf[i] = (i + phase) % period < period / 2 ? val_a : val_b;
aom_image_t img;
- aom_img_wrap(&img, AOM_IMG_FMT_I420, WIDTH, HEIGHT, 0, buf);
+ aom_img_wrap(&img, AOM_IMG_FMT_I420, width_, height_, 0, buf);
aom_codec_encode(&enc_, &img, frame_count_++, 1, 0, 0);
aom_codec_iter_t iter = NULL;
- return aom_codec_get_cx_data(&enc_, &iter);
+
+ const aom_codec_cx_pkt_t *pkt = NULL;
+
+ do {
+ pkt = aom_codec_get_cx_data(&enc_, &iter);
+ } while (pkt && pkt->kind != AOM_CODEC_CX_FRAME_PKT);
+
+ return pkt;
}
private:
+ static const int kWidth = 128;
+ static const int kHeight = 128;
+
ACMRandom rnd_;
aom_codec_ctx_t enc_;
int frame_count_;
- static const int WIDTH = 32;
- static const int HEIGHT = 32;
+ int width_, height_;
};
// lowers an aom_image_t to an easily comparable/printable form
-std::vector<int16_t> serialize(const aom_image_t *img) {
- const int w_uv = ROUND_POWER_OF_TWO(img->d_w, img->x_chroma_shift);
- const int h_uv = ROUND_POWER_OF_TWO(img->d_h, img->y_chroma_shift);
- const int w[] = { (int)img->d_w, w_uv, w_uv };
- const int h[] = { (int)img->d_h, h_uv, h_uv };
-
+std::vector<int16_t> Serialize(const aom_image_t *img) {
std::vector<int16_t> bytes;
bytes.reserve(img->d_w * img->d_h * 3);
- for (int plane = 0; plane < 3; ++plane)
- for (int r = 0; r < h[plane]; ++r)
- for (int c = 0; c < w[plane]; ++c) {
- const int offset = r * img->stride[plane] + c;
+ for (int plane = 0; plane < 3; ++plane) {
+ const int w = aom_img_plane_width(img, plane);
+ const int h = aom_img_plane_height(img, plane);
+
+ for (int r = 0; r < h; ++r) {
+ for (int c = 0; c < w; ++c) {
+ unsigned char *row = img->planes[plane] + r * img->stride[plane];
if (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH)
- bytes.push_back(img->planes[plane][offset * 2]);
+ bytes.push_back(row[c * 2]);
else
- bytes.push_back(img->planes[plane][offset]);
+ bytes.push_back(row[c]);
}
+ }
+ }
return bytes;
}
-struct Decoder {
+class Decoder {
+ public:
explicit Decoder(int allowLowbitdepth) {
aom_codec_iface_t *algo = &aom_codec_av1_dx_algo;
- aom_codec_dec_cfg cfg = { 0 };
+ aom_codec_dec_cfg_t cfg = aom_codec_dec_cfg_t();
cfg.allow_lowbitdepth = allowLowbitdepth;
aom_codec_dec_init(&dec_, algo, &cfg, 0);
@@ -111,11 +127,11 @@ struct Decoder {
~Decoder() { aom_codec_destroy(&dec_); }
std::vector<int16_t> decode(const aom_codec_cx_pkt_t *pkt) {
- aom_codec_decode(&dec_, (uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz,
- NULL, 0);
+ aom_codec_decode(&dec_, static_cast<uint8_t *>(pkt->data.frame.buf),
+ static_cast<unsigned int>(pkt->data.frame.sz), NULL, 0);
aom_codec_iter_t iter = NULL;
- return serialize(aom_codec_get_frame(&dec_, &iter));
+ return Serialize(aom_codec_get_frame(&dec_, &iter));
}
private:
@@ -124,22 +140,19 @@ struct Decoder {
// Try to reveal a mismatch between LBD and HBD coding paths.
TEST(CodingPathSync, SearchForHbdLbdMismatch) {
- // disable test. Re-enable it locally to help diagnosing LBD/HBD mismatches.
- // And re-enable it once both coding paths match
- // so they don't diverge anymore.
- return;
-
const int count_tests = 100;
for (int i = 0; i < count_tests; ++i) {
- Decoder dec_HBD(0);
- Decoder dec_LBD(1);
+ Decoder dec_hbd(0);
+ Decoder dec_lbd(1);
CompressedSource enc(i);
- const aom_codec_cx_pkt_t *frame = enc.readFrame();
- ASSERT_EQ(dec_LBD.decode(frame), dec_HBD.decode(frame));
+ const aom_codec_cx_pkt_t *frame = enc.ReadFrame();
+
+ std::vector<int16_t> lbd_yuv = dec_lbd.decode(frame);
+ std::vector<int16_t> hbd_yuv = dec_hbd.decode(frame);
+
+ ASSERT_EQ(lbd_yuv, hbd_yuv);
}
}
} // namespace
-
-#endif
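
Beyond the struct-to-class and naming cleanups, two changes in this file are behavioral: the test body is re-enabled (the unconditional early return is gone, along with the CONFIG_AV1_ENCODER && CONFIG_AV1_DECODER wrapper), and ReadFrame() no longer assumes the first packet out of the encoder is a compressed frame. aom_codec_get_cx_data() can emit other packet kinds, so the new loop drains until it sees AOM_CODEC_CX_FRAME_PKT. The idiom in isolation (a sketch; enc is an initialized aom_codec_ctx_t that has already been fed a frame):

    #include "aom/aom_encoder.h"

    static const aom_codec_cx_pkt_t *next_frame_pkt(aom_codec_ctx_t *enc) {
      aom_codec_iter_t iter = NULL;
      const aom_codec_cx_pkt_t *pkt;
      do {
        pkt = aom_codec_get_cx_data(enc, &iter);  // NULL once drained
      } while (pkt && pkt->kind != AOM_CODEC_CX_FRAME_PKT);
      return pkt;  // first compressed-frame packet, or NULL if none
    }
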
diff --git a/third_party/aom/test/convolve_round_test.cc b/third_party/aom/test/convolve_round_test.cc
index 6f77dbb80..4976b03c8 100644
--- a/third_party/aom/test/convolve_round_test.cc
+++ b/third_party/aom/test/convolve_round_test.cc
@@ -12,13 +12,13 @@
#include <assert.h>
#include "./av1_rtcd.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/aom_timer.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-#include "aom/aom_integer.h"
-#include "aom_ports/aom_timer.h"
using libaom_test::ACMRandom;
@@ -68,12 +68,18 @@ class ConvolveRoundTest : public ::testing::TestWithParam<ConvolveRoundParam> {
virtual void SetUp() {
const size_t block_size = 128 * 128;
src_ = reinterpret_cast<int32_t *>(
- aom_memalign(16, 3 * block_size * sizeof(int32_t)));
- dst_ref_ = reinterpret_cast<uint16_t *>(src_ + block_size);
- dst_ = dst_ref_ + block_size;
+ aom_memalign(16, block_size * sizeof(*src_)));
+ dst_ref_ = reinterpret_cast<uint16_t *>(
+ aom_memalign(16, block_size * sizeof(*dst_ref_)));
+ dst_ = reinterpret_cast<uint16_t *>(
+ aom_memalign(16, block_size * sizeof(*dst_)));
}
- virtual void TearDown() { aom_free(src_); }
+ virtual void TearDown() {
+ aom_free(src_);
+ aom_free(dst_ref_);
+ aom_free(dst_);
+ }
void ConvolveRoundingRun() {
int test_num = 0;
@@ -82,7 +88,6 @@ class ConvolveRoundTest : public ::testing::TestWithParam<ConvolveRoundParam> {
int bits = 13;
uint8_t *dst = 0;
uint8_t *dst_ref = 0;
- int diff_wide;
if (data_path_ == LOWBITDEPTH_TEST) {
dst = reinterpret_cast<uint8_t *>(dst_);
@@ -109,14 +114,24 @@ class ConvolveRoundTest : public ::testing::TestWithParam<ConvolveRoundParam> {
GenerateBufferWithRandom(src_, src_stride, bits, w, h);
func_ref_(src_, src_stride, dst_ref, dst_stride, w, h, bits);
- func_(src_, src_stride, dst, dst_stride, w, h, bits);
-
- diff_wide = w;
- if (data_path_ == LOWBITDEPTH_TEST) diff_wide >>= 1;
- for (int r = 0; r < h; ++r) {
- for (int c = 0; c < diff_wide; ++c) {
- ASSERT_EQ(dst_ref_[r * dst_stride + c], dst_[r * dst_stride + c])
- << "Mismatch at r: " << r << " c: " << c << " test: " << test_num;
+ ASM_REGISTER_STATE_CHECK(
+ func_(src_, src_stride, dst, dst_stride, w, h, bits));
+
+ if (data_path_ == LOWBITDEPTH_TEST) {
+ for (int r = 0; r < h; ++r) {
+ for (int c = 0; c < w; ++c) {
+ ASSERT_EQ(dst_ref[r * dst_stride + c], dst[r * dst_stride + c])
+ << "Mismatch at r: " << r << " c: " << c << " w: " << w
+ << " h: " << h << " test: " << test_num;
+ }
+ }
+ } else {
+ for (int r = 0; r < h; ++r) {
+ for (int c = 0; c < w; ++c) {
+ ASSERT_EQ(dst_ref_[r * dst_stride + c], dst_[r * dst_stride + c])
+ << "Mismatch at r: " << r << " c: " << c << " w: " << w
+ << " h: " << h << " test: " << test_num;
+ }
}
}
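
The SetUp()/TearDown() change at the top of this hunk is a memory cleanup: the old code carved dst_ref_ and dst_ out of a single aom_memalign allocation with pointer casts, which guarantees 16-byte alignment only for the first buffer and sizes everything in int32_t units. Each buffer now gets its own aligned allocation, sized via sizeof(*ptr), and TearDown() frees all three. The pattern in miniature (a sketch; aom_memalign/aom_free are the allocator pair these tests already use):

    #include <stdint.h>
    #include "aom_mem/aom_mem.h"

    static void alloc_demo(int n) {
      int32_t *src =
          reinterpret_cast<int32_t *>(aom_memalign(16, n * sizeof(*src)));
      uint16_t *dst =
          reinterpret_cast<uint16_t *>(aom_memalign(16, n * sizeof(*dst)));
      // ... fill src, run the kernel into dst ...
      aom_free(src);  // one aom_free per aom_memalign, no shared base pointer
      aom_free(dst);
    }
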
diff --git a/third_party/aom/test/convolve_test.cc b/third_party/aom/test/convolve_test.cc
index a1fb2087d..ffe0b87d2 100644
--- a/third_party/aom/test/convolve_test.cc
+++ b/third_party/aom/test/convolve_test.cc
@@ -67,18 +67,7 @@ struct ConvolveFunctions {
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
-#if CONFIG_AV1 && CONFIG_EXT_PARTITION
-#define ALL_SIZES(convolve_fn) \
- make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
- make_tuple(128, 128, &convolve_fn), make_tuple(4, 4, &convolve_fn), \
- make_tuple(8, 4, &convolve_fn), make_tuple(4, 8, &convolve_fn), \
- make_tuple(8, 8, &convolve_fn), make_tuple(16, 8, &convolve_fn), \
- make_tuple(8, 16, &convolve_fn), make_tuple(16, 16, &convolve_fn), \
- make_tuple(32, 16, &convolve_fn), make_tuple(16, 32, &convolve_fn), \
- make_tuple(32, 32, &convolve_fn), make_tuple(64, 32, &convolve_fn), \
- make_tuple(32, 64, &convolve_fn), make_tuple(64, 64, &convolve_fn)
-#else
-#define ALL_SIZES(convolve_fn) \
+#define ALL_SIZES_64(convolve_fn) \
make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn), \
make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn), \
make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn), \
@@ -86,6 +75,13 @@ typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
make_tuple(64, 64, &convolve_fn)
+
+#if CONFIG_AV1 && CONFIG_EXT_PARTITION
+#define ALL_SIZES(convolve_fn) \
+ make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
+ make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
+#else
+#define ALL_SIZES ALL_SIZES_64
#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
@@ -414,7 +410,9 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
void CheckGuardBlocks() {
for (int i = 0; i < kOutputBufferSize; ++i) {
- if (IsIndexInBorder(i)) EXPECT_EQ(255, output_[i]);
+ if (IsIndexInBorder(i)) {
+ EXPECT_EQ(255, output_[i]);
+ }
}
}
@@ -1282,9 +1280,9 @@ const ConvolveFunctions convolve12_avx2(
wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
wrap_convolve8_avg_c_12, 12);
-const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2),
- ALL_SIZES(convolve10_avx2),
- ALL_SIZES(convolve12_avx2) };
+const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES_64(convolve8_avx2),
+ ALL_SIZES_64(convolve10_avx2),
+ ALL_SIZES_64(convolve12_avx2) };
#else
const ConvolveFunctions convolve8_avx2(
aom_convolve_copy_c, aom_convolve_avg_c, aom_convolve8_horiz_avx2,
@@ -1293,7 +1291,7 @@ const ConvolveFunctions convolve8_avx2(
aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
-const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
+const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES_64(convolve8_avx2) };
#endif // CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
::testing::ValuesIn(kArrayConvolve8_avx2));
@@ -1317,10 +1315,10 @@ const ConvolveFunctions convolve8_neon(
aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
#endif // HAVE_NEON_ASM
-const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES(convolve8_neon) };
+const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES_64(convolve8_neon) };
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
::testing::ValuesIn(kArrayConvolve8_neon));
-#endif // HAVE_NEON
+#endif // HAVE_NEON && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
// TODO(any): Make DSPR2 versions support 128x128, 128x64 and 64x128 block sizes
#if HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
@@ -1331,10 +1329,10 @@ const ConvolveFunctions convolve8_dspr2(
aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
-const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) };
+const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES_64(convolve8_dspr2) };
INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
::testing::ValuesIn(kArrayConvolve8_dspr2));
-#endif // HAVE_DSPR2
+#endif // HAVE_DSPR2 && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
// TODO(any): Make MSA versions support 128x128, 128x64 and 64x128 block sizes
#if HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
@@ -1345,8 +1343,8 @@ const ConvolveFunctions convolve8_msa(
aom_scaled_horiz_c, aom_scaled_avg_horiz_c, aom_scaled_vert_c,
aom_scaled_avg_vert_c, aom_scaled_2d_c, aom_scaled_avg_2d_c, 0);
-const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) };
+const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES_64(convolve8_msa) };
INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
::testing::ValuesIn(kArrayConvolve8_msa));
-#endif // HAVE_MSA
+#endif // HAVE_MSA && !(CONFIG_AV1 && CONFIG_EXT_PARTITION)
} // namespace
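
The macro refactor at the top of this file drives all the ALL_SIZES → ALL_SIZES_64 substitutions after it: the sizes every target supports (up to 64x64) now live in ALL_SIZES_64, and ALL_SIZES merely prepends the 128-pixel sizes when CONFIG_EXT_PARTITION is enabled. Instantiations whose kernels lack 128-wide support (AVX2, NEON, DSPR2, MSA) switch to the base list, and the trailing #endif comments are updated to restate their full #if conditions. The layering in miniature:

    // Base list shared by every target, plus an extension that exists only
    // when the large partitions are compiled in.
    #define BASE_SIZES(fn) make_tuple(4, 4, &fn), make_tuple(64, 64, &fn)
    #if CONFIG_EXT_PARTITION
    #define ALL_SIZES(fn) make_tuple(128, 128, &fn), BASE_SIZES(fn)
    #else
    #define ALL_SIZES BASE_SIZES
    #endif
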
diff --git a/third_party/aom/test/datarate_test.cc b/third_party/aom/test/datarate_test.cc
index 6a1b4e101..d577be35a 100644
--- a/third_party/aom/test/datarate_test.cc
+++ b/third_party/aom/test/datarate_test.cc
@@ -89,8 +89,8 @@ class DatarateTestLarge
duration * timebase_ * cfg_.rc_target_bitrate * 1000);
// Buffer should not go negative.
- ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
- << pkt->data.frame.pts;
+ ASSERT_GE(bits_in_buffer_model_, 0)
+ << "Buffer Underrun at frame " << pkt->data.frame.pts;
const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
diff --git a/third_party/aom/test/dct16x16_test.cc b/third_party/aom/test/dct16x16_test.cc
index c2c072494..3cc0ed8c0 100644
--- a/third_party/aom/test/dct16x16_test.cc
+++ b/third_party/aom/test/dct16x16_test.cc
@@ -230,9 +230,11 @@ typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
-typedef std::tr1::tuple<FdctFunc, IdctFunc, int, aom_bit_depth_t> Dct16x16Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t> Ht16x16Param;
-typedef std::tr1::tuple<IdctFunc, IdctFunc, int, aom_bit_depth_t>
+typedef std::tr1::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t>
+ Dct16x16Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t>
+ Ht16x16Param;
+typedef std::tr1::tuple<IdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t>
Idct16x16Param;
void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
@@ -510,8 +512,8 @@ class Trans16x16TestBase {
const int diff = dst[j] - src[j];
#endif // CONFIG_HIGHBITDEPTH
const uint32_t error = diff * diff;
- EXPECT_GE(1u, error) << "Error: 16x16 IDCT has error " << error
- << " at index " << j;
+ EXPECT_GE(1u, error)
+ << "Error: 16x16 IDCT has error " << error << " at index " << j;
}
}
}
@@ -778,94 +780,109 @@ using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
::testing::Values(make_tuple(&aom_fdct16x16_c,
&aom_idct16x16_256_add_c,
- 0, AOM_BITS_8)));
+ DCT_DCT, AOM_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
::testing::Values(make_tuple(&aom_fdct16x16_c,
&aom_idct16x16_256_add_c,
- 0, AOM_BITS_8)));
+ DCT_DCT, AOM_BITS_8)));
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
C, Trans16x16HT,
::testing::Values(
- make_tuple(&fht16x16_10, &iht16x16_10, 0, AOM_BITS_10),
- make_tuple(&fht16x16_10, &iht16x16_10, 1, AOM_BITS_10),
- make_tuple(&fht16x16_10, &iht16x16_10, 2, AOM_BITS_10),
- make_tuple(&fht16x16_10, &iht16x16_10, 3, AOM_BITS_10),
- make_tuple(&fht16x16_12, &iht16x16_12, 0, AOM_BITS_12),
- make_tuple(&fht16x16_12, &iht16x16_12, 1, AOM_BITS_12),
- make_tuple(&fht16x16_12, &iht16x16_12, 2, AOM_BITS_12),
- make_tuple(&fht16x16_12, &iht16x16_12, 3, AOM_BITS_12),
- make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 0, AOM_BITS_8),
- make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 1, AOM_BITS_8),
- make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 2, AOM_BITS_8),
- make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 3, AOM_BITS_8)));
+ make_tuple(&fht16x16_10, &iht16x16_10, DCT_DCT, AOM_BITS_10),
+ make_tuple(&fht16x16_10, &iht16x16_10, ADST_DCT, AOM_BITS_10),
+ make_tuple(&fht16x16_10, &iht16x16_10, DCT_ADST, AOM_BITS_10),
+ make_tuple(&fht16x16_10, &iht16x16_10, ADST_ADST, AOM_BITS_10),
+ make_tuple(&fht16x16_12, &iht16x16_12, DCT_DCT, AOM_BITS_12),
+ make_tuple(&fht16x16_12, &iht16x16_12, ADST_DCT, AOM_BITS_12),
+ make_tuple(&fht16x16_12, &iht16x16_12, DCT_ADST, AOM_BITS_12),
+ make_tuple(&fht16x16_12, &iht16x16_12, ADST_ADST, AOM_BITS_12),
+ make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, DCT_DCT,
+ AOM_BITS_8),
+ make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, ADST_DCT,
+ AOM_BITS_8),
+ make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, DCT_ADST,
+ AOM_BITS_8),
+ make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, ADST_ADST,
+ AOM_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans16x16HT,
- ::testing::Values(
- make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 0, AOM_BITS_8),
- make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 1, AOM_BITS_8),
- make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 2, AOM_BITS_8),
- make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c, 3, AOM_BITS_8)));
+ ::testing::Values(make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c,
+ DCT_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c,
+ ADST_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c,
+ DCT_ADST, AOM_BITS_8),
+ make_tuple(&av1_fht16x16_c, &av1_iht16x16_256_add_c,
+ ADST_ADST, AOM_BITS_8)));
#endif // CONFIG_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
NEON, Trans16x16DCT,
::testing::Values(make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_neon,
- 0, AOM_BITS_8)));
+ DCT_DCT, AOM_BITS_8)));
#endif
#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
- SSE2, Trans16x16DCT,
- ::testing::Values(make_tuple(&aom_fdct16x16_sse2,
- &aom_idct16x16_256_add_sse2, 0, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, Trans16x16DCT,
+ ::testing::Values(make_tuple(
+ &aom_fdct16x16_sse2, &aom_idct16x16_256_add_sse2,
+ DCT_DCT, AOM_BITS_8)));
+#if !CONFIG_DAALA_DCT16
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT,
::testing::Values(make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
- 0, AOM_BITS_8),
+ DCT_DCT, AOM_BITS_8),
make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
- 1, AOM_BITS_8),
+ ADST_DCT, AOM_BITS_8),
make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
- 2, AOM_BITS_8),
+ DCT_ADST, AOM_BITS_8),
make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2,
- 3, AOM_BITS_8)));
+ ADST_ADST, AOM_BITS_8)));
+#endif // CONFIG_DAALA_DCT16
#endif // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(SSE2, Trans16x16DCT,
::testing::Values(make_tuple(&aom_fdct16x16_sse2,
&aom_idct16x16_256_add_c,
- 0, AOM_BITS_8)));
+ DCT_DCT, AOM_BITS_8)));
+#if !CONFIG_DAALA_DCT16
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT,
- ::testing::Values(
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 0, AOM_BITS_8),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 1, AOM_BITS_8),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 2, AOM_BITS_8),
- make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 3,
- AOM_BITS_8)));
+ ::testing::Values(make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c,
+ DCT_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c,
+ ADST_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c,
+ DCT_ADST, AOM_BITS_8),
+ make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c,
+ ADST_ADST, AOM_BITS_8)));
+#endif
#endif // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(MSA, Trans16x16DCT,
::testing::Values(make_tuple(&aom_fdct16x16_msa,
&aom_idct16x16_256_add_msa,
- 0, AOM_BITS_8)));
-#if !CONFIG_EXT_TX
+ DCT_DCT, AOM_BITS_8)));
+#if !CONFIG_EXT_TX && !CONFIG_DAALA_DCT16
// TODO(yaowu): re-enable this after msa versions are updated to match C.
INSTANTIATE_TEST_CASE_P(
DISABLED_MSA, Trans16x16HT,
- ::testing::Values(
- make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 0, AOM_BITS_8),
- make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 1, AOM_BITS_8),
- make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 2, AOM_BITS_8),
- make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa, 3,
- AOM_BITS_8)));
-#endif // !CONFIG_EXT_TX
+ ::testing::Values(make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa,
+ DCT_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa,
+ ADST_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa,
+ DCT_ADST, AOM_BITS_8),
+ make_tuple(&av1_fht16x16_msa, &av1_iht16x16_256_add_msa,
+ ADST_ADST, AOM_BITS_8)));
+#endif // !CONFIG_EXT_TX && !CONFIG_DAALA_DCT16
#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH
} // namespace
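
The sweep through this file, and through the transform tests below, replaces the bare int tuple slot with TX_TYPE and the magic values 0-3 with the named constants DCT_DCT, ADST_DCT, DCT_ADST and ADST_ADST. Besides readability, the enum lets the compiler reject out-of-range values that a plain int would accept silently. The effect in miniature (TxType is a local stand-in for the real TX_TYPE enum):

    #include "third_party/googletest/src/googletest/include/gtest/gtest.h"

    enum TxType { DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST };

    typedef std::tr1::tuple<int, TxType> HtParam;  // (function id, tx type)

    void demo() {
      HtParam ok = std::tr1::make_tuple(0, ADST_ADST);
      // HtParam bad = std::tr1::make_tuple(0, 7);  // tuple<int, int> does
      //                                            // not convert: error
      (void)ok;
    }
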
diff --git a/third_party/aom/test/dct32x32_test.cc b/third_party/aom/test/dct32x32_test.cc
index 0a30f7f38..02a723a9c 100644
--- a/third_party/aom/test/dct32x32_test.cc
+++ b/third_party/aom/test/dct32x32_test.cc
@@ -363,53 +363,63 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
NEON, Trans32x32Test,
::testing::Values(make_tuple(&aom_fdct32x32_c, &aom_idct32x32_1024_add_neon,
- 0, AOM_BITS_8),
+ DCT_DCT, AOM_BITS_8),
make_tuple(&aom_fdct32x32_rd_c,
- &aom_idct32x32_1024_add_neon, 1, AOM_BITS_8)));
+ &aom_idct32x32_1024_add_neon, ADST_DCT,
+ AOM_BITS_8)));
#endif // HAVE_NEON && !CONFIG_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
SSE2, Trans32x32Test,
::testing::Values(make_tuple(&aom_fdct32x32_sse2,
- &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8),
+ &aom_idct32x32_1024_add_sse2, DCT_DCT,
+ AOM_BITS_8),
make_tuple(&aom_fdct32x32_rd_sse2,
- &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
+ &aom_idct32x32_1024_add_sse2, ADST_DCT,
+ AOM_BITS_8)));
#endif // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
- SSE2, Trans32x32Test,
- ::testing::Values(make_tuple(&aom_fdct32x32_sse2, &aom_idct32x32_1024_add_c,
- 0, AOM_BITS_8),
- make_tuple(&aom_fdct32x32_rd_sse2,
- &aom_idct32x32_1024_add_c, 1, AOM_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, Trans32x32Test,
+ ::testing::Values(make_tuple(&aom_fdct32x32_sse2,
+ &aom_idct32x32_1024_add_c,
+ DCT_DCT, AOM_BITS_8),
+ make_tuple(&aom_fdct32x32_rd_sse2,
+ &aom_idct32x32_1024_add_c,
+ ADST_DCT, AOM_BITS_8)));
#endif // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
#if HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
AVX2, Trans32x32Test,
::testing::Values(make_tuple(&aom_fdct32x32_avx2,
- &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8),
+ &aom_idct32x32_1024_add_sse2, DCT_DCT,
+ AOM_BITS_8),
make_tuple(&aom_fdct32x32_rd_avx2,
- &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
+ &aom_idct32x32_1024_add_sse2, ADST_DCT,
+ AOM_BITS_8)));
#endif // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
#if HAVE_AVX2 && CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
AVX2, Trans32x32Test,
::testing::Values(make_tuple(&aom_fdct32x32_avx2,
- &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8),
+ &aom_idct32x32_1024_add_sse2, DCT_DCT,
+ AOM_BITS_8),
make_tuple(&aom_fdct32x32_rd_avx2,
- &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
+ &aom_idct32x32_1024_add_sse2, ADST_DCT,
+ AOM_BITS_8)));
#endif // HAVE_AVX2 && CONFIG_HIGHBITDEPTH
#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
MSA, Trans32x32Test,
::testing::Values(make_tuple(&aom_fdct32x32_msa,
- &aom_idct32x32_1024_add_msa, 0, AOM_BITS_8),
+ &aom_idct32x32_1024_add_msa, DCT_DCT,
+ AOM_BITS_8),
make_tuple(&aom_fdct32x32_rd_msa,
- &aom_idct32x32_1024_add_msa, 1, AOM_BITS_8)));
+ &aom_idct32x32_1024_add_msa, ADST_DCT,
+ AOM_BITS_8)));
#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH
} // namespace
diff --git a/third_party/aom/test/decode_api_test.cc b/third_party/aom/test/decode_api_test.cc
index 8b1c9d268..187c8e06a 100644
--- a/third_party/aom/test/decode_api_test.cc
+++ b/third_party/aom/test/decode_api_test.cc
@@ -44,8 +44,11 @@ TEST(DecodeAPI, InvalidParams) {
aom_codec_dec_init(NULL, kCodecs[i], NULL, 0));
EXPECT_EQ(AOM_CODEC_OK, aom_codec_dec_init(&dec, kCodecs[i], NULL, 0));
+#if !CONFIG_OBU
+ // TODO: fix this check for CONFIG_OBU and re-enable it.
EXPECT_EQ(AOM_CODEC_UNSUP_BITSTREAM,
aom_codec_decode(&dec, buf, NELEMENTS(buf), NULL, 0));
+#endif
EXPECT_EQ(AOM_CODEC_INVALID_PARAM,
aom_codec_decode(&dec, NULL, NELEMENTS(buf), NULL, 0));
EXPECT_EQ(AOM_CODEC_INVALID_PARAM, aom_codec_decode(&dec, buf, 0, NULL, 0));
diff --git a/third_party/aom/test/decode_test_driver.cc b/third_party/aom/test/decode_test_driver.cc
index 5f109e092..9a465327e 100644
--- a/third_party/aom/test/decode_test_driver.cc
+++ b/third_party/aom/test/decode_test_driver.cc
@@ -59,14 +59,15 @@ void DecoderTest::HandlePeekResult(Decoder *const decoder,
/* VP8's implementation of PeekStream returns an error if the frame you
* pass it is not a keyframe, so we only expect AOM_CODEC_OK on the first
* frame, which must be a keyframe. */
- if (video->frame_number() == 0)
- ASSERT_EQ(AOM_CODEC_OK, res_peek) << "Peek return failed: "
- << aom_codec_err_to_string(res_peek);
+ if (video->frame_number() == 0) {
+ ASSERT_EQ(AOM_CODEC_OK, res_peek)
+ << "Peek return failed: " << aom_codec_err_to_string(res_peek);
+ }
} else {
/* The AV1 implementation of PeekStream returns an error only if the
* data passed to it isn't a valid AV1 chunk. */
- ASSERT_EQ(AOM_CODEC_OK, res_peek) << "Peek return failed: "
- << aom_codec_err_to_string(res_peek);
+ ASSERT_EQ(AOM_CODEC_OK, res_peek)
+ << "Peek return failed: " << aom_codec_err_to_string(res_peek);
}
}
diff --git a/third_party/aom/test/dering_test.cc b/third_party/aom/test/dering_test.cc
index 195a60ff8..6b76561c8 100644
--- a/third_party/aom/test/dering_test.cc
+++ b/third_party/aom/test/dering_test.cc
@@ -17,7 +17,7 @@
#include "./aom_config.h"
#include "./av1_rtcd.h"
#include "aom_ports/aom_timer.h"
-#include "av1/common/od_dering.h"
+#include "av1/common/cdef_block.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
@@ -27,8 +27,7 @@ using libaom_test::ACMRandom;
namespace {
-typedef std::tr1::tuple<od_filter_dering_direction_func,
- od_filter_dering_direction_func, int>
+typedef std::tr1::tuple<cdef_direction_func, cdef_direction_func, int>
dering_dir_param_t;
class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> {
@@ -44,19 +43,18 @@ class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> {
protected:
int bsize;
- od_filter_dering_direction_func dering;
- od_filter_dering_direction_func ref_dering;
+ cdef_direction_func dering;
+ cdef_direction_func ref_dering;
};
typedef CDEFDeringDirTest CDEFDeringSpeedTest;
-void test_dering(int bsize, int iterations,
- od_filter_dering_direction_func dering,
- od_filter_dering_direction_func ref_dering) {
+void test_dering(int bsize, int iterations, cdef_direction_func dering,
+ cdef_direction_func ref_dering) {
const int size = 8;
- const int ysize = size + 2 * OD_FILT_VBORDER;
+ const int ysize = size + 2 * CDEF_VBORDER;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED(16, uint16_t, s[ysize * OD_FILT_BSTRIDE]);
+ DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
memset(ref_d, 0, sizeof(ref_d));
@@ -80,38 +78,36 @@ void test_dering(int bsize, int iterations,
if (boundary) {
if (boundary & 1) { // Left
for (int i = 0; i < ysize; i++)
- for (int j = 0; j < OD_FILT_HBORDER; j++)
- s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+ for (int j = 0; j < CDEF_HBORDER; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
}
if (boundary & 2) { // Right
for (int i = 0; i < ysize; i++)
- for (int j = OD_FILT_HBORDER + size; j < OD_FILT_BSTRIDE;
- j++)
- s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+ for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
}
if (boundary & 4) { // Above
- for (int i = 0; i < OD_FILT_VBORDER; i++)
- for (int j = 0; j < OD_FILT_BSTRIDE; j++)
- s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+ for (int i = 0; i < CDEF_VBORDER; i++)
+ for (int j = 0; j < CDEF_BSTRIDE; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
}
if (boundary & 8) { // Below
- for (int i = OD_FILT_VBORDER + size; i < ysize; i++)
- for (int j = 0; j < OD_FILT_BSTRIDE; j++)
- s[i * OD_FILT_BSTRIDE + j] = OD_DERING_VERY_LARGE;
+ for (int i = CDEF_VBORDER + size; i < ysize; i++)
+ for (int j = 0; j < CDEF_BSTRIDE; j++)
+ s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
}
}
for (dir = 0; dir < 8; dir++) {
for (threshold = 0; threshold < 64 << (depth - 8) && !error;
threshold += (1 + 4 * !!boundary) << (depth - 8)) {
- ref_dering(ref_d, size, s + OD_FILT_HBORDER +
- OD_FILT_VBORDER * OD_FILT_BSTRIDE,
+ ref_dering(ref_d, size,
+ s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
threshold, dir, damping);
// If dering and ref_dering are the same, we're just testing
// speed
if (dering != ref_dering)
ASM_REGISTER_STATE_CHECK(dering(
- d, size,
- s + OD_FILT_HBORDER + OD_FILT_VBORDER * OD_FILT_BSTRIDE,
+ d, size, s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
threshold, dir, damping));
if (ref_dering != dering) {
for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error;
@@ -146,9 +142,8 @@ void test_dering(int bsize, int iterations,
<< std::endl;
}
-void test_dering_speed(int bsize, int iterations,
- od_filter_dering_direction_func dering,
- od_filter_dering_direction_func ref_dering) {
+void test_dering_speed(int bsize, int iterations, cdef_direction_func dering,
+ cdef_direction_func ref_dering) {
aom_usec_timer ref_timer;
aom_usec_timer timer;
@@ -173,7 +168,7 @@ void test_dering_speed(int bsize, int iterations,
<< "SIMD time: " << elapsed_time << " us" << std::endl;
}
-typedef int (*find_dir_t)(const od_dering_in *img, int stride, int32_t *var,
+typedef int (*find_dir_t)(const uint16_t *img, int stride, int32_t *var,
int coeff_shift);
typedef std::tr1::tuple<find_dir_t, find_dir_t> find_dir_param_t;
@@ -196,9 +191,9 @@ class CDEFDeringFindDirTest
typedef CDEFDeringFindDirTest CDEFDeringFindDirSpeedTest;
-void test_finddir(int (*finddir)(const od_dering_in *img, int stride,
- int32_t *var, int coeff_shift),
- int (*ref_finddir)(const od_dering_in *img, int stride,
+void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
+ int coeff_shift),
+ int (*ref_finddir)(const uint16_t *img, int stride,
int32_t *var, int coeff_shift)) {
const int size = 8;
ACMRandom rnd(ACMRandom::DeterministicSeed());
@@ -238,9 +233,9 @@ void test_finddir(int (*finddir)(const od_dering_in *img, int stride,
<< std::endl;
}
-void test_finddir_speed(int (*finddir)(const od_dering_in *img, int stride,
+void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
int32_t *var, int coeff_shift),
- int (*ref_finddir)(const od_dering_in *img, int stride,
+ int (*ref_finddir)(const uint16_t *img, int stride,
int32_t *var, int coeff_shift)) {
aom_usec_timer ref_timer;
aom_usec_timer timer;
@@ -289,99 +284,99 @@ using std::tr1::make_tuple;
// hard to support, so optimizations for this target are disabled.
#if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse2,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_sse2,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringDirTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_sse2,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_sse2,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&od_dir_find8_sse2,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_sse2,
+ &cdef_find_dir_c)));
#endif
#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_ssse3,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_ssse3,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringDirTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_ssse3,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_ssse3,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&od_dir_find8_ssse3,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
+ &cdef_find_dir_c)));
#endif
#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse4_1,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_sse4_1,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringDirTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_sse4_1,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_sse4_1,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&od_dir_find8_sse4_1,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
+ &cdef_find_dir_c)));
#endif
#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, CDEFDeringDirTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_neon,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_neon,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringDirTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_neon,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_neon,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirTest,
- ::testing::Values(make_tuple(&od_dir_find8_neon,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_neon,
+ &cdef_find_dir_c)));
#endif
// Test speed for all supported architectures
#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
- SSE2, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse2,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_sse2,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringSpeedTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_sse2,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_sse2,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSE2, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&od_dir_find8_sse2,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_sse2,
+ &cdef_find_dir_c)));
#endif
#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
- SSSE3, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_ssse3,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_ssse3,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringSpeedTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_ssse3,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_ssse3,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSSE3, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&od_dir_find8_ssse3,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
+ &cdef_find_dir_c)));
#endif
#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_sse4_1,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_sse4_1,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringSpeedTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_sse4_1,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_sse4_1,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(SSE4_1, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&od_dir_find8_sse4_1,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
+ &cdef_find_dir_c)));
#endif
#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
- NEON, CDEFDeringSpeedTest,
- ::testing::Values(make_tuple(&od_filter_dering_direction_4x4_neon,
- &od_filter_dering_direction_4x4_c, 4),
- make_tuple(&od_filter_dering_direction_8x8_neon,
- &od_filter_dering_direction_8x8_c, 8)));
+INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringSpeedTest,
+ ::testing::Values(make_tuple(&cdef_direction_4x4_neon,
+ &cdef_direction_4x4_c, 4),
+ make_tuple(&cdef_direction_8x8_neon,
+ &cdef_direction_8x8_c,
+ 8)));
INSTANTIATE_TEST_CASE_P(NEON, CDEFDeringFindDirSpeedTest,
- ::testing::Values(make_tuple(&od_dir_find8_neon,
- &od_dir_find8_c)));
+ ::testing::Values(make_tuple(&cdef_find_dir_neon,
+ &cdef_find_dir_c)));
#endif
#endif // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
diff --git a/third_party/aom/test/encode_test_driver.cc b/third_party/aom/test/encode_test_driver.cc
index ec168e969..6941f0148 100644
--- a/third_party/aom/test/encode_test_driver.cc
+++ b/third_party/aom/test/encode_test_driver.cc
@@ -149,11 +149,6 @@ static bool compare_img(const aom_image_t *img1, const aom_image_t *img2,
int *const mismatch_row, int *const mismatch_col,
int *const mismatch_plane, int *const mismatch_pix1,
int *const mismatch_pix2) {
- const unsigned int w_y = img1->d_w;
- const unsigned int h_y = img1->d_h;
- const unsigned int w_uv = ROUND_POWER_OF_TWO(w_y, img1->x_chroma_shift);
- const unsigned int h_uv = ROUND_POWER_OF_TWO(h_y, img1->y_chroma_shift);
-
if (img1->fmt != img2->fmt || img1->cs != img2->cs ||
img1->d_w != img2->d_w || img1->d_h != img2->d_h) {
if (mismatch_row != NULL) *mismatch_row = -1;
@@ -161,28 +156,15 @@ static bool compare_img(const aom_image_t *img1, const aom_image_t *img2,
return false;
}
- if (!compare_plane(img1->planes[AOM_PLANE_Y], img1->stride[AOM_PLANE_Y],
- img2->planes[AOM_PLANE_Y], img2->stride[AOM_PLANE_Y], w_y,
- h_y, mismatch_row, mismatch_col, mismatch_pix1,
- mismatch_pix2)) {
- if (mismatch_plane != NULL) *mismatch_plane = AOM_PLANE_Y;
- return false;
- }
-
- if (!compare_plane(img1->planes[AOM_PLANE_U], img1->stride[AOM_PLANE_U],
- img2->planes[AOM_PLANE_U], img2->stride[AOM_PLANE_U], w_uv,
- h_uv, mismatch_row, mismatch_col, mismatch_pix1,
- mismatch_pix2)) {
- if (mismatch_plane != NULL) *mismatch_plane = AOM_PLANE_U;
- return false;
- }
-
- if (!compare_plane(img1->planes[AOM_PLANE_V], img1->stride[AOM_PLANE_V],
- img2->planes[AOM_PLANE_V], img2->stride[AOM_PLANE_V], w_uv,
- h_uv, mismatch_row, mismatch_col, mismatch_pix1,
- mismatch_pix2)) {
- if (mismatch_plane != NULL) *mismatch_plane = AOM_PLANE_U;
- return false;
+ for (int plane = 0; plane < 3; plane++) {
+ if (!compare_plane(img1->planes[plane], img1->stride[plane],
+ img2->planes[plane], img2->stride[plane],
+ aom_img_plane_width(img1, plane),
+ aom_img_plane_height(img1, plane), mismatch_row,
+ mismatch_col, mismatch_pix1, mismatch_pix2)) {
+ if (mismatch_plane != NULL) *mismatch_plane = plane;
+ return false;
+ }
}
return true;
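
Collapsing the three unrolled compare_plane calls into a plane loop does more than shorten compare_img(): the chroma dimensions now come from aom_img_plane_width()/aom_img_plane_height() instead of hand-rolled ROUND_POWER_OF_TWO arithmetic, and the change quietly fixes a copy-and-paste bug in the removed code, which reported a V-plane mismatch as AOM_PLANE_U. The loop shape in isolation:

    // Per-plane dimensions come straight from the image, so the chroma
    // subsampling is handled by the library rather than by the test.
    for (int plane = 0; plane < 3; ++plane) {
      const int w = aom_img_plane_width(img1, plane);
      const int h = aom_img_plane_height(img1, plane);
      // compare w x h samples of img1->planes[plane] vs img2->planes[plane],
      // reporting `plane` itself on mismatch (the old code hard-coded U).
    }
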
diff --git a/third_party/aom/test/end_to_end_test.cc b/third_party/aom/test/end_to_end_test.cc
index 93bc1625e..e1a833ec4 100644
--- a/third_party/aom/test/end_to_end_test.cc
+++ b/third_party/aom/test/end_to_end_test.cc
@@ -128,13 +128,11 @@ class EndToEndTest
encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 1);
encoder->Control(AV1E_SET_TILE_COLUMNS, 4);
encoder->Control(AOME_SET_CPUUSED, cpu_used_);
-#if CONFIG_PALETTE
// Test screen coding tools at cpu_used = 1 when encoding mode is two-pass.
if (cpu_used_ == 1 && encoding_mode_ == ::libaom_test::kTwoPassGood)
encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
else
encoder->Control(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
-#endif // CONFIG_PALETTE
if (encoding_mode_ != ::libaom_test::kRealTime) {
encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
diff --git a/third_party/aom/test/fdct4x4_test.cc b/third_party/aom/test/fdct4x4_test.cc
index e1bd61254..5fad1667b 100644
--- a/third_party/aom/test/fdct4x4_test.cc
+++ b/third_party/aom/test/fdct4x4_test.cc
@@ -36,9 +36,10 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
using libaom_test::FhtFunc;
-typedef std::tr1::tuple<FdctFunc, IdctFunc, int, aom_bit_depth_t, int>
+typedef std::tr1::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t, int>
Dct4x4Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t, int> Ht4x4Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int>
+ Ht4x4Param;
void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
TxfmParam * /*txfm_param*/) {
@@ -211,119 +212,139 @@ using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(C, Trans4x4DCT,
::testing::Values(make_tuple(&aom_fdct4x4_c,
- &aom_idct4x4_16_add_c, 0,
- AOM_BITS_8, 16)));
+ &aom_idct4x4_16_add_c,
+ DCT_DCT, AOM_BITS_8, 16)));
#if CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
DISABLED_C, Trans4x4HT,
- ::testing::Values(make_tuple(&fht4x4_12, &iht4x4_12, 0, AOM_BITS_12, 16),
- make_tuple(&fht4x4_12, &iht4x4_12, 1, AOM_BITS_12, 16),
- make_tuple(&fht4x4_12, &iht4x4_12, 2, AOM_BITS_12, 16),
- make_tuple(&fht4x4_12, &iht4x4_12, 3, AOM_BITS_12, 16)));
+ ::testing::Values(
+ make_tuple(&fht4x4_12, &iht4x4_12, DCT_DCT, AOM_BITS_12, 16),
+ make_tuple(&fht4x4_12, &iht4x4_12, ADST_DCT, AOM_BITS_12, 16),
+ make_tuple(&fht4x4_12, &iht4x4_12, DCT_ADST, AOM_BITS_12, 16),
+ make_tuple(&fht4x4_12, &iht4x4_12, ADST_ADST, AOM_BITS_12, 16)));
INSTANTIATE_TEST_CASE_P(
C, Trans4x4HT,
::testing::Values(
- make_tuple(&fht4x4_10, &iht4x4_10, 0, AOM_BITS_10, 16),
- make_tuple(&fht4x4_10, &iht4x4_10, 1, AOM_BITS_10, 16),
- make_tuple(&fht4x4_10, &iht4x4_10, 2, AOM_BITS_10, 16),
- make_tuple(&fht4x4_10, &iht4x4_10, 3, AOM_BITS_10, 16),
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 0, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 1, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 2, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 3, AOM_BITS_8, 16)));
+ make_tuple(&fht4x4_10, &iht4x4_10, DCT_DCT, AOM_BITS_10, 16),
+ make_tuple(&fht4x4_10, &iht4x4_10, ADST_DCT, AOM_BITS_10, 16),
+ make_tuple(&fht4x4_10, &iht4x4_10, DCT_ADST, AOM_BITS_10, 16),
+ make_tuple(&fht4x4_10, &iht4x4_10, ADST_ADST, AOM_BITS_10, 16),
+ make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, DCT_DCT, AOM_BITS_8,
+ 16),
+ make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, ADST_DCT, AOM_BITS_8,
+ 16),
+ make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, DCT_ADST, AOM_BITS_8,
+ 16),
+ make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, ADST_ADST, AOM_BITS_8,
+ 16)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans4x4HT,
- ::testing::Values(
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 0, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 1, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 2, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, 3, AOM_BITS_8, 16)));
+ ::testing::Values(make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, DCT_DCT,
+ AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, ADST_DCT,
+ AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, DCT_ADST,
+ AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_c, ADST_ADST,
+ AOM_BITS_8, 16)));
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
C, Trans4x4WHT,
- ::testing::Values(
- make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_10, 0, AOM_BITS_10, 16),
- make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_12, 0, AOM_BITS_12, 16),
- make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_c, 0, AOM_BITS_8, 16)));
+ ::testing::Values(make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_10, DCT_DCT,
+ AOM_BITS_10, 16),
+ make_tuple(&av1_highbd_fwht4x4_c, &iwht4x4_12, DCT_DCT,
+ AOM_BITS_12, 16),
+ make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_c, DCT_DCT,
+ AOM_BITS_8, 16)));
#else
INSTANTIATE_TEST_CASE_P(C, Trans4x4WHT,
::testing::Values(make_tuple(&av1_fwht4x4_c,
- &aom_iwht4x4_16_add_c, 0,
- AOM_BITS_8, 16)));
+ &aom_iwht4x4_16_add_c,
+ DCT_DCT, AOM_BITS_8, 16)));
#endif // CONFIG_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(NEON, Trans4x4DCT,
::testing::Values(make_tuple(&aom_fdct4x4_c,
&aom_idct4x4_16_add_neon,
- 0, AOM_BITS_8, 16)));
+ DCT_DCT, AOM_BITS_8, 16)));
#endif // HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
#if HAVE_NEON && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
NEON, Trans4x4HT,
- ::testing::Values(
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 0, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 1, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 2, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon, 3, AOM_BITS_8, 16)));
+ ::testing::Values(make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon,
+ DCT_DCT, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon,
+ ADST_DCT, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon,
+ DCT_ADST, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_c, &av1_iht4x4_16_add_neon,
+ ADST_ADST, AOM_BITS_8, 16)));
#endif // HAVE_NEON && !CONFIG_HIGHBITDEPTH
-#if HAVE_SSE2
+#if HAVE_SSE2 && !CONFIG_DAALA_DCT4
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4WHT,
- ::testing::Values(make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_c, 0,
+ ::testing::Values(make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_c, DCT_DCT,
AOM_BITS_8, 16),
- make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_sse2, 0,
- AOM_BITS_8, 16)));
+ make_tuple(&av1_fwht4x4_c, &aom_iwht4x4_16_add_sse2,
+ DCT_DCT, AOM_BITS_8, 16)));
#endif
#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(SSE2, Trans4x4DCT,
::testing::Values(make_tuple(&aom_fdct4x4_sse2,
&aom_idct4x4_16_add_sse2,
- 0, AOM_BITS_8, 16)));
+ DCT_DCT, AOM_BITS_8, 16)));
+#if !CONFIG_DAALA_DCT4
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT,
- ::testing::Values(make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 0,
- AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 1,
- AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 2,
- AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 3,
- AOM_BITS_8, 16)));
+ ::testing::Values(make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2,
+ DCT_DCT, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2,
+ ADST_DCT, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2,
+ DCT_ADST, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2,
+ ADST_ADST, AOM_BITS_8, 16)));
+#endif // !CONFIG_DAALA_DCT4
#endif // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
-#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
+#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT4
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT,
- ::testing::Values(
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 0, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 1, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 2, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c, 3, AOM_BITS_8, 16)));
-#endif // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
+ ::testing::Values(make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c,
+ DCT_DCT, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c,
+ ADST_DCT, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c,
+ DCT_ADST, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_c,
+ ADST_ADST, AOM_BITS_8, 16)));
+#endif // HAVE_SSE2 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_DCT4
#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(MSA, Trans4x4DCT,
::testing::Values(make_tuple(&aom_fdct4x4_msa,
- &aom_idct4x4_16_add_msa, 0,
- AOM_BITS_8, 16)));
-#if !CONFIG_EXT_TX
+ &aom_idct4x4_16_add_msa,
+ DCT_DCT, AOM_BITS_8, 16)));
+#if !CONFIG_EXT_TX && !CONFIG_DAALA_DCT4
INSTANTIATE_TEST_CASE_P(
MSA, Trans4x4HT,
- ::testing::Values(
- make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 0, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 1, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 2, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa, 3, AOM_BITS_8,
- 16)));
-#endif // !CONFIG_EXT_TX
+ ::testing::Values(make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa,
+ DCT_DCT, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa,
+ ADST_DCT, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa,
+ DCT_ADST, AOM_BITS_8, 16),
+ make_tuple(&av1_fht4x4_msa, &av1_iht4x4_16_add_msa,
+ ADST_ADST, AOM_BITS_8, 16)));
+#endif // !CONFIG_EXT_TX && !CONFIG_DAALA_DCT4
#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH
} // namespace
diff --git a/third_party/aom/test/fdct8x8_test.cc b/third_party/aom/test/fdct8x8_test.cc
index 62cdf6229..99ae8d677 100644
--- a/third_party/aom/test/fdct8x8_test.cc
+++ b/third_party/aom/test/fdct8x8_test.cc
@@ -44,8 +44,9 @@ typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
-typedef std::tr1::tuple<FdctFunc, IdctFunc, int, aom_bit_depth_t> Dct8x8Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int, aom_bit_depth_t> Ht8x8Param;
+typedef std::tr1::tuple<FdctFunc, IdctFunc, TX_TYPE, aom_bit_depth_t>
+ Dct8x8Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t> Ht8x8Param;
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, aom_bit_depth_t> Idct8x8Param;
void reference_8x8_dct_1d(const double in[8], double out[8]) {
@@ -485,8 +486,8 @@ class FwdTrans8x8TestBase {
const int diff = dst[j] - ref[j];
#endif
const uint32_t error = diff * diff;
- EXPECT_EQ(0u, error) << "Error: 8x8 IDCT has error " << error
- << " at index " << j;
+ EXPECT_EQ(0u, error)
+ << "Error: 8x8 IDCT has error " << error << " at index " << j;
}
}
}
@@ -614,108 +615,124 @@ using std::tr1::make_tuple;
#if CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT,
::testing::Values(make_tuple(&aom_fdct8x8_c,
- &aom_idct8x8_64_add_c, 0,
- AOM_BITS_8)));
+ &aom_idct8x8_64_add_c,
+ DCT_DCT, AOM_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT,
::testing::Values(make_tuple(&aom_fdct8x8_c,
- &aom_idct8x8_64_add_c, 0,
- AOM_BITS_8)));
+ &aom_idct8x8_64_add_c,
+ DCT_DCT, AOM_BITS_8)));
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
C, FwdTrans8x8HT,
::testing::Values(
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 0, AOM_BITS_8),
- make_tuple(&fht8x8_10, &iht8x8_10, 0, AOM_BITS_10),
- make_tuple(&fht8x8_10, &iht8x8_10, 1, AOM_BITS_10),
- make_tuple(&fht8x8_10, &iht8x8_10, 2, AOM_BITS_10),
- make_tuple(&fht8x8_10, &iht8x8_10, 3, AOM_BITS_10),
- make_tuple(&fht8x8_12, &iht8x8_12, 0, AOM_BITS_12),
- make_tuple(&fht8x8_12, &iht8x8_12, 1, AOM_BITS_12),
- make_tuple(&fht8x8_12, &iht8x8_12, 2, AOM_BITS_12),
- make_tuple(&fht8x8_12, &iht8x8_12, 3, AOM_BITS_12),
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 1, AOM_BITS_8),
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 2, AOM_BITS_8),
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 3, AOM_BITS_8)));
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, DCT_DCT, AOM_BITS_8),
+ make_tuple(&fht8x8_10, &iht8x8_10, DCT_DCT, AOM_BITS_10),
+ make_tuple(&fht8x8_10, &iht8x8_10, ADST_DCT, AOM_BITS_10),
+ make_tuple(&fht8x8_10, &iht8x8_10, DCT_ADST, AOM_BITS_10),
+ make_tuple(&fht8x8_10, &iht8x8_10, ADST_ADST, AOM_BITS_10),
+ make_tuple(&fht8x8_12, &iht8x8_12, DCT_DCT, AOM_BITS_12),
+ make_tuple(&fht8x8_12, &iht8x8_12, ADST_DCT, AOM_BITS_12),
+ make_tuple(&fht8x8_12, &iht8x8_12, DCT_ADST, AOM_BITS_12),
+ make_tuple(&fht8x8_12, &iht8x8_12, ADST_ADST, AOM_BITS_12),
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, ADST_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, DCT_ADST, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, ADST_ADST,
+ AOM_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
C, FwdTrans8x8HT,
::testing::Values(
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 0, AOM_BITS_8),
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 1, AOM_BITS_8),
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 2, AOM_BITS_8),
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, 3, AOM_BITS_8)));
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, DCT_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, ADST_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, DCT_ADST, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_c, ADST_ADST,
+ AOM_BITS_8)));
#endif // CONFIG_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
::testing::Values(make_tuple(&aom_fdct8x8_neon,
&aom_idct8x8_64_add_neon,
- 0, AOM_BITS_8)));
+ DCT_DCT, AOM_BITS_8)));
#endif // HAVE_NEON_ASM && !CONFIG_HIGHBITDEPTH
#if HAVE_NEON && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
NEON, FwdTrans8x8HT,
- ::testing::Values(
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 0, AOM_BITS_8),
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 1, AOM_BITS_8),
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 2, AOM_BITS_8),
- make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon, 3, AOM_BITS_8)));
+ ::testing::Values(make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon,
+ DCT_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon,
+ ADST_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon,
+ DCT_ADST, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_c, &av1_iht8x8_64_add_neon,
+ ADST_ADST, AOM_BITS_8)));
#endif // HAVE_NEON && !CONFIG_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
::testing::Values(make_tuple(&aom_fdct8x8_sse2,
&aom_idct8x8_64_add_sse2,
- 0, AOM_BITS_8)));
+ DCT_DCT, AOM_BITS_8)));
+#if !CONFIG_DAALA_DCT8
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8HT,
- ::testing::Values(
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 0, AOM_BITS_8),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 1, AOM_BITS_8),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 2, AOM_BITS_8),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 3, AOM_BITS_8)));
+ ::testing::Values(make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2,
+ DCT_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2,
+ ADST_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2,
+ DCT_ADST, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2,
+ ADST_ADST, AOM_BITS_8)));
+#endif // !CONFIG_DAALA_DCT8
#endif // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
::testing::Values(make_tuple(&aom_fdct8x8_sse2,
- &aom_idct8x8_64_add_c, 0,
- AOM_BITS_8)));
-
+ &aom_idct8x8_64_add_c,
+ DCT_DCT, AOM_BITS_8)));
+#if !CONFIG_DAALA_DCT8
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8HT,
- ::testing::Values(
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 0, AOM_BITS_8),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 1, AOM_BITS_8),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 2, AOM_BITS_8),
- make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 3, AOM_BITS_8)));
-
+ ::testing::Values(make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c,
+ DCT_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c,
+ ADST_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c,
+ DCT_ADST, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c,
+ ADST_ADST, AOM_BITS_8)));
+#endif // !CONFIG_DAALA_DCT8
#endif // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
#if HAVE_SSSE3 && ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
::testing::Values(make_tuple(&aom_fdct8x8_ssse3,
&aom_idct8x8_64_add_ssse3,
- 0, AOM_BITS_8)));
+ DCT_DCT, AOM_BITS_8)));
#endif
#if HAVE_MSA && !CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(MSA, FwdTrans8x8DCT,
::testing::Values(make_tuple(&aom_fdct8x8_msa,
- &aom_idct8x8_64_add_msa, 0,
- AOM_BITS_8)));
-#if !CONFIG_EXT_TX
+ &aom_idct8x8_64_add_msa,
+ DCT_DCT, AOM_BITS_8)));
+#if !CONFIG_EXT_TX && !CONFIG_DAALA_DCT8
INSTANTIATE_TEST_CASE_P(
MSA, FwdTrans8x8HT,
- ::testing::Values(
- make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 0, AOM_BITS_8),
- make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 1, AOM_BITS_8),
- make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 2, AOM_BITS_8),
- make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa, 3, AOM_BITS_8)));
-#endif // !CONFIG_EXT_TX
+ ::testing::Values(make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa,
+ DCT_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa,
+ ADST_DCT, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa,
+ DCT_ADST, AOM_BITS_8),
+ make_tuple(&av1_fht8x8_msa, &av1_iht8x8_64_add_msa,
+ ADST_ADST, AOM_BITS_8)));
+#endif // !CONFIG_EXT_TX && !CONFIG_DAALA_DCT8
#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH
} // namespace
diff --git a/third_party/aom/test/hiprec_convolve_test.cc b/third_party/aom/test/hiprec_convolve_test.cc
index 0b34c99c9..78e109c9d 100644
--- a/third_party/aom/test/hiprec_convolve_test.cc
+++ b/third_party/aom/test/hiprec_convolve_test.cc
@@ -22,11 +22,13 @@ using libaom_test::AV1HighbdHiprecConvolve::AV1HighbdHiprecConvolveTest;
namespace {
+#if HAVE_SSE2
TEST_P(AV1HiprecConvolveTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
INSTANTIATE_TEST_CASE_P(SSE2, AV1HiprecConvolveTest,
libaom_test::AV1HiprecConvolve::BuildParams(
aom_convolve8_add_src_hip_sse2));
+#endif
#if CONFIG_HIGHBITDEPTH && HAVE_SSSE3
TEST_P(AV1HighbdHiprecConvolveTest, CheckOutput) {
diff --git a/third_party/aom/test/hiprec_convolve_test_util.cc b/third_party/aom/test/hiprec_convolve_test_util.cc
index f5661ec07..4dee6ab4d 100644
--- a/third_party/aom/test/hiprec_convolve_test_util.cc
+++ b/third_party/aom/test/hiprec_convolve_test_util.cc
@@ -100,9 +100,9 @@ void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
vkernel, 16, out_w, out_h);
for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j
- << " = (" << (j % out_w) << ", "
- << (j / out_w) << ") on iteration " << i;
+ ASSERT_EQ(output[j], output2[j])
+ << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
+ << (j / out_w) << ") on iteration " << i;
}
delete[] input_;
delete[] output;
@@ -175,9 +175,9 @@ void AV1HighbdHiprecConvolveTest::RunCheckOutput(
hkernel, 16, vkernel, 16, out_w, out_h, bd);
for (j = 0; j < out_w * out_h; ++j)
- ASSERT_EQ(output[j], output2[j]) << "Pixel mismatch at index " << j
- << " = (" << (j % out_w) << ", "
- << (j / out_w) << ") on iteration " << i;
+ ASSERT_EQ(output[j], output2[j])
+ << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
+ << (j / out_w) << ") on iteration " << i;
}
delete[] input;
delete[] output;
diff --git a/third_party/aom/test/intrapred_test.cc b/third_party/aom/test/intrapred_test.cc
index 5dd8c00be..12da1601c 100644
--- a/third_party/aom/test/intrapred_test.cc
+++ b/third_party/aom/test/intrapred_test.cc
@@ -29,27 +29,35 @@ using libaom_test::ACMRandom;
const int count_test_block = 100000;
-typedef void (*IntraPred)(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left, int bps);
+typedef void (*HighbdIntraPred)(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bps);
+typedef void (*IntraPred)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above,
+ const uint8_t *left);
+template <typename FuncType>
struct IntraPredFunc {
- IntraPredFunc(IntraPred pred = NULL, IntraPred ref = NULL,
- int block_size_value = 0, int bit_depth_value = 0)
- : pred_fn(pred), ref_fn(ref), block_size(block_size_value),
- bit_depth(bit_depth_value) {}
-
- IntraPred pred_fn;
- IntraPred ref_fn;
- int block_size;
+ IntraPredFunc(FuncType pred = NULL, FuncType ref = NULL,
+ int block_width_value = 0, int block_height_value = 0,
+ int bit_depth_value = 0)
+ : pred_fn(pred), ref_fn(ref), block_width(block_width_value),
+ block_height(block_height_value), bit_depth(bit_depth_value) {}
+
+ FuncType pred_fn;
+ FuncType ref_fn;
+ int block_width;
+ int block_height;
int bit_depth;
};
-class AV1IntraPredTest : public ::testing::TestWithParam<IntraPredFunc> {
+template <typename FuncType, typename Pixel>
+class AV1IntraPredTest
+ : public ::testing::TestWithParam<IntraPredFunc<FuncType> > {
public:
- void RunTest(uint16_t *left_col, uint16_t *above_data, uint16_t *dst,
- uint16_t *ref_dst) {
+ void RunTest(Pixel *left_col, Pixel *above_data, Pixel *dst, Pixel *ref_dst) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int block_size = params_.block_size;
+ const int block_width = params_.block_width;
+ const int block_height = params_.block_height;
above_row_ = above_data + 16;
left_col_ = left_col;
dst_ = dst;
@@ -57,14 +65,14 @@ class AV1IntraPredTest : public ::testing::TestWithParam<IntraPredFunc> {
int error_count = 0;
for (int i = 0; i < count_test_block; ++i) {
// Fill edges with random data, try first with saturated values.
- for (int x = -1; x <= block_size * 2; x++) {
+ for (int x = -1; x <= block_width * 2; x++) {
if (i == 0) {
above_row_[x] = mask_;
} else {
above_row_[x] = rnd.Rand16() & mask_;
}
}
- for (int y = 0; y < block_size; y++) {
+ for (int y = 0; y < block_height; y++) {
if (i == 0) {
left_col_[y] = mask_;
} else {
@@ -79,43 +87,59 @@ class AV1IntraPredTest : public ::testing::TestWithParam<IntraPredFunc> {
protected:
virtual void SetUp() {
- params_ = GetParam();
- stride_ = params_.block_size * 3;
+ params_ = this->GetParam();
+ stride_ = params_.block_width * 3;
mask_ = (1 << params_.bit_depth) - 1;
}
- void Predict() {
- const int bit_depth = params_.bit_depth;
- params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
- ASM_REGISTER_STATE_CHECK(
- params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth));
- }
+ virtual void Predict() = 0;
void CheckPrediction(int test_case_number, int *error_count) const {
// For each pixel ensure that the calculated value is the same as reference.
- const int block_size = params_.block_size;
- for (int y = 0; y < block_size; y++) {
- for (int x = 0; x < block_size; x++) {
+ const int block_width = params_.block_width;
+ const int block_height = params_.block_height;
+ for (int y = 0; y < block_height; y++) {
+ for (int x = 0; x < block_width; x++) {
*error_count += ref_dst_[x + y * stride_] != dst_[x + y * stride_];
if (*error_count == 1) {
ASSERT_EQ(ref_dst_[x + y * stride_], dst_[x + y * stride_])
- << " Failed on Test Case Number " << test_case_number;
+ << " Failed on Test Case Number " << test_case_number
+ << " location: x = " << x << " y = " << y;
}
}
}
}
- uint16_t *above_row_;
- uint16_t *left_col_;
- uint16_t *dst_;
- uint16_t *ref_dst_;
+ Pixel *above_row_;
+ Pixel *left_col_;
+ Pixel *dst_;
+ Pixel *ref_dst_;
ptrdiff_t stride_;
int mask_;
- IntraPredFunc params_;
+ IntraPredFunc<FuncType> params_;
+};
+
+class HighbdIntraPredTest : public AV1IntraPredTest<HighbdIntraPred, uint16_t> {
+ protected:
+ void Predict() {
+ const int bit_depth = params_.bit_depth;
+ params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
+ ASM_REGISTER_STATE_CHECK(
+ params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth));
+ }
+};
+
+class LowbdIntraPredTest : public AV1IntraPredTest<IntraPred, uint8_t> {
+ protected:
+ void Predict() {
+ params_.ref_fn(ref_dst_, stride_, above_row_, left_col_);
+ ASM_REGISTER_STATE_CHECK(
+ params_.pred_fn(dst_, stride_, above_row_, left_col_));
+ }
};
-TEST_P(AV1IntraPredTest, IntraPredTests) {
+TEST_P(HighbdIntraPredTest, Bitexact) {
// max block size is 32
DECLARE_ALIGNED(16, uint16_t, left_col[2 * 32]);
DECLARE_ALIGNED(16, uint16_t, above_data[2 * 32 + 32]);
@@ -124,114 +148,186 @@ TEST_P(AV1IntraPredTest, IntraPredTests) {
RunTest(left_col, above_data, dst, ref_dst);
}
-#if HAVE_SSE2
+TEST_P(LowbdIntraPredTest, Bitexact) {
+ // max block size is 32
+ DECLARE_ALIGNED(16, uint8_t, left_col[2 * 32]);
+ DECLARE_ALIGNED(16, uint8_t, above_data[2 * 32 + 32]);
+ DECLARE_ALIGNED(16, uint8_t, dst[3 * 32 * 32]);
+ DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * 32 * 32]);
+ RunTest(left_col, above_data, dst, ref_dst);
+}
+
+// -----------------------------------------------------------------------------
+// High Bit Depth Tests
+
+#define highbd_entry(type, width, height, opt, bd) \
+ IntraPredFunc<HighbdIntraPred>( \
+ &aom_highbd_##type##_predictor_##width##x##height##_##opt, \
+ &aom_highbd_##type##_predictor_##width##x##height##_c, width, height, \
+ bd)
+
+#define highbd_intrapred(type, opt, bd) \
+ highbd_entry(type, 4, 4, opt, bd), highbd_entry(type, 4, 8, opt, bd), \
+ highbd_entry(type, 8, 4, opt, bd), highbd_entry(type, 8, 8, opt, bd), \
+ highbd_entry(type, 8, 16, opt, bd), highbd_entry(type, 16, 8, opt, bd), \
+ highbd_entry(type, 16, 16, opt, bd), \
+ highbd_entry(type, 16, 32, opt, bd), \
+ highbd_entry(type, 32, 16, opt, bd), highbd_entry(type, 32, 32, opt, bd)
+
#if CONFIG_HIGHBITDEPTH
-const IntraPredFunc IntraPredTestVector8[] = {
- IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
- &aom_highbd_dc_predictor_32x32_c, 32, 8),
-#if !CONFIG_ALT_INTRA
- IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
- &aom_highbd_tm_predictor_16x16_c, 16, 8),
- IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
- &aom_highbd_tm_predictor_32x32_c, 32, 8),
-#endif // !CONFIG_ALT_INTRA
-
- IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
- &aom_highbd_dc_predictor_4x4_c, 4, 8),
- IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
- &aom_highbd_dc_predictor_8x8_c, 8, 8),
- IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
- &aom_highbd_dc_predictor_16x16_c, 16, 8),
- IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2, &aom_highbd_v_predictor_4x4_c,
- 4, 8),
- IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2, &aom_highbd_v_predictor_8x8_c,
- 8, 8),
- IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
- &aom_highbd_v_predictor_16x16_c, 16, 8),
- IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
- &aom_highbd_v_predictor_32x32_c, 32, 8)
-#if !CONFIG_ALT_INTRA
- ,
- IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
- &aom_highbd_tm_predictor_4x4_c, 4, 8),
- IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
- &aom_highbd_tm_predictor_8x8_c, 8, 8)
-#endif // !CONFIG_ALT_INTRA
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, AV1IntraPredTest,
+#if HAVE_SSE2
+const IntraPredFunc<HighbdIntraPred> IntraPredTestVector8[] = {
+ highbd_intrapred(dc, sse2, 8), highbd_intrapred(dc_left, sse2, 8),
+ highbd_intrapred(dc_top, sse2, 8), highbd_intrapred(dc_128, sse2, 8),
+ highbd_intrapred(h, sse2, 8), highbd_intrapred(v, sse2, 8),
+ highbd_entry(d117, 4, 4, sse2, 8), highbd_entry(d135, 4, 4, sse2, 8),
+ highbd_entry(d153, 4, 4, sse2, 8), highbd_entry(d45e, 4, 4, sse2, 8),
+ highbd_entry(d45e, 4, 8, sse2, 8), highbd_entry(d45e, 8, 4, sse2, 8),
+ highbd_entry(d45e, 8, 8, sse2, 8), highbd_entry(d45e, 8, 16, sse2, 8),
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, HighbdIntraPredTest,
::testing::ValuesIn(IntraPredTestVector8));
-const IntraPredFunc IntraPredTestVector10[] = {
- IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
- &aom_highbd_dc_predictor_32x32_c, 32, 10),
-#if !CONFIG_ALT_INTRA
- IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
- &aom_highbd_tm_predictor_16x16_c, 16, 10),
- IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
- &aom_highbd_tm_predictor_32x32_c, 32, 10),
-#endif // !CONFIG_ALT_INTRA
- IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
- &aom_highbd_dc_predictor_4x4_c, 4, 10),
- IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
- &aom_highbd_dc_predictor_8x8_c, 8, 10),
- IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
- &aom_highbd_dc_predictor_16x16_c, 16, 10),
- IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2, &aom_highbd_v_predictor_4x4_c,
- 4, 10),
- IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2, &aom_highbd_v_predictor_8x8_c,
- 8, 10),
- IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
- &aom_highbd_v_predictor_16x16_c, 16, 10),
- IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
- &aom_highbd_v_predictor_32x32_c, 32, 10)
-#if !CONFIG_ALT_INTRA
- ,
- IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
- &aom_highbd_tm_predictor_4x4_c, 4, 10),
- IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
- &aom_highbd_tm_predictor_8x8_c, 8, 10)
-#endif // !CONFIG_ALT_INTRA
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, AV1IntraPredTest,
+const IntraPredFunc<HighbdIntraPred> IntraPredTestVector10[] = {
+ highbd_intrapred(dc, sse2, 10), highbd_intrapred(dc_left, sse2, 10),
+ highbd_intrapred(dc_top, sse2, 10), highbd_intrapred(dc_128, sse2, 10),
+ highbd_intrapred(h, sse2, 10), highbd_intrapred(v, sse2, 10),
+ highbd_entry(d117, 4, 4, sse2, 10), highbd_entry(d135, 4, 4, sse2, 10),
+ highbd_entry(d153, 4, 4, sse2, 10), highbd_entry(d45e, 4, 4, sse2, 10),
+ highbd_entry(d45e, 4, 8, sse2, 10), highbd_entry(d45e, 8, 4, sse2, 10),
+ highbd_entry(d45e, 8, 8, sse2, 10), highbd_entry(d45e, 8, 16, sse2, 10),
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, HighbdIntraPredTest,
::testing::ValuesIn(IntraPredTestVector10));
-const IntraPredFunc IntraPredTestVector12[] = {
- IntraPredFunc(&aom_highbd_dc_predictor_32x32_sse2,
- &aom_highbd_dc_predictor_32x32_c, 32, 12),
-#if !CONFIG_ALT_INTRA
- IntraPredFunc(&aom_highbd_tm_predictor_16x16_sse2,
- &aom_highbd_tm_predictor_16x16_c, 16, 12),
- IntraPredFunc(&aom_highbd_tm_predictor_32x32_sse2,
- &aom_highbd_tm_predictor_32x32_c, 32, 12),
-#endif // !CONFIG_ALT_INTRA
- IntraPredFunc(&aom_highbd_dc_predictor_4x4_sse2,
- &aom_highbd_dc_predictor_4x4_c, 4, 12),
- IntraPredFunc(&aom_highbd_dc_predictor_8x8_sse2,
- &aom_highbd_dc_predictor_8x8_c, 8, 12),
- IntraPredFunc(&aom_highbd_dc_predictor_16x16_sse2,
- &aom_highbd_dc_predictor_16x16_c, 16, 12),
- IntraPredFunc(&aom_highbd_v_predictor_4x4_sse2, &aom_highbd_v_predictor_4x4_c,
- 4, 12),
- IntraPredFunc(&aom_highbd_v_predictor_8x8_sse2, &aom_highbd_v_predictor_8x8_c,
- 8, 12),
- IntraPredFunc(&aom_highbd_v_predictor_16x16_sse2,
- &aom_highbd_v_predictor_16x16_c, 16, 12),
- IntraPredFunc(&aom_highbd_v_predictor_32x32_sse2,
- &aom_highbd_v_predictor_32x32_c, 32, 12)
-#if !CONFIG_ALT_INTRA
- ,
- IntraPredFunc(&aom_highbd_tm_predictor_4x4_sse2,
- &aom_highbd_tm_predictor_4x4_c, 4, 12),
- IntraPredFunc(&aom_highbd_tm_predictor_8x8_sse2,
- &aom_highbd_tm_predictor_8x8_c, 8, 12)
-#endif // !CONFIG_ALT_INTRA
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, AV1IntraPredTest,
+const IntraPredFunc<HighbdIntraPred> IntraPredTestVector12[] = {
+ highbd_intrapred(dc, sse2, 12), highbd_intrapred(dc_left, sse2, 12),
+ highbd_intrapred(dc_top, sse2, 12), highbd_intrapred(dc_128, sse2, 12),
+ highbd_intrapred(h, sse2, 12), highbd_intrapred(v, sse2, 12),
+ highbd_entry(d117, 4, 4, sse2, 12), highbd_entry(d135, 4, 4, sse2, 12),
+ highbd_entry(d153, 4, 4, sse2, 12), highbd_entry(d45e, 4, 4, sse2, 12),
+ highbd_entry(d45e, 4, 8, sse2, 12), highbd_entry(d45e, 8, 4, sse2, 12),
+ highbd_entry(d45e, 8, 8, sse2, 12), highbd_entry(d45e, 8, 16, sse2, 12),
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, HighbdIntraPredTest,
::testing::ValuesIn(IntraPredTestVector12));
+#endif // HAVE_SSE2
+
+#if HAVE_SSSE3
+const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorSsse3_8[] = {
+ highbd_entry(d117, 8, 8, ssse3, 8), highbd_entry(d117, 16, 16, ssse3, 8),
+ highbd_entry(d117, 32, 32, ssse3, 8), highbd_entry(d135, 8, 8, ssse3, 8),
+ highbd_entry(d135, 16, 16, ssse3, 8), highbd_entry(d135, 32, 32, ssse3, 8),
+ highbd_entry(d153, 8, 8, ssse3, 8), highbd_entry(d153, 16, 16, ssse3, 8),
+ highbd_entry(d153, 32, 32, ssse3, 8),
+};
+INSTANTIATE_TEST_CASE_P(SSSE3_TO_C_8, HighbdIntraPredTest,
+ ::testing::ValuesIn(IntraPredTestVectorSsse3_8));
+
+const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorSsse3_10[] = {
+ highbd_entry(d117, 8, 8, ssse3, 10), highbd_entry(d117, 16, 16, ssse3, 10),
+ highbd_entry(d117, 32, 32, ssse3, 10), highbd_entry(d135, 8, 8, ssse3, 10),
+ highbd_entry(d135, 16, 16, ssse3, 10), highbd_entry(d135, 32, 32, ssse3, 10),
+ highbd_entry(d153, 8, 8, ssse3, 10), highbd_entry(d153, 16, 16, ssse3, 10),
+ highbd_entry(d153, 32, 32, ssse3, 10),
+};
+INSTANTIATE_TEST_CASE_P(SSSE3_TO_C_10, HighbdIntraPredTest,
+ ::testing::ValuesIn(IntraPredTestVectorSsse3_10));
+
+const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorSsse3_12[] = {
+ highbd_entry(d117, 8, 8, ssse3, 12), highbd_entry(d117, 16, 16, ssse3, 12),
+ highbd_entry(d117, 32, 32, ssse3, 12), highbd_entry(d135, 8, 8, ssse3, 12),
+ highbd_entry(d135, 16, 16, ssse3, 12), highbd_entry(d135, 32, 32, ssse3, 12),
+ highbd_entry(d153, 8, 8, ssse3, 12), highbd_entry(d153, 16, 16, ssse3, 12),
+ highbd_entry(d153, 32, 32, ssse3, 12),
+};
+INSTANTIATE_TEST_CASE_P(SSSE3_TO_C_12, HighbdIntraPredTest,
+ ::testing::ValuesIn(IntraPredTestVectorSsse3_12));
+#endif // HAVE_SSSE3
+
+#if HAVE_AVX2
+const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorAvx2_8[] = {
+ highbd_entry(d45e, 16, 8, avx2, 8), highbd_entry(d45e, 16, 16, avx2, 8),
+ highbd_entry(d45e, 16, 32, avx2, 8), highbd_entry(d45e, 32, 16, avx2, 8),
+ highbd_entry(d45e, 32, 32, avx2, 8),
+};
+INSTANTIATE_TEST_CASE_P(AVX2_TO_C_8, HighbdIntraPredTest,
+ ::testing::ValuesIn(IntraPredTestVectorAvx2_8));
+
+const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorAvx2_10[] = {
+ highbd_entry(d45e, 16, 8, avx2, 10), highbd_entry(d45e, 16, 16, avx2, 10),
+ highbd_entry(d45e, 16, 32, avx2, 10), highbd_entry(d45e, 32, 16, avx2, 10),
+ highbd_entry(d45e, 32, 32, avx2, 10),
+};
+INSTANTIATE_TEST_CASE_P(AVX2_TO_C_10, HighbdIntraPredTest,
+ ::testing::ValuesIn(IntraPredTestVectorAvx2_10));
+
+const IntraPredFunc<HighbdIntraPred> IntraPredTestVectorAvx2_12[] = {
+ highbd_entry(d45e, 16, 8, avx2, 12), highbd_entry(d45e, 16, 16, avx2, 12),
+ highbd_entry(d45e, 16, 32, avx2, 12), highbd_entry(d45e, 32, 16, avx2, 12),
+ highbd_entry(d45e, 32, 32, avx2, 12),
+};
+INSTANTIATE_TEST_CASE_P(AVX2_TO_C_12, HighbdIntraPredTest,
+ ::testing::ValuesIn(IntraPredTestVectorAvx2_12));
+#endif // HAVE_AVX2
#endif // CONFIG_HIGHBITDEPTH
+
+// -----------------------------------------------------------------------------
+// Low Bit Depth Tests
+
+#define lowbd_entry(type, width, height, opt) \
+ IntraPredFunc<IntraPred>(&aom_##type##_predictor_##width##x##height##_##opt, \
+ &aom_##type##_predictor_##width##x##height##_c, \
+ width, height, 8)
+
+#define lowbd_intrapred(type, opt) \
+ lowbd_entry(type, 4, 4, opt), lowbd_entry(type, 4, 8, opt), \
+ lowbd_entry(type, 8, 4, opt), lowbd_entry(type, 8, 8, opt), \
+ lowbd_entry(type, 8, 16, opt), lowbd_entry(type, 16, 8, opt), \
+ lowbd_entry(type, 16, 16, opt), lowbd_entry(type, 16, 32, opt), \
+ lowbd_entry(type, 32, 16, opt), lowbd_entry(type, 32, 32, opt)
+
+#if HAVE_SSE2
+const IntraPredFunc<IntraPred> LowbdIntraPredTestVector[] = {
+ lowbd_intrapred(dc, sse2), lowbd_intrapred(dc_top, sse2),
+ lowbd_intrapred(dc_left, sse2), lowbd_intrapred(dc_128, sse2),
+ lowbd_intrapred(v, sse2), lowbd_intrapred(h, sse2),
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2, LowbdIntraPredTest,
+ ::testing::ValuesIn(LowbdIntraPredTestVector));
+
#endif // HAVE_SSE2
+
+#if HAVE_AVX2
+const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorAvx2[] = {
+ lowbd_entry(dc, 32, 32, avx2), lowbd_entry(dc_top, 32, 32, avx2),
+ lowbd_entry(dc_left, 32, 32, avx2), lowbd_entry(dc_128, 32, 32, avx2),
+ lowbd_entry(v, 32, 32, avx2), lowbd_entry(h, 32, 32, avx2),
+ lowbd_entry(dc, 32, 16, avx2), lowbd_entry(dc_top, 32, 16, avx2),
+ lowbd_entry(dc_left, 32, 16, avx2), lowbd_entry(dc_128, 32, 16, avx2),
+ lowbd_entry(v, 32, 16, avx2), lowbd_entry(paeth, 16, 8, avx2),
+ lowbd_entry(paeth, 16, 16, avx2), lowbd_entry(paeth, 16, 32, avx2),
+ lowbd_entry(paeth, 32, 16, avx2), lowbd_entry(paeth, 32, 32, avx2),
+};
+
+INSTANTIATE_TEST_CASE_P(AVX2, LowbdIntraPredTest,
+ ::testing::ValuesIn(LowbdIntraPredTestVectorAvx2));
+
+#endif  // HAVE_AVX2
+
+#if HAVE_SSSE3
+const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = {
+ lowbd_intrapred(paeth, ssse3), lowbd_intrapred(smooth, ssse3),
+};
+
+INSTANTIATE_TEST_CASE_P(SSSE3, LowbdIntraPredTest,
+ ::testing::ValuesIn(LowbdIntraPredTestVectorSsse3));
+
+#endif // HAVE_SSSE3
+
} // namespace
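
For reference, the highbd_entry and lowbd_entry helpers above are pure
token-pasting macros; each entry pairs an optimized predictor with the C
reference it must match bit-exactly. Expanding one entry mechanically from the
macro definition gives, for example:

  // highbd_entry(dc, 4, 4, sse2, 8) expands to:
  IntraPredFunc<HighbdIntraPred>(&aom_highbd_dc_predictor_4x4_sse2,
                                 &aom_highbd_dc_predictor_4x4_c,
                                 /*block_width=*/4, /*block_height=*/4,
                                 /*bit_depth=*/8)
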
diff --git a/third_party/aom/test/ivf_video_source.h b/third_party/aom/test/ivf_video_source.h
index 0d3e9f9cb..956c145ac 100644
--- a/third_party/aom/test/ivf_video_source.h
+++ b/third_party/aom/test/ivf_video_source.h
@@ -48,8 +48,8 @@ class IVFVideoSource : public CompressedVideoSource {
virtual void Begin() {
input_file_ = OpenTestDataFile(file_name_);
- ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
- << file_name_;
+ ASSERT_TRUE(input_file_ != NULL)
+ << "Input file open failed. Filename: " << file_name_;
// Read file header
uint8_t file_hdr[kIvfFileHdrSize];
diff --git a/third_party/aom/test/lpf_8_test.cc b/third_party/aom/test/lpf_8_test.cc
index f050718bb..4859a8ee7 100644
--- a/third_party/aom/test/lpf_8_test.cc
+++ b/third_party/aom/test/lpf_8_test.cc
@@ -35,6 +35,8 @@ const int kNumCoeffs = 1024;
const int number_of_iterations = 10000;
+const int kSpeedTestNum = 500000;
+
#if CONFIG_HIGHBITDEPTH
typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh, int bd);
@@ -242,6 +244,43 @@ TEST_P(Loop8Test6Param, ValueCheck) {
<< "First failed at test case " << first_failure;
}
+TEST_P(Loop8Test6Param, DISABLED_Speed) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = kSpeedTestNum;
+#if CONFIG_HIGHBITDEPTH
+ const int32_t bd = bit_depth_;
+ DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+#else
+ DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
+#endif // CONFIG_HIGHBITDEPTH
+
+ uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+ DECLARE_ALIGNED(16, const uint8_t,
+ blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+ tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+ tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+ DECLARE_ALIGNED(16, const uint8_t,
+ limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+ tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+ tmp = rnd.Rand8();
+ DECLARE_ALIGNED(16, const uint8_t,
+ thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+ tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+
+ int32_t p = kNumCoeffs / 32;
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ s[j] = rnd.Rand16() & mask_;
+ }
+
+ for (int i = 0; i < count_test_block; ++i) {
+#if CONFIG_HIGHBITDEPTH
+ loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd);
+#else
+ loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh);
+#endif // CONFIG_HIGHBITDEPTH
+ }
+}
+
TEST_P(Loop8Test9Param, OperationCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = number_of_iterations;
@@ -408,9 +447,59 @@ TEST_P(Loop8Test9Param, ValueCheck) {
<< "First failed at test case " << first_failure;
}
+TEST_P(Loop8Test9Param, DISABLED_Speed) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int count_test_block = kSpeedTestNum;
+#if CONFIG_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+#else
+ DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
+#endif // CONFIG_HIGHBITDEPTH
+
+ uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+ DECLARE_ALIGNED(16, const uint8_t,
+ blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+ tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+ tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+ DECLARE_ALIGNED(16, const uint8_t,
+ limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+ tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+ tmp = rnd.Rand8();
+ DECLARE_ALIGNED(16, const uint8_t,
+ thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+ tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+ tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+ DECLARE_ALIGNED(16, const uint8_t,
+ blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+ tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+ tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+ DECLARE_ALIGNED(16, const uint8_t,
+ limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+ tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+ tmp = rnd.Rand8();
+ DECLARE_ALIGNED(16, const uint8_t,
+ thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
+ tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
+ int32_t p = kNumCoeffs / 32; // TODO(pdlf) can we have non-square here?
+ for (int j = 0; j < kNumCoeffs; ++j) {
+ s[j] = rnd.Rand16() & mask_;
+ }
+
+ for (int i = 0; i < count_test_block; ++i) {
+#if CONFIG_HIGHBITDEPTH
+ const int32_t bd = bit_depth_;
+ loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1,
+ thresh1, bd);
+#else
+ loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1,
+ thresh1);
+#endif // CONFIG_HIGHBITDEPTH
+ }
+}
+
using std::tr1::make_tuple;
-#if HAVE_SSE2 && (!CONFIG_PARALLEL_DEBLOCKING)
+#if HAVE_SSE2
#if CONFIG_HIGHBITDEPTH
const loop8_param_t kHbdLoop8Test6[] = {
@@ -470,12 +559,38 @@ const loop8_param_t kLoop8Test6[] = {
make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8),
make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8),
make_tuple(&aom_lpf_vertical_16_sse2, &aom_lpf_vertical_16_c, 8),
+#if !CONFIG_PARALLEL_DEBLOCKING
make_tuple(&aom_lpf_vertical_16_dual_sse2, &aom_lpf_vertical_16_dual_c, 8)
+#endif
};
INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param,
::testing::ValuesIn(kLoop8Test6));
#endif // CONFIG_HIGHBITDEPTH
+#endif // HAVE_SSE2
+
+#if HAVE_AVX2
+#if CONFIG_HIGHBITDEPTH
+
+const loop8_param_t kHbdLoop8Test6Avx2[] = {
+ make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
+ &aom_highbd_lpf_horizontal_edge_16_c, 8),
+ make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
+ &aom_highbd_lpf_horizontal_edge_16_c, 10),
+ make_tuple(&aom_highbd_lpf_horizontal_edge_16_avx2,
+ &aom_highbd_lpf_horizontal_edge_16_c, 12),
+ make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
+ &aom_highbd_lpf_vertical_16_dual_c, 8),
+ make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
+ &aom_highbd_lpf_vertical_16_dual_c, 10),
+ make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
+ &aom_highbd_lpf_vertical_16_dual_c, 12)
+};
+
+INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test6Param,
+ ::testing::ValuesIn(kHbdLoop8Test6Avx2));
+
+#endif
#endif
#if HAVE_AVX2 && (!CONFIG_HIGHBITDEPTH) && (!CONFIG_PARALLEL_DEBLOCKING)
@@ -487,7 +602,7 @@ INSTANTIATE_TEST_CASE_P(
&aom_lpf_horizontal_edge_16_c, 8)));
#endif
-#if HAVE_SSE2 && (!CONFIG_PARALLEL_DEBLOCKING)
+#if HAVE_SSE2
#if CONFIG_HIGHBITDEPTH
const dualloop8_param_t kHbdLoop8Test9[] = {
make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
@@ -519,6 +634,7 @@ const dualloop8_param_t kHbdLoop8Test9[] = {
INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param,
::testing::ValuesIn(kHbdLoop8Test9));
#else
+#if !CONFIG_PARALLEL_DEBLOCKING
const dualloop8_param_t kLoop8Test9[] = {
make_tuple(&aom_lpf_horizontal_4_dual_sse2, &aom_lpf_horizontal_4_dual_c, 8),
make_tuple(&aom_lpf_horizontal_8_dual_sse2, &aom_lpf_horizontal_8_dual_c, 8),
@@ -528,7 +644,42 @@ const dualloop8_param_t kLoop8Test9[] = {
INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param,
::testing::ValuesIn(kLoop8Test9));
+#endif
#endif // CONFIG_HIGHBITDEPTH
+#endif // HAVE_SSE2
+
+#if HAVE_AVX2
+#if CONFIG_HIGHBITDEPTH
+const dualloop8_param_t kHbdLoop8Test9Avx2[] = {
+ make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
+ &aom_highbd_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
+ &aom_highbd_lpf_horizontal_4_dual_c, 10),
+ make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,
+ &aom_highbd_lpf_horizontal_4_dual_c, 12),
+ make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
+ &aom_highbd_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
+ &aom_highbd_lpf_horizontal_8_dual_c, 10),
+ make_tuple(&aom_highbd_lpf_horizontal_8_dual_avx2,
+ &aom_highbd_lpf_horizontal_8_dual_c, 12),
+ make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
+ &aom_highbd_lpf_vertical_4_dual_c, 8),
+ make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
+ &aom_highbd_lpf_vertical_4_dual_c, 10),
+ make_tuple(&aom_highbd_lpf_vertical_4_dual_avx2,
+ &aom_highbd_lpf_vertical_4_dual_c, 12),
+ make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
+ &aom_highbd_lpf_vertical_8_dual_c, 8),
+ make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
+ &aom_highbd_lpf_vertical_8_dual_c, 10),
+ make_tuple(&aom_highbd_lpf_vertical_8_dual_avx2,
+ &aom_highbd_lpf_vertical_8_dual_c, 12),
+};
+
+INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test9Param,
+ ::testing::ValuesIn(kHbdLoop8Test9Avx2));
+#endif
#endif
#if HAVE_NEON && (!CONFIG_PARALLEL_DEBLOCKING)
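
The speed tests added above rely on GoogleTest's DISABLED_ naming convention:
tests whose names start with DISABLED_ are compiled but skipped in normal runs,
so the kSpeedTestNum (500000) iteration loops do not slow down regular testing.
They can still be run on demand; an illustrative invocation:

  ./test_libaom --gtest_filter='*Loop8Test6Param.DISABLED_Speed*' \
                --gtest_also_run_disabled_tests
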
diff --git a/third_party/aom/test/minmax_test.cc b/third_party/aom/test/minmax_test.cc
index f82529192..aaac72c65 100644
--- a/third_party/aom/test/minmax_test.cc
+++ b/third_party/aom/test/minmax_test.cc
@@ -108,10 +108,10 @@ TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
int min_ref, max_ref, min, max;
reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
- EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
- << " and b_stride = " << b_stride;
- EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
- << " and b_stride = " << b_stride;
+ EXPECT_EQ(max_ref, max)
+ << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
+ EXPECT_EQ(min_ref, min)
+ << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
}
}
}
diff --git a/third_party/aom/test/quantize_func_test.cc b/third_party/aom/test/quantize_func_test.cc
index 94dd056b4..2e4829021 100644
--- a/third_party/aom/test/quantize_func_test.cc
+++ b/third_party/aom/test/quantize_func_test.cc
@@ -157,8 +157,8 @@ class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
<< " Q: " << q << " coeff: " << coeff_ptr[j];
}
- ASSERT_EQ(eob[0], eob[1]) << "eobs mismatch on test: " << i
- << " Q: " << q;
+ ASSERT_EQ(eob[0], eob[1])
+ << "eobs mismatch on test: " << i << " Q: " << q;
}
}
diff --git a/third_party/aom/test/register_state_check.h b/third_party/aom/test/register_state_check.h
index 330820173..cce662a6d 100644
--- a/third_party/aom/test/register_state_check.h
+++ b/third_party/aom/test/register_state_check.h
@@ -49,7 +49,7 @@ namespace libaom_test {
class RegisterStateCheck {
public:
RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
- ~RegisterStateCheck() { EXPECT_TRUE(Check()); }
+ ~RegisterStateCheck() { Check(); }
private:
static bool StoreRegisters(CONTEXT *const context) {
@@ -62,10 +62,10 @@ class RegisterStateCheck {
}
-  // Compares the register state. Returns true if the states match.
+  // Compares the register state; asserts that the states match.
- bool Check() const {
- if (!initialized_) return false;
+ void Check() const {
+ ASSERT_TRUE(initialized_);
CONTEXT post_context;
- if (!StoreRegisters(&post_context)) return false;
+ ASSERT_TRUE(StoreRegisters(&post_context));
const M128A *xmm_pre = &pre_context_.Xmm6;
const M128A *xmm_post = &post_context.Xmm6;
@@ -74,7 +74,6 @@ class RegisterStateCheck {
++xmm_pre;
++xmm_post;
}
- return !testing::Test::HasNonfatalFailure();
}
bool initialized_;
@@ -105,7 +104,7 @@ namespace libaom_test {
class RegisterStateCheck {
public:
RegisterStateCheck() { initialized_ = StoreRegisters(pre_store_); }
- ~RegisterStateCheck() { EXPECT_TRUE(Check()); }
+ ~RegisterStateCheck() { Check(); }
private:
static bool StoreRegisters(int64_t store[8]) {
@@ -114,15 +113,14 @@ class RegisterStateCheck {
}
-  // Compares the register state. Returns true if the states match.
+  // Compares the register state; asserts that the states match.
- bool Check() const {
- if (!initialized_) return false;
+ void Check() const {
+ ASSERT_TRUE(initialized_);
int64_t post_store[8];
aom_push_neon(post_store);
for (int i = 0; i < 8; ++i) {
- EXPECT_EQ(pre_store_[i], post_store[i]) << "d" << i + 8
- << " has been modified";
+ EXPECT_EQ(pre_store_[i], post_store[i])
+ << "d" << i + 8 << " has been modified";
}
- return !testing::Test::HasNonfatalFailure();
}
bool initialized_;
@@ -159,12 +157,12 @@ class RegisterStateCheckMMX {
RegisterStateCheckMMX() {
__asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_));
}
- ~RegisterStateCheckMMX() { EXPECT_TRUE(Check()); }
+ ~RegisterStateCheckMMX() { Check(); }
private:
-  // Checks the FPU tag word pre/post execution, returning false if not cleared
-  // to 0xffff.
+  // Checks the FPU tag word pre/post execution, failing the test if it is not
+  // cleared to 0xffff.
- bool Check() const {
+ void Check() const {
EXPECT_EQ(0xffff, pre_fpu_env_[4])
<< "FPU was in an inconsistent state prior to call";
@@ -172,7 +170,6 @@ class RegisterStateCheckMMX {
__asm__ volatile("fstenv %0" : "=rm"(post_fpu_env));
EXPECT_EQ(0xffff, post_fpu_env[4])
<< "FPU was left in an inconsistent state after call";
- return !testing::Test::HasNonfatalFailure();
}
uint16_t pre_fpu_env_[14];
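
A note on the Check() signature change above: GoogleTest's ASSERT_* macros
abort the enclosing function with a bare `return;` on failure, so they compile
only in functions returning void. That is presumably why Check() was changed
from bool to void with the assertions moved inside, and why the destructors now
call Check() directly instead of wrapping it in EXPECT_TRUE. A minimal
illustration:

  // ASSERT_* requires a void-returning enclosing function.
  void CheckVoid(bool ok) {
    ASSERT_TRUE(ok);  // fine: expands to `return;` on failure
  }
  // bool CheckBool(bool ok) {
  //   ASSERT_TRUE(ok);  // would not compile: bare `return;` in a bool function
  //   return true;
  // }
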
diff --git a/third_party/aom/test/resize_test.cc b/third_party/aom/test/resize_test.cc
index 802713d32..c4e924de0 100644
--- a/third_party/aom/test/resize_test.cc
+++ b/third_party/aom/test/resize_test.cc
@@ -298,10 +298,10 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
unsigned int expected_h;
ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
&expected_h, 0);
- EXPECT_EQ(expected_w, info->w) << "Frame " << frame
- << " had unexpected width";
- EXPECT_EQ(expected_h, info->h) << "Frame " << frame
- << " had unexpected height";
+ EXPECT_EQ(expected_w, info->w)
+ << "Frame " << frame << " had unexpected width";
+ EXPECT_EQ(expected_h, info->h)
+ << "Frame " << frame << " had unexpected height";
}
}
@@ -351,11 +351,11 @@ class ResizeInternalTest : public ResizeTest {
encoder->Config(&cfg_);
}
} else {
- if (video->frame() == kStepDownFrame) {
+ if (video->frame() >= kStepDownFrame && video->frame() < kStepUpFrame) {
struct aom_scaling_mode mode = { AOME_FOURFIVE, AOME_THREEFIVE };
encoder->Control(AOME_SET_SCALEMODE, &mode);
}
- if (video->frame() == kStepUpFrame) {
+ if (video->frame() >= kStepUpFrame) {
struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
encoder->Control(AOME_SET_SCALEMODE, &mode);
}
@@ -364,7 +364,7 @@ class ResizeInternalTest : public ResizeTest {
virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0];
- EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
+ EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.5);
}
#if WRITE_COMPRESSED_STREAM
@@ -406,6 +406,9 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
info != frame_info_list_.end(); ++info) {
+ }
+ for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+ info != frame_info_list_.end(); ++info) {
const aom_codec_pts_t pts = info->pts;
if (pts >= kStepDownFrame && pts < kStepUpFrame) {
ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
@@ -509,10 +512,10 @@ TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
unsigned int expected_h;
ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
&expected_h, 1);
- EXPECT_EQ(expected_w, info->w) << "Frame " << frame
- << " had unexpected width";
- EXPECT_EQ(expected_h, info->h) << "Frame " << frame
- << " had unexpected height";
+ EXPECT_EQ(expected_w, info->w)
+ << "Frame " << frame << " had unexpected width";
+ EXPECT_EQ(expected_h, info->h)
+ << "Frame " << frame << " had unexpected height";
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
}
}
@@ -520,7 +523,7 @@ TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Run at low bitrate, with resize_allowed = 1, and verify that we get
// one resize down event.
-TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
+TEST_P(ResizeRealtimeTest, DISABLED_TestInternalResizeDown) {
::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 299);
DefaultConfig();
@@ -558,7 +561,7 @@ TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
 // Start at a low target bitrate, raise the bitrate in the middle of the clip,
 // and verify that scaling up occurs after the bitrate change.
-TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
+TEST_P(ResizeRealtimeTest, DISABLED_TestInternalResizeDownUpChangeBitRate) {
::libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 359);
DefaultConfig();
@@ -693,7 +696,11 @@ class ResizingCspVideoSource : public ::libaom_test::DummyVideoSource {
}
};
+#if (defined(DISABLE_TRELLISQ_SEARCH) && DISABLE_TRELLISQ_SEARCH)
+TEST_P(ResizeCspTest, DISABLED_TestResizeCspWorks) {
+#else
TEST_P(ResizeCspTest, TestResizeCspWorks) {
+#endif
ResizingCspVideoSource video;
init_flags_ = AOM_CODEC_USE_PSNR;
cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
@@ -704,7 +711,7 @@ TEST_P(ResizeCspTest, TestResizeCspWorks) {
AV1_INSTANTIATE_TEST_CASE(ResizeTest,
::testing::Values(::libaom_test::kRealTime));
AV1_INSTANTIATE_TEST_CASE(ResizeInternalTest,
- ::testing::Values(::libaom_test::kOnePassBest));
+ ::testing::Values(::libaom_test::kOnePassGood));
AV1_INSTANTIATE_TEST_CASE(ResizeRealtimeTest,
::testing::Values(::libaom_test::kRealTime),
::testing::Range(5, 9));
diff --git a/third_party/aom/test/scan_test.cc b/third_party/aom/test/scan_test.cc
index 16c831c8e..2b11bd1fb 100644
--- a/third_party/aom/test/scan_test.cc
+++ b/third_party/aom/test/scan_test.cc
@@ -43,6 +43,7 @@ TEST(ScanTest, av1_augment_prob) {
}
}
+#if USE_TOPOLOGICAL_SORT
TEST(ScanTest, av1_update_sort_order) {
const TX_SIZE tx_size = TX_4X4;
const TX_TYPE tx_type = DCT_DCT;
@@ -54,7 +55,9 @@ TEST(ScanTest, av1_update_sort_order) {
av1_update_sort_order(tx_size, tx_type, prob, sort_order);
for (int i = 0; i < 16; ++i) EXPECT_EQ(ref_sort_order[i], sort_order[i]);
}
+#endif
+#if USE_TOPOLOGICAL_SORT
TEST(ScanTest, av1_update_scan_order) {
TX_SIZE tx_size = TX_4X4;
const TX_TYPE tx_type = DCT_DCT;
@@ -74,6 +77,7 @@ TEST(ScanTest, av1_update_scan_order) {
EXPECT_EQ(i, scan[ref_iscan[i]]);
}
}
+#endif
TEST(ScanTest, av1_update_neighbors) {
TX_SIZE tx_size = TX_4X4;
@@ -94,4 +98,33 @@ TEST(ScanTest, av1_update_neighbors) {
}
}
+#if USE_2X2_PROB
+TEST(ScanTest, av1_down_sample_scan_count) {
+ const uint32_t non_zero_count[256] = {
+ 13, 12, 11, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 9, 10, 8, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 12, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 13, 9, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ const uint32_t ref_non_zero_count_ds[64] = {
+ 13, 11, 0, 0, 0, 0, 0, 0, 11, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ uint32_t non_zero_count_ds[64];
+ av1_down_sample_scan_count(non_zero_count_ds, non_zero_count, TX_16X16);
+ for (int i = 0; i < 64; ++i) {
+ EXPECT_EQ(ref_non_zero_count_ds[i], non_zero_count_ds[i]);
+ }
+}
+#endif
+
} // namespace
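
The expected vector in the av1_down_sample_scan_count test above is consistent
with plain 2-to-1 decimation: each entry of the 8x8 output equals the top-left
count of the corresponding 2x2 group of the 16x16 input (13, 11, 11, 9 survive
in the corner; their 2x2 neighbors are dropped). A sketch of that mapping,
offered as a reading aid only; the library implementation may aggregate
differently:

  // Decimation consistent with the test vectors above (illustrative).
  static void down_sample_2x2_topleft_sketch(uint32_t out[64],
                                             const uint32_t in[256]) {
    for (int r = 0; r < 8; ++r)
      for (int c = 0; c < 8; ++c) out[r * 8 + c] = in[(2 * r) * 16 + (2 * c)];
  }
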
diff --git a/third_party/aom/test/selfguided_filter_test.cc b/third_party/aom/test/selfguided_filter_test.cc
index 736e3f4c0..55ce1d5de 100644
--- a/third_party/aom/test/selfguided_filter_test.cc
+++ b/third_party/aom/test/selfguided_filter_test.cc
@@ -40,18 +40,25 @@ class AV1SelfguidedFilterTest
protected:
void RunSpeedTest() {
- const int w = 256, h = 256;
+ const int pu_width = RESTORATION_PROC_UNIT_SIZE;
+ const int pu_height = RESTORATION_PROC_UNIT_SIZE;
+ const int width = 256, height = 256, stride = 288, out_stride = 288;
const int NUM_ITERS = 2000;
- int i, j;
+ int i, j, k;
- uint8_t *input = (uint8_t *)aom_memalign(16, w * h * sizeof(uint8_t));
- uint8_t *output = (uint8_t *)aom_memalign(16, w * h * sizeof(uint8_t));
+ uint8_t *input_ =
+ (uint8_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint8_t));
+ uint8_t *output_ = (uint8_t *)aom_memalign(
+ 16, out_stride * (height + 32) * sizeof(uint8_t));
int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+ uint8_t *input = input_ + stride * 16 + 16;
+ uint8_t *output = output_ + out_stride * 16 + 16;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) input[i * w + j] = rnd.Rand16() & 0xFF;
+ for (i = -16; i < height + 16; ++i)
+ for (j = -16; j < width + 16; ++j)
+ input[i * stride + j] = rnd.Rand16() & 0xFF;
int xqd[2] = {
SGRPROJ_PRJ_MIN0 +
@@ -67,20 +74,30 @@ class AV1SelfguidedFilterTest
std::clock_t start = std::clock();
for (i = 0; i < NUM_ITERS; ++i) {
- apply_selfguided_restoration(input, w, h, w, eps, xqd, output, w, tmpbuf);
+ for (k = 0; k < height; k += pu_height)
+ for (j = 0; j < width; j += pu_width) {
+ int w = AOMMIN(pu_width, width - j);
+ int h = AOMMIN(pu_height, height - k);
+ uint8_t *input_p = input + k * stride + j;
+ uint8_t *output_p = output + k * out_stride + j;
+ apply_selfguided_restoration(input_p, w, h, stride, eps, xqd,
+ output_p, out_stride, tmpbuf);
+ }
}
std::clock_t end = std::clock();
double elapsed = ((end - start) / (double)CLOCKS_PER_SEC);
- printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, w, h,
- elapsed, elapsed * 1000000. / NUM_ITERS);
+ printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width,
+ height, elapsed, elapsed * 1000000. / NUM_ITERS);
- aom_free(input);
- aom_free(output);
+ aom_free(input_);
+ aom_free(output_);
aom_free(tmpbuf);
}
void RunCorrectnessTest() {
+ const int pu_width = RESTORATION_PROC_UNIT_SIZE;
+ const int pu_height = RESTORATION_PROC_UNIT_SIZE;
// Set the maximum width/height to test here. We actually test a small
   // range of sizes *up to* this size, so that we can check, e.g.,
// the behaviour on tiles which are not a multiple of 4 wide.
@@ -88,21 +105,26 @@ class AV1SelfguidedFilterTest
const int NUM_ITERS = 81;
int i, j, k;
- uint8_t *input =
- (uint8_t *)aom_memalign(16, stride * max_h * sizeof(uint8_t));
- uint8_t *output =
- (uint8_t *)aom_memalign(16, out_stride * max_h * sizeof(uint8_t));
- uint8_t *output2 =
- (uint8_t *)aom_memalign(16, out_stride * max_h * sizeof(uint8_t));
+ uint8_t *input_ =
+ (uint8_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint8_t));
+ uint8_t *output_ = (uint8_t *)aom_memalign(
+ 16, out_stride * (max_h + 32) * sizeof(uint8_t));
+ uint8_t *output2_ = (uint8_t *)aom_memalign(
+ 16, out_stride * (max_h + 32) * sizeof(uint8_t));
int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+ uint8_t *input = input_ + stride * 16 + 16;
+ uint8_t *output = output_ + out_stride * 16 + 16;
+ uint8_t *output2 = output2_ + out_stride * 16 + 16;
+
ACMRandom rnd(ACMRandom::DeterministicSeed());
av1_loop_restoration_precal();
for (i = 0; i < NUM_ITERS; ++i) {
- for (j = 0; j < max_h; ++j)
- for (k = 0; k < max_w; ++k) input[j * stride + k] = rnd.Rand16() & 0xFF;
+ for (j = -16; j < max_h + 16; ++j)
+ for (k = -16; k < max_w + 16; ++k)
+ input[j * stride + k] = rnd.Rand16() & 0xFF;
int xqd[2] = {
SGRPROJ_PRJ_MIN0 +
@@ -116,18 +138,33 @@ class AV1SelfguidedFilterTest
int test_w = max_w - (i / 9);
int test_h = max_h - (i % 9);
+ for (k = 0; k < test_h; k += pu_height)
+ for (j = 0; j < test_w; j += pu_width) {
+ int w = AOMMIN(pu_width, test_w - j);
+ int h = AOMMIN(pu_height, test_h - k);
+ uint8_t *input_p = input + k * stride + j;
+ uint8_t *output_p = output + k * out_stride + j;
+ uint8_t *output2_p = output2 + k * out_stride + j;
+ apply_selfguided_restoration(input_p, w, h, stride, eps, xqd,
+ output_p, out_stride, tmpbuf);
+ apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
+ output2_p, out_stride, tmpbuf);
+ }
+ /*
apply_selfguided_restoration(input, test_w, test_h, stride, eps, xqd,
output, out_stride, tmpbuf);
apply_selfguided_restoration_c(input, test_w, test_h, stride, eps, xqd,
output2, out_stride, tmpbuf);
+ */
for (j = 0; j < test_h; ++j)
- for (k = 0; k < test_w; ++k)
+ for (k = 0; k < test_w; ++k) {
ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
+ }
}
- aom_free(input);
- aom_free(output);
- aom_free(output2);
+ aom_free(input_);
+ aom_free(output_);
+ aom_free(output2_);
aom_free(tmpbuf);
}
};
@@ -135,9 +172,8 @@ class AV1SelfguidedFilterTest
TEST_P(AV1SelfguidedFilterTest, SpeedTest) { RunSpeedTest(); }
TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
-const FilterTestParam params[] = { make_tuple() };
-
#if HAVE_SSE4_1
+const FilterTestParam params[] = { make_tuple() };
INSTANTIATE_TEST_CASE_P(SSE4_1, AV1SelfguidedFilterTest,
::testing::ValuesIn(params));
#endif
@@ -156,20 +192,27 @@ class AV1HighbdSelfguidedFilterTest
protected:
void RunSpeedTest() {
- const int w = 256, h = 256;
+ const int pu_width = RESTORATION_PROC_UNIT_SIZE;
+ const int pu_height = RESTORATION_PROC_UNIT_SIZE;
+ const int width = 256, height = 256, stride = 288, out_stride = 288;
const int NUM_ITERS = 2000;
- int i, j;
+ int i, j, k;
int bit_depth = GET_PARAM(0);
int mask = (1 << bit_depth) - 1;
- uint16_t *input = (uint16_t *)aom_memalign(16, w * h * sizeof(uint16_t));
- uint16_t *output = (uint16_t *)aom_memalign(16, w * h * sizeof(uint16_t));
+ uint16_t *input_ =
+ (uint16_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint16_t));
+ uint16_t *output_ = (uint16_t *)aom_memalign(
+ 16, out_stride * (height + 32) * sizeof(uint16_t));
int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+ uint16_t *input = input_ + stride * 16 + 16;
+ uint16_t *output = output_ + out_stride * 16 + 16;
ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) input[i * w + j] = rnd.Rand16() & mask;
+ for (i = -16; i < height + 16; ++i)
+ for (j = -16; j < width + 16; ++j)
+ input[i * stride + j] = rnd.Rand16() & mask;
int xqd[2] = {
SGRPROJ_PRJ_MIN0 +
@@ -185,21 +228,31 @@ class AV1HighbdSelfguidedFilterTest
std::clock_t start = std::clock();
for (i = 0; i < NUM_ITERS; ++i) {
- apply_selfguided_restoration_highbd(input, w, h, w, bit_depth, eps, xqd,
- output, w, tmpbuf);
+ for (k = 0; k < height; k += pu_height)
+ for (j = 0; j < width; j += pu_width) {
+ int w = AOMMIN(pu_width, width - j);
+ int h = AOMMIN(pu_height, height - k);
+ uint16_t *input_p = input + k * stride + j;
+ uint16_t *output_p = output + k * out_stride + j;
+ apply_selfguided_restoration_highbd(input_p, w, h, stride, bit_depth,
+ eps, xqd, output_p, out_stride,
+ tmpbuf);
+ }
}
std::clock_t end = std::clock();
double elapsed = ((end - start) / (double)CLOCKS_PER_SEC);
- printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, w, h,
- elapsed, elapsed * 1000000. / NUM_ITERS);
+ printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width,
+ height, elapsed, elapsed * 1000000. / NUM_ITERS);
- aom_free(input);
- aom_free(output);
+ aom_free(input_);
+ aom_free(output_);
aom_free(tmpbuf);
}
void RunCorrectnessTest() {
+ const int pu_width = RESTORATION_PROC_UNIT_SIZE;
+ const int pu_height = RESTORATION_PROC_UNIT_SIZE;
// Set the maximum width/height to test here. We actually test a small
   // range of sizes *up to* this size, so that we can check, e.g.,
// the behaviour on tiles which are not a multiple of 4 wide.
@@ -209,21 +262,26 @@ class AV1HighbdSelfguidedFilterTest
int bit_depth = GET_PARAM(0);
int mask = (1 << bit_depth) - 1;
- uint16_t *input =
- (uint16_t *)aom_memalign(16, stride * max_h * sizeof(uint16_t));
- uint16_t *output =
- (uint16_t *)aom_memalign(16, out_stride * max_h * sizeof(uint16_t));
- uint16_t *output2 =
- (uint16_t *)aom_memalign(16, out_stride * max_h * sizeof(uint16_t));
+ uint16_t *input_ =
+ (uint16_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint16_t));
+ uint16_t *output_ = (uint16_t *)aom_memalign(
+ 16, out_stride * (max_h + 32) * sizeof(uint16_t));
+ uint16_t *output2_ = (uint16_t *)aom_memalign(
+ 16, out_stride * (max_h + 32) * sizeof(uint16_t));
int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
+ uint16_t *input = input_ + stride * 16 + 16;
+ uint16_t *output = output_ + out_stride * 16 + 16;
+ uint16_t *output2 = output2_ + out_stride * 16 + 16;
+
ACMRandom rnd(ACMRandom::DeterministicSeed());
av1_loop_restoration_precal();
for (i = 0; i < NUM_ITERS; ++i) {
- for (j = 0; j < max_h; ++j)
- for (k = 0; k < max_w; ++k) input[j * stride + k] = rnd.Rand16() & mask;
+ for (j = -16; j < max_h + 16; ++j)
+ for (k = -16; k < max_w + 16; ++k)
+ input[j * stride + k] = rnd.Rand16() & mask;
int xqd[2] = {
SGRPROJ_PRJ_MIN0 +
@@ -237,20 +295,37 @@ class AV1HighbdSelfguidedFilterTest
int test_w = max_w - (i / 9);
int test_h = max_h - (i % 9);
+ for (k = 0; k < test_h; k += pu_height)
+ for (j = 0; j < test_w; j += pu_width) {
+ int w = AOMMIN(pu_width, test_w - j);
+ int h = AOMMIN(pu_height, test_h - k);
+ uint16_t *input_p = input + k * stride + j;
+ uint16_t *output_p = output + k * out_stride + j;
+ uint16_t *output2_p = output2 + k * out_stride + j;
+ apply_selfguided_restoration_highbd(input_p, w, h, stride, bit_depth,
+ eps, xqd, output_p, out_stride,
+ tmpbuf);
+ apply_selfguided_restoration_highbd_c(input_p, w, h, stride,
+ bit_depth, eps, xqd, output2_p,
+ out_stride, tmpbuf);
+ }
+
+ /*
apply_selfguided_restoration_highbd(input, test_w, test_h, stride,
bit_depth, eps, xqd, output,
out_stride, tmpbuf);
apply_selfguided_restoration_highbd_c(input, test_w, test_h, stride,
bit_depth, eps, xqd, output2,
out_stride, tmpbuf);
+ */
for (j = 0; j < test_h; ++j)
for (k = 0; k < test_w; ++k)
ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
}
- aom_free(input);
- aom_free(output);
- aom_free(output2);
+ aom_free(input_);
+ aom_free(output_);
+ aom_free(output2_);
aom_free(tmpbuf);
}
};
@@ -258,10 +333,9 @@ class AV1HighbdSelfguidedFilterTest
TEST_P(AV1HighbdSelfguidedFilterTest, SpeedTest) { RunSpeedTest(); }
TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
+#if HAVE_SSE4_1
const HighbdFilterTestParam highbd_params[] = { make_tuple(8), make_tuple(10),
make_tuple(12) };
-
-#if HAVE_SSE4_1
INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdSelfguidedFilterTest,
::testing::ValuesIn(highbd_params));
#endif
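
A recurring pattern in the updated selfguided tests: buffers are allocated with
32 extra rows and addressed through an offset of stride * 16 + 16, reserving a
16-pixel border on every side so the filter may read slightly outside the
nominal tile. The idiom, sketched generically (identifier names illustrative):

  const int kBorder = 16;  // border reserved on each side
  uint8_t *buf_ = (uint8_t *)aom_memalign(
      16, stride * (height + 2 * kBorder) * sizeof(uint8_t));
  uint8_t *buf = buf_ + stride * kBorder + kBorder;  // interior origin
  // buf[i * stride + j] stays in bounds for i, j in [-kBorder, size + kBorder),
  // provided stride >= width + 2 * kBorder (here 288 >= 256 + 2 * 16).
  aom_free(buf_);  // free the original allocation, never the offset pointer
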
diff --git a/third_party/aom/test/simd_cmp_impl.h b/third_party/aom/test/simd_cmp_impl.h
index 46f46d751..03fe703d9 100644
--- a/third_party/aom/test/simd_cmp_impl.h
+++ b/third_party/aom/test/simd_cmp_impl.h
@@ -371,10 +371,10 @@ typedef struct {
fptr simd;
} mapping;
-#define MAP(name) \
- { \
- #name, reinterpret_cast < fptr > (c_##name), \
- reinterpret_cast < fptr > (name) \
+#define MAP(name) \
+ { \
+ #name, \
+ reinterpret_cast < fptr > (c_##name), reinterpret_cast < fptr > (name) \
}
const mapping m[] = { MAP(v64_sad_u8),
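
The MAP change above is whitespace-only; both forms of the macro expand to the
same initializer (the odd spacing around reinterpret_cast inside the macro is
how clang-format renders the token-pasted body). Mechanical expansion gives:

  // MAP(v64_sad_u8) expands to:
  { "v64_sad_u8", reinterpret_cast<fptr>(c_v64_sad_u8),
    reinterpret_cast<fptr>(v64_sad_u8) }
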
diff --git a/third_party/aom/test/subtract_test.cc b/third_party/aom/test/subtract_test.cc
index ad39f56b3..725a6a2c6 100644
--- a/third_party/aom/test/subtract_test.cc
+++ b/third_party/aom/test/subtract_test.cc
@@ -130,7 +130,11 @@ class AV1HBDSubtractBlockTest : public ::testing::TestWithParam<Params> {
rnd_.Reset(ACMRandom::DeterministicSeed());
+#if CONFIG_EXT_PARTITION
const size_t max_width = 128;
+#else
+ const size_t max_width = 64;
+#endif
const size_t max_block_size = max_width * max_width;
src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
aom_memalign(16, max_block_size * sizeof(uint16_t))));
@@ -147,8 +151,8 @@ class AV1HBDSubtractBlockTest : public ::testing::TestWithParam<Params> {
}
protected:
- void RunForSpeed();
void CheckResult();
+ void RunForSpeed();
private:
ACMRandom rnd_;
@@ -161,27 +165,13 @@ class AV1HBDSubtractBlockTest : public ::testing::TestWithParam<Params> {
int16_t *diff_;
};
-void AV1HBDSubtractBlockTest::RunForSpeed() {
- const int test_num = 200000;
- const int max_width = 128;
- const int max_block_size = max_width * max_width;
- const int mask = (1 << bit_depth_) - 1;
- int i, j;
-
- for (j = 0; j < max_block_size; ++j) {
- CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask;
- CONVERT_TO_SHORTPTR(pred_)[j] = rnd_.Rand16() & mask;
- }
-
- for (i = 0; i < test_num; ++i) {
- func_(block_height_, block_width_, diff_, block_width_, src_, block_width_,
- pred_, block_width_, bit_depth_);
- }
-}
-
void AV1HBDSubtractBlockTest::CheckResult() {
const int test_num = 100;
- const int max_width = 128;
+#if CONFIG_EXT_PARTITION
+ const size_t max_width = 128;
+#else
+ const size_t max_width = 64;
+#endif
const int max_block_size = max_width * max_width;
const int mask = (1 << bit_depth_) - 1;
int i, j;
@@ -208,9 +198,29 @@ void AV1HBDSubtractBlockTest::CheckResult() {
TEST_P(AV1HBDSubtractBlockTest, CheckResult) { CheckResult(); }
-#if USE_SPEED_TEST
-TEST_P(AV1HBDSubtractBlockTest, CheckSpeed) { RunForSpeed(); }
-#endif // USE_SPEED_TEST
+void AV1HBDSubtractBlockTest::RunForSpeed() {
+ const int test_num = 200000;
+#if CONFIG_EXT_PARTITION
+ const size_t max_width = 128;
+#else
+ const size_t max_width = 64;
+#endif
+ const int max_block_size = max_width * max_width;
+ const int mask = (1 << bit_depth_) - 1;
+ int i, j;
+
+ for (j = 0; j < max_block_size; ++j) {
+ CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask;
+ CONVERT_TO_SHORTPTR(pred_)[j] = rnd_.Rand16() & mask;
+ }
+
+ for (i = 0; i < test_num; ++i) {
+ func_(block_height_, block_width_, diff_, block_width_, src_, block_width_,
+ pred_, block_width_, bit_depth_);
+ }
+}
+
+TEST_P(AV1HBDSubtractBlockTest, DISABLED_Speed) { RunForSpeed(); }
#if HAVE_SSE2
@@ -241,12 +251,14 @@ const Params kAV1HBDSubtractBlock_sse2[] = {
make_tuple(64, 32, 12, &aom_highbd_subtract_block_c),
make_tuple(64, 64, 12, &aom_highbd_subtract_block_sse2),
make_tuple(64, 64, 12, &aom_highbd_subtract_block_c),
+#if CONFIG_EXT_PARTITION
make_tuple(64, 128, 12, &aom_highbd_subtract_block_sse2),
make_tuple(64, 128, 12, &aom_highbd_subtract_block_c),
make_tuple(128, 64, 12, &aom_highbd_subtract_block_sse2),
make_tuple(128, 64, 12, &aom_highbd_subtract_block_c),
make_tuple(128, 128, 12, &aom_highbd_subtract_block_sse2),
make_tuple(128, 128, 12, &aom_highbd_subtract_block_c)
+#endif // CONFIG_EXT_PARTITION
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1HBDSubtractBlockTest,
diff --git a/third_party/aom/test/test-data.mk b/third_party/aom/test/test-data.mk
index 083b34953..d82033e3b 100644
--- a/third_party/aom/test/test-data.mk
+++ b/third_party/aom/test/test-data.mk
@@ -40,6 +40,10 @@ LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += tacomasmallcameramovement_640_480_30.y
LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += thaloundeskmtg_640_480_30.yuv
endif # CONFIG_ENCODE_PERF_TESTS
+ifeq ($(CONFIG_EXT_TILE),yes)
+LIBAOM_TEST_DATA-$(CONFIG_AV1_ENCODER) += vase10x10.yuv
+endif # CONFIG_EXT_TILE
+
# sort and remove duplicates
LIBAOM_TEST_DATA-yes := $(sort $(LIBAOM_TEST_DATA-yes))
diff --git a/third_party/aom/test/test-data.sha1 b/third_party/aom/test/test-data.sha1
index 3d9bfc7c4..0caf21e1e 100644
--- a/third_party/aom/test/test-data.sha1
+++ b/third_party/aom/test/test-data.sha1
@@ -26,3 +26,4 @@ e7d315dbf4f3928779e0dc624311196d44491d32 *niklas_1280_720_30.yuv
717da707afcaa1f692ff1946f291054eb75a4f06 *screendata.y4m
9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m
5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m
+36ddab9b99eb7545aa0bf362d6f498212d596516 *vase10x10.yuv
diff --git a/third_party/aom/test/test.cmake b/third_party/aom/test/test.cmake
index a02f9203f..26937c96a 100644
--- a/third_party/aom/test/test.cmake
+++ b/third_party/aom/test/test.cmake
@@ -103,6 +103,7 @@ set(AOM_UNIT_TEST_ENCODER_SOURCES
"${AOM_ROOT}/test/encode_test_driver.h"
"${AOM_ROOT}/test/error_resilience_test.cc"
"${AOM_ROOT}/test/i420_video_source.h"
+ "${AOM_ROOT}/test/resize_test.cc"
"${AOM_ROOT}/test/y4m_test.cc"
"${AOM_ROOT}/test/y4m_video_source.h"
"${AOM_ROOT}/test/yuv_video_source.h")
@@ -133,24 +134,35 @@ if (NOT BUILD_SHARED_LIBS)
"${AOM_ROOT}/test/av1_txfm_test.h"
"${AOM_ROOT}/test/intrapred_test.cc"
"${AOM_ROOT}/test/lpf_8_test.cc"
- "${AOM_ROOT}/test/motion_vector_test.cc"
"${AOM_ROOT}/test/simd_cmp_impl.h")
- if (CONFIG_CDEF)
- set(AOM_UNIT_TEST_COMMON_SOURCES
- ${AOM_UNIT_TEST_COMMON_SOURCES}
- "${AOM_ROOT}/test/clpf_test.cc"
- "${AOM_ROOT}/test/dering_test.cc")
- endif ()
+ set(AOM_UNIT_TEST_ENCODER_SOURCES
+ ${AOM_UNIT_TEST_ENCODER_SOURCES}
+ "${AOM_ROOT}/test/motion_vector_test.cc")
- if (CONFIG_FILTER_INTRA)
- if (HAVE_SSE4_1)
+ if (CONFIG_CDEF)
+ if (CONFIG_CDEF_SINGLEPASS)
+ set(AOM_UNIT_TEST_COMMON_SOURCES
+ ${AOM_UNIT_TEST_COMMON_SOURCES}
+ "${AOM_ROOT}/test/cdef_test.cc")
+ else ()
set(AOM_UNIT_TEST_COMMON_SOURCES
${AOM_UNIT_TEST_COMMON_SOURCES}
- "${AOM_ROOT}/test/filterintra_predictors_test.cc")
+ "${AOM_ROOT}/test/clpf_test.cc"
+ "${AOM_ROOT}/test/dering_test.cc")
endif ()
endif ()
+ # Omit 4-tap filter intra predictor test-- currently a 3-tap filter is in
+ # use.
+ #if (CONFIG_FILTER_INTRA)
+ # if (HAVE_SSE4_1)
+ # set(AOM_UNIT_TEST_COMMON_SOURCES
+ # ${AOM_UNIT_TEST_COMMON_SOURCES}
+ # "${AOM_ROOT}/test/filterintra_predictors_test.cc")
+ # endif ()
+ #endif ()
+
if (CONFIG_INTRABC)
set(AOM_UNIT_TEST_COMMON_SOURCES
${AOM_UNIT_TEST_COMMON_SOURCES}
@@ -160,10 +172,15 @@ if (NOT BUILD_SHARED_LIBS)
if (CONFIG_LOOP_RESTORATION)
set(AOM_UNIT_TEST_COMMON_SOURCES
${AOM_UNIT_TEST_COMMON_SOURCES}
- "${AOM_ROOT}/test/hiprec_convolve_test.cc"
+ "${AOM_ROOT}/test/selfguided_filter_test.cc")
+
+ if (HAVE_SSE2)
+ set(AOM_UNIT_TEST_COMMON_SOURCES
+ ${AOM_UNIT_TEST_COMMON_SOURCES}
+ "${AOM_ROOT}/test/hiprec_convolve_test.cc"
"${AOM_ROOT}/test/hiprec_convolve_test_util.cc"
- "${AOM_ROOT}/test/hiprec_convolve_test_util.h"
- "${AOM_ROOT}/test/selfguided_filter_test.cc")
+ "${AOM_ROOT}/test/hiprec_convolve_test_util.h")
+ endif ()
endif ()
set(AOM_UNIT_TEST_COMMON_INTRIN_NEON
@@ -202,11 +219,12 @@ if (CONFIG_AV1_ENCODER)
"${AOM_ROOT}/test/av1_fht16x16_test.cc"
"${AOM_ROOT}/test/av1_fht32x32_test.cc"
"${AOM_ROOT}/test/av1_fht8x8_test.cc"
- "${AOM_ROOT}/test/av1_inv_txfm_test.cc"
"${AOM_ROOT}/test/av1_fwd_txfm1d_test.cc"
"${AOM_ROOT}/test/av1_fwd_txfm2d_test.cc"
"${AOM_ROOT}/test/av1_inv_txfm1d_test.cc"
"${AOM_ROOT}/test/av1_inv_txfm2d_test.cc"
+ "${AOM_ROOT}/test/av1_inv_txfm_test.cc"
+ "${AOM_ROOT}/test/av1_wedge_utils_test.cc"
"${AOM_ROOT}/test/avg_test.cc"
"${AOM_ROOT}/test/blend_a64_mask_1d_test.cc"
"${AOM_ROOT}/test/blend_a64_mask_test.cc"
@@ -214,27 +232,37 @@ if (CONFIG_AV1_ENCODER)
"${AOM_ROOT}/test/fdct4x4_test.cc"
"${AOM_ROOT}/test/fdct8x8_test.cc"
"${AOM_ROOT}/test/hadamard_test.cc"
+ "${AOM_ROOT}/test/masked_sad_test.cc"
+ "${AOM_ROOT}/test/masked_variance_test.cc"
"${AOM_ROOT}/test/minmax_test.cc"
- "${AOM_ROOT}/test/quantize_func_test.cc"
"${AOM_ROOT}/test/subtract_test.cc"
"${AOM_ROOT}/test/sum_squares_test.cc"
"${AOM_ROOT}/test/variance_test.cc")
- if (CONFIG_CONVOLVE_ROUND)
+ if (NOT CONFIG_AOM_QM AND NOT CONFIG_NEW_QUANT)
set(AOM_UNIT_TEST_ENCODER_SOURCES
${AOM_UNIT_TEST_ENCODER_SOURCES}
- "${AOM_ROOT}/test/av1_convolve_2d_test.cc"
- "${AOM_ROOT}/test/av1_convolve_2d_test_util.cc"
- "${AOM_ROOT}/test/av1_convolve_2d_test_util.h"
- "${AOM_ROOT}/test/convolve_round_test.cc")
- endif ()
+ "${AOM_ROOT}/test/quantize_func_test.cc")
+ endif ()
- if (CONFIG_EXT_INTER)
+ if (CONFIG_CONVOLVE_ROUND)
set(AOM_UNIT_TEST_ENCODER_SOURCES
${AOM_UNIT_TEST_ENCODER_SOURCES}
- "${AOM_ROOT}/test/av1_wedge_utils_test.cc"
- "${AOM_ROOT}/test/masked_sad_test.cc"
- "${AOM_ROOT}/test/masked_variance_test.cc")
+ "${AOM_ROOT}/test/convolve_round_test.cc")
+ if (HAVE_SSE2)
+ set(AOM_UNIT_TEST_ENCODER_SOURCES
+ ${AOM_UNIT_TEST_ENCODER_SOURCES}
+ "${AOM_ROOT}/test/av1_convolve_2d_test.cc"
+ "${AOM_ROOT}/test/av1_convolve_2d_test_util.cc"
+ "${AOM_ROOT}/test/av1_convolve_2d_test_util.h")
+ endif ()
+ if (NOT CONFIG_COMPOUND_ROUND)
+ if (HAVE_SSE4_1)
+ set(AOM_UNIT_TEST_ENCODER_SOURCES
+ ${AOM_UNIT_TEST_ENCODER_SOURCES}
+ "${AOM_ROOT}/test/av1_convolve_scale_test.cc")
+ endif ()
+ endif ()
endif ()
if (CONFIG_EXT_TX)
@@ -274,9 +302,9 @@ if (NOT BUILD_SHARED_LIBS)
if (CONFIG_AV1_DECODER AND CONFIG_AV1_ENCODER)
set(AOM_UNIT_TEST_COMMON_SOURCES
${AOM_UNIT_TEST_COMMON_SOURCES}
- "${AOM_ROOT}/test/binary_codes_test.cc"
"${AOM_ROOT}/test/divu_small_test.cc"
"${AOM_ROOT}/test/ethread_test.cc"
+ "${AOM_ROOT}/test/coding_path_sync.cc"
"${AOM_ROOT}/test/idct8x8_test.cc"
"${AOM_ROOT}/test/partial_idct_test.cc"
"${AOM_ROOT}/test/superframe_test.cc"
@@ -290,6 +318,7 @@ if (NOT BUILD_SHARED_LIBS)
else ()
set(AOM_UNIT_TEST_COMMON_SOURCES
${AOM_UNIT_TEST_COMMON_SOURCES}
+ "${AOM_ROOT}/test/binary_codes_test.cc"
"${AOM_ROOT}/test/boolcoder_test.cc")
endif ()
@@ -327,22 +356,25 @@ if (CONFIG_UNIT_TESTS)
# Force static run time to avoid collisions with googletest.
include("${AOM_ROOT}/build/cmake/msvc_runtime.cmake")
endif ()
- include_directories(
- "${AOM_ROOT}/third_party/googletest/src/googletest/src"
- "${AOM_ROOT}/third_party/googletest/src/googletest/include")
if (BUILD_SHARED_LIBS AND APPLE)
# Silence an RPATH warning.
set(CMAKE_MACOSX_RPATH 1)
endif ()
- add_subdirectory("${AOM_ROOT}/third_party/googletest/src/googletest"
- EXCLUDE_FROM_ALL)
-
- # Generate a stub file containing the C function usage_exit(); this is
- # required because of the test dependency on aom_common_app_util.
- # Specifically, the function die() in tools_common.c calls usage_exit() to
- # terminate the program on the caller's behalf.
- file(WRITE "${AOM_CONFIG_DIR}/usage_exit.c" "void usage_exit(void) {}")
+
+ include_directories(
+ "${AOM_ROOT}/third_party/googletest/src/googletest/src"
+ "${AOM_ROOT}/third_party/googletest/src/googletest/include")
+
+ if (AOM_DISABLE_GTEST_CMAKE)
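+    # Build gtest directly from its amalgamated gtest-all.cc instead of
+    # pulling in googletest's own CMake project.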
+ include_directories("${AOM_ROOT}/third_party/googletest/src/googletest")
+ add_library(gtest STATIC
+ "${AOM_ROOT}/third_party/googletest/src/googletest/src/gtest-all.cc")
+ else ()
+ add_subdirectory("${AOM_ROOT}/third_party/googletest/src/googletest"
+ EXCLUDE_FROM_ALL)
+ endif ()
+
endif ()
# Setup the targets for CONFIG_UNIT_TESTS. The libaom and app util targets must
@@ -364,6 +396,7 @@ function (setup_aom_test_targets)
add_executable(test_libaom ${AOM_UNIT_TEST_WRAPPER_SOURCES}
$<TARGET_OBJECTS:aom_common_app_util>
$<TARGET_OBJECTS:test_aom_common>)
+ set(AOM_APP_TARGETS ${AOM_APP_TARGETS} test_libaom)
if (CONFIG_AV1_DECODER)
target_sources(test_libaom PRIVATE
@@ -390,6 +423,7 @@ function (setup_aom_test_targets)
$<TARGET_OBJECTS:aom_common_app_util>)
target_link_libraries(test_intra_pred_speed ${AOM_LIB_LINK_TYPE}
aom gtest)
+ set(AOM_APP_TARGETS ${AOM_APP_TARGETS} test_intra_pred_speed)
endif ()
endif ()
@@ -483,6 +517,8 @@ function (setup_aom_test_targets)
endforeach ()
add_custom_target(runtests)
add_dependencies(runtests ${test_targets})
+
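+  # Export the accumulated app target list to the caller's scope.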
+ set(AOM_APP_TARGETS ${AOM_APP_TARGETS} PARENT_SCOPE)
endfunction ()
endif () # AOM_TEST_TEST_CMAKE_
diff --git a/third_party/aom/test/test.mk b/third_party/aom/test/test.mk
index 4132e4f74..e6b0c534c 100644
--- a/third_party/aom/test/test.mk
+++ b/third_party/aom/test/test.mk
@@ -33,11 +33,10 @@ LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += altref_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += aq_segment_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += datarate_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += encode_api_test.cc
-LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += coding_path_sync.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += error_resilience_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += i420_video_source.h
#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += realtime_test.cc
-#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += resize_test.cc
+LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += resize_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += y4m_video_source.h
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += yuv_video_source.h
@@ -107,6 +106,7 @@ ifeq ($(CONFIG_AV1),yes)
# These tests require both the encoder and decoder to be built.
ifeq ($(CONFIG_AV1_ENCODER)$(CONFIG_AV1_DECODER),yesyes)
# IDCT test currently depends on FDCT function
+LIBAOM_TEST_SRCS-yes += coding_path_sync.cc
LIBAOM_TEST_SRCS-yes += idct8x8_test.cc
LIBAOM_TEST_SRCS-yes += partial_idct_test.cc
LIBAOM_TEST_SRCS-yes += superframe_test.cc
@@ -135,8 +135,12 @@ endif
LIBAOM_TEST_SRCS-$(CONFIG_ADAPT_SCAN) += scan_test.cc
LIBAOM_TEST_SRCS-yes += convolve_test.cc
LIBAOM_TEST_SRCS-yes += lpf_8_test.cc
+ifeq ($(CONFIG_CDEF_SINGLEPASS),yes)
+LIBAOM_TEST_SRCS-$(CONFIG_CDEF) += cdef_test.cc
+else
LIBAOM_TEST_SRCS-$(CONFIG_CDEF) += dering_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_CDEF) += clpf_test.cc
+endif
LIBAOM_TEST_SRCS-yes += simd_cmp_impl.h
LIBAOM_TEST_SRCS-$(HAVE_SSE2) += simd_cmp_sse2.cc
LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += simd_cmp_ssse3.cc
@@ -163,11 +167,9 @@ LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += error_block_test.cc
#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_quantize_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += arf_freq_test.cc
-ifneq ($(CONFIG_AOM_QM), yes)
ifneq ($(CONFIG_NEW_QUANT), yes)
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += quantize_func_test.cc
endif
-endif
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += block_error_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm_test.cc
@@ -193,11 +195,9 @@ LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += blend_a64_mask_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += blend_a64_mask_1d_test.cc
-ifeq ($(CONFIG_EXT_INTER),yes)
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += masked_variance_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += masked_sad_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_wedge_utils_test.cc
-endif
## Skip the unit test written for the 4-tap filter intra predictor, because we
## have reverted to the 3-tap filter.
@@ -252,6 +252,10 @@ LIBAOM_TEST_SRCS-$(HAVE_SSE2) += av1_convolve_2d_test_util.cc
LIBAOM_TEST_SRCS-yes += convolve_round_test.cc
endif
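+# "yesx" matches exactly when CONFIG_CONVOLVE_ROUND is "yes" and
+# CONFIG_COMPOUND_ROUND is unset, mirroring the CMake guard above.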
+ifeq (yesx,$(CONFIG_CONVOLVE_ROUND)x$(CONFIG_COMPOUND_ROUND))
+LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += av1_convolve_scale_test.cc
+endif
+
ifeq ($(CONFIG_GLOBAL_MOTION)$(CONFIG_AV1_ENCODER),yesyes)
LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += corner_match_test.cc
endif
diff --git a/third_party/aom/test/test_data_util.cmake b/third_party/aom/test/test_data_util.cmake
index e4641049d..3904734b5 100644
--- a/third_party/aom/test/test_data_util.cmake
+++ b/third_party/aom/test/test_data_util.cmake
@@ -9,6 +9,47 @@
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
+set(AOM_TEST_DATA_FILE_NAMES
+ "hantro_collage_w352h288.yuv"
+ "hantro_odd.yuv"
+ "park_joy_90p_10_420.y4m"
+ "park_joy_90p_10_422.y4m"
+ "park_joy_90p_10_444.y4m"
+ "park_joy_90p_10_440.yuv"
+ "park_joy_90p_12_420.y4m"
+ "park_joy_90p_12_422.y4m"
+ "park_joy_90p_12_444.y4m"
+ "park_joy_90p_12_440.yuv"
+ "park_joy_90p_8_420_a10-1.y4m"
+ "park_joy_90p_8_420.y4m"
+ "park_joy_90p_8_422.y4m"
+ "park_joy_90p_8_444.y4m"
+ "park_joy_90p_8_440.yuv"
+ "desktop_credits.y4m"
+ "niklas_1280_720_30.y4m"
+ "rush_hour_444.y4m"
+ "screendata.y4m"
+ "niklas_640_480_30.yuv")
+
+if (CONFIG_DECODE_PERF_TESTS AND CONFIG_AV1_ENCODER)
+ set(AOM_TEST_DATA_FILE_NAMES
+ ${AOM_TEST_DATA_FILE_NAMES}
+ "niklas_1280_720_30.yuv")
+endif ()
+
+if (CONFIG_ENCODE_PERF_TESTS AND CONFIG_AV1_ENCODER)
+ set(AOM_TEST_DATA_FILE_NAMES
+ ${AOM_TEST_DATA_FILE_NAMES}
+ "desktop_640_360_30.yuv"
+ "kirland_640_480_30.yuv"
+ "macmarcomoving_640_480_30.yuv"
+ "macmarcostationary_640_480_30.yuv"
+ "niklas_1280_720_30.yuv"
+ "tacomanarrows_640_480_30.yuv"
+ "tacomasmallcameramovement_640_480_30.yuv"
+ "thaloundeskmtg_640_480_30.yuv")
+endif ()
+
# Parses test/test-data.sha1 and writes captured file names and checksums to
# $out_files and $out_checksums as lists.
function (make_test_data_lists test_data_file out_files out_checksums)
@@ -28,8 +69,12 @@ function (make_test_data_lists test_data_file out_files out_checksums)
string(SUBSTRING "${line}" 0 ${delim_pos} checksum)
string(SUBSTRING "${line}" ${filename_pos} -1 filename)
- set(checksums ${checksums} ${checksum})
- set(filenames ${filenames} ${filename})
+ list(FIND AOM_TEST_DATA_FILE_NAMES ${filename} list_index)
+ if (NOT ${list_index} EQUAL -1)
+ # Include the name and checksum in output only when the file is needed.
+ set(checksums ${checksums} ${checksum})
+ set(filenames ${filenames} ${filename})
+ endif ()
endforeach ()
list(LENGTH filenames num_files)
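For reference, a minimal sketch (not part of the patch) of how the filtered lists from make_test_data_lists might be consumed. It assumes the test/test-data.sha1 layout of one "<checksum> *<filename>" pair per line (as in the hunk at the top of this diff); the caller and variable names below are hypothetical:

    # Hypothetical caller; names are illustrative, not from the patch.
    make_test_data_lists("${AOM_ROOT}/test/test-data.sha1" files checksums)
    list(LENGTH files num_files)
    list(LENGTH checksums num_checksums)
    if (NOT num_files EQUAL num_checksums)
      message(FATAL_ERROR "test data name/checksum count mismatch")
    endif ()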
diff --git a/third_party/aom/test/test_intra_pred_speed.cc b/third_party/aom/test/test_intra_pred_speed.cc
index 70d82484c..25289446f 100644
--- a/third_party/aom/test/test_intra_pred_speed.cc
+++ b/third_party/aom/test/test_intra_pred_speed.cc
@@ -31,199 +31,356 @@ namespace {
typedef void (*AvxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
const uint8_t *above, const uint8_t *left);
+const int kBPS = 32;
+const int kTotalPixels = kBPS * kBPS;
const int kNumAv1IntraFuncs = INTRA_MODES + 3; // 4 DC predictor variants.
const char *kAv1IntraPredNames[kNumAv1IntraFuncs] = {
"DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED",
"H_PRED", "D45_PRED", "D135_PRED", "D117_PRED", "D153_PRED",
- "D207_PRED", "D63_PRED", "TM_PRED",
-#if CONFIG_ALT_INTRA
- "SMOOTH_PRED",
+ "D207_PRED", "D63_PRED", "TM_PRED", "SMOOTH_PRED",
#if CONFIG_SMOOTH_HV
"SMOOTH_V_PRED", "SMOOTH_H_PRED",
#endif // CONFIG_SMOOTH_HV
-#endif // CONFIG_ALT_INTRA
};
+template <typename Pixel>
+struct IntraPredTestMem {
+ void Init(int block_width, int bd) {
+ libaom_test::ACMRandom rnd(libaom_test::ACMRandom::DeterministicSeed());
+ Pixel *const above = above_mem + 16;
+ const int mask = (1 << bd) - 1;
+ for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand16() & mask;
+ for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand16() & mask;
+ for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand16() & mask;
+
+ ASSERT_LE(block_width, kBPS);
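+    // Fill up bottom-left and top-right pixels.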
+ for (int i = kBPS; i < 2 * kBPS; ++i) {
+ left[i] = rnd.Rand16() & mask;
+ above[i] = rnd.Rand16() & mask;
+ }
+ }
+
+ DECLARE_ALIGNED(16, Pixel, src[kTotalPixels]);
+ DECLARE_ALIGNED(16, Pixel, ref_src[kTotalPixels]);
+ DECLARE_ALIGNED(16, Pixel, left[2 * kBPS]);
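+  // above_mem includes 16 leading pixels so that above[-1] (the top-left
+  // sample) stays in bounds while the above pointer remains aligned.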
+ DECLARE_ALIGNED(16, Pixel, above_mem[2 * kBPS + 16]);
+};
+
+// -----------------------------------------------------------------------------
+// Low Bitdepth
+
+typedef IntraPredTestMem<uint8_t> Av1IntraPredTestMem;
+
+// Note:
+// APPLY_UNIT_TESTS
+//   1: Run the unit tests (check output against the stored MD5 signatures).
+//   0: Print the computed MD5 signatures, for regenerating the arrays.
+#define APPLY_UNIT_TESTS 1
+
+void CheckMd5Signature(const char name[], const char *const signatures[],
+ const void *data, size_t data_size, int elapsed_time,
+ int idx) {
+ libaom_test::MD5 md5;
+ md5.Add(reinterpret_cast<const uint8_t *>(data), data_size);
+#if APPLY_UNIT_TESTS
+ printf("Mode %s[%13s]: %5d ms MD5: %s\n", name, kAv1IntraPredNames[idx],
+ elapsed_time, md5.Get());
+ EXPECT_STREQ(signatures[idx], md5.Get());
+#else
+ printf("\"%s\",\n", md5.Get());
+#endif
+}
+
void TestIntraPred(const char name[], AvxPredFunc const *pred_funcs,
- const char *const pred_func_names[], int num_funcs,
- const char *const signatures[], int /*block_size*/,
- int num_pixels_per_test) {
- libaom_test::ACMRandom rnd(libaom_test::ACMRandom::DeterministicSeed());
- const int kBPS = 32;
- const int kTotalPixels = 32 * kBPS;
- DECLARE_ALIGNED(16, uint8_t, src[kTotalPixels]);
- DECLARE_ALIGNED(16, uint8_t, ref_src[kTotalPixels]);
- DECLARE_ALIGNED(16, uint8_t, left[2 * kBPS]);
- DECLARE_ALIGNED(16, uint8_t, above_mem[2 * kBPS + 16]);
- uint8_t *const above = above_mem + 16;
- for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand8();
- for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand8();
- for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand8();
+ const char *const signatures[], int block_width,
+ int block_height) {
+ const int num_pixels_per_test =
+ block_width * block_height * kNumAv1IntraFuncs;
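+  // Scale the iteration count so total work stays near 2e10 predicted pixels
+  // across all predictors, independent of block size.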
const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
+ Av1IntraPredTestMem intra_pred_test_mem;
+ const uint8_t *const above = intra_pred_test_mem.above_mem + 16;
- // Fill up bottom-left and top-right pixels.
- for (int i = kBPS; i < 2 * kBPS; ++i) {
- left[i] = rnd.Rand8();
- above[i] = rnd.Rand8();
- }
+ intra_pred_test_mem.Init(block_width, 8);
- for (int k = 0; k < num_funcs; ++k) {
+ for (int k = 0; k < kNumAv1IntraFuncs; ++k) {
if (pred_funcs[k] == NULL) continue;
- memcpy(src, ref_src, sizeof(src));
+ memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src,
+ sizeof(intra_pred_test_mem.src));
aom_usec_timer timer;
aom_usec_timer_start(&timer);
for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
- pred_funcs[k](src, kBPS, above, left);
+ pred_funcs[k](intra_pred_test_mem.src, kBPS, above,
+ intra_pred_test_mem.left);
}
libaom_test::ClearSystemState();
aom_usec_timer_mark(&timer);
const int elapsed_time =
static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
- libaom_test::MD5 md5;
- md5.Add(src, sizeof(src));
- printf("Mode %s[%12s]: %5d ms MD5: %s\n", name, pred_func_names[k],
- elapsed_time, md5.Get());
- EXPECT_STREQ(signatures[k], md5.Get());
+ CheckMd5Signature(name, signatures, intra_pred_test_mem.src,
+ sizeof(intra_pred_test_mem.src), elapsed_time, k);
}
}
-void TestIntraPred4(AvxPredFunc const *pred_funcs) {
- static const char *const kSignatures[kNumAv1IntraFuncs] = {
- "4334156168b34ab599d9b5b30f522fe9",
- "bc4649d5ba47c7ff178d92e475960fb0",
- "8d316e5933326dcac24e1064794b5d12",
- "a27270fed024eafd762c95de85f4da51",
- "c33dff000d4256c2b8f3bf9e9bab14d2",
- "44d8cddc2ad8f79b8ed3306051722b4f",
- "df62e96dfcb25d8a435482756a6fa990",
- "ecb0d56ae5f677ea45127ce9d5c058e4",
- "0b7936841f6813da818275944895b574",
- "9117972ef64f91a58ff73e1731c81db2",
- "46d493dccf6e5356c6f3c0c73b7dd141",
- "b852f42e6c4991d415400332d567872f",
-#if CONFIG_ALT_INTRA
- "828c49a4248993cce4876fa26eab697f",
- "718c8cee9011f92ef31f77a9a7560010",
-#if CONFIG_SMOOTH_HV
- "b37eeadbbd9e3bdff023a5097b59213a",
- "d6fb9c659d82c78f0d0c891da6cba87f",
+void TestIntraPred4(const char *block_name, AvxPredFunc const *pred_funcs) {
+ static const char *const kSignatures4x4[kNumAv1IntraFuncs] = {
+ "e7ed7353c3383fff942e500e9bfe82fe",
+ "2a4a26fcc6ce005eadc08354d196c8a9",
+ "269d92eff86f315d9c38fe7640d85b15",
+ "ae2960eea9f71ee3dabe08b282ec1773",
+ "6c1abcc44e90148998b51acd11144e9c",
+ "f7bb3186e1ef8a2b326037ff898cad8e",
+ "87e72798518d62e84bcc77dcb17d0f3b",
+ "141624072a4a56773f68fadbdd07c4a7",
+ "7be49b08687a5f24df3a2c612fca3876",
+ "459bb5d9fd5b238348179c9a22108cd6",
+ "3d98810f418a9de92acfe2c68909c61c",
+ "6310eecda3cc9496987ca10186255558",
+ "59fc0e923a08cfac0a493fb38988e2bb",
+ "9ff8bb37d9c830e6ab8ecb0c435d3c91",
+#if CONFIG_SMOOTH_HV
+ "de6937fca02354f2874dbc5dbec5d5b3",
+ "723cf948137f7d8c7860d814e55ae67d",
#endif // CONFIG_SMOOTH_HV
-#else
- "309a618577b27c648f9c5ee45252bc8f",
-#endif // CONFIG_ALT_INTRA
};
- TestIntraPred("Intra4", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs,
- kSignatures, 4, 4 * 4 * kNumAv1IntraFuncs);
+ static const char *const kSignatures4x8[kNumAv1IntraFuncs] = {
+ "d9fbebdc85f71ab1e18461b2db4a2adc",
+ "5ccb2a68284bc9714d94b8a06ccadbb2",
+ "735d059abc2744f3ff3f9590f7191b37",
+ "d9fbebdc85f71ab1e18461b2db4a2adc",
+ "6819497c44cd0ace120add83672996ee",
+ "7e3244f5a2d3edf81c7e962a842b97f9",
+ "3fa52ee9acf5a25594cac684be263f32",
+ "c18dd23d57def4df4c6147c572dfc827",
+ "d007fbf7e43cb8f49702daa20f0c9153",
+ "5c0226c44c5df285728296b80cc6de4b",
+ "b55d7b558bebc8c2042dfac58b3c4688",
+ "6549362baa389b8faa2d954926b64e2f",
+ "809350f164cd4d1650850bb0f59c3260",
+ "1b60a394331eeab6927a6f8aaff57040",
+#if CONFIG_SMOOTH_HV
+ "5307de1bd7329ba6b281d2c1b0b457f9",
+ "24c58a8138339846d95568efb91751db",
+#endif
+ };
+ if (!strcmp(block_name, "intra4x4")) {
+ TestIntraPred(block_name, pred_funcs, kSignatures4x4, 4, 4);
+ }
+ if (!strcmp(block_name, "intra4x8")) {
+ TestIntraPred(block_name, pred_funcs, kSignatures4x8, 4, 8);
+ }
}
-void TestIntraPred8(AvxPredFunc const *pred_funcs) {
- static const char *const kSignatures[kNumAv1IntraFuncs] = {
- "7694ddeeefed887faf9d339d18850928",
- "7d726b1213591b99f736be6dec65065b",
- "19c5711281357a485591aaf9c96c0a67",
- "ba6b66877a089e71cd938e3b8c40caac",
- "802440c93317e0f8ba93fab02ef74265",
- "9e09a47a15deb0b9d8372824f9805080",
- "a2fd4b66e1a667a3e582588934a7e4bd",
- "78339c1c60bb1d67d248ab8c4da08b7f",
- "5c97d70f7d47de1882a6cd86c165c8a9",
- "8182bf60688b42205acd95e59e967157",
- "9d69fcaf12398e67242d3fcf5cf2267e",
- "7a09adb0fa6c2bf889a99dd816622feb",
-#if CONFIG_ALT_INTRA
- "f6ade499c626d38eb70661184b79bc57",
- "1ad5b106c79b792e514ba25e87139b5e",
-#if CONFIG_SMOOTH_HV
- "fe0d359b91a1d8141483d2e032f1b75f",
- "0cfd7603ced02829d1ce18b6795d73d0",
+void TestIntraPred8(const char *block_name, AvxPredFunc const *pred_funcs) {
+ static const char *const kSignatures8x8[kNumAv1IntraFuncs] = {
+ "d8bbae5d6547cfc17e4f5f44c8730e88",
+ "373bab6d931868d41a601d9d88ce9ac3",
+ "6fdd5ff4ff79656c14747598ca9e3706",
+ "d9661c2811d6a73674f40ffb2b841847",
+ "7c722d10b19ccff0b8c171868e747385",
+ "f81dd986eb2b50f750d3a7da716b7e27",
+ "e0b1292448f3350bf1c92ca283ca872a",
+ "0e3523f9cab2142dd37fd07ec0760bce",
+ "79ac4efe907f0a0f1885d43066cfedee",
+ "19ecf2432ac305057de3b6578474eec6",
+ "7ae38292cbe47b4aa0807c3bd5a543df",
+ "d0ecffec1bb01f4b61ab5738164695c4",
+ "064404361748dd111a890a1470d7f0ea",
+ "dc29b7e1f78cc8e7525d5ea4c0ab9b78",
+#if CONFIG_SMOOTH_HV
+ "97111eb1bc26bade6272015df829f1ae",
+ "d19a8a73cc46b807f2c5e817576cc1e1",
#endif // CONFIG_SMOOTH_HV
-#else
- "815b75c8e0d91cc1ae766dc5d3e445a3",
-#endif // CONFIG_ALT_INTRA
};
- TestIntraPred("Intra8", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs,
- kSignatures, 8, 8 * 8 * kNumAv1IntraFuncs);
+ static const char *const kSignatures8x4[kNumAv1IntraFuncs] = {
+ "23f9fc11344426c9bee2e06d57dfd628",
+ "2d71a26d1bae1fb34734de7b42fc5eb7",
+ "5af9c1b2fd9d5721fad67b67b3f7c816",
+ "00d71b17be662753813d515f197d145e",
+ "bef10ec984427e28f4390f43809d10af",
+ "77773cdfb7ed6bc882ab202a64b0a470",
+ "cba356970f6b9a1b6024e1dbe4a66f9b",
+ "c58c21efc804242848e6f29a93a7984d",
+ "dc92cc45a51c7a397506cab19f74e66d",
+ "391f6a12224f81a3719ea09a2cf7a5ad",
+ "b74b8b11f7eb2bbf723b25f381104ca9",
+ "2234aaa06ca245624211cf53a0261017",
+ "2cc48bd66d6b0121b5221d52ccd732af",
+ "b302155e1c9eeeafe2ba2bf68e807a46",
+#if CONFIG_SMOOTH_HV
+ "561bc8d0e76d5041ebd5168fc6a115e1",
+ "81d0113fb1d0a9a24ffd6f1987b77948",
+#endif
+ };
+ static const char *const kSignatures8x16[kNumAv1IntraFuncs] = {
+ "c849de88b24f773dfcdd1d48d1209796",
+ "6cb807c1897b94866a0f3d3c56ed8695",
+ "d56db05a8ac7981762f5b877f486c4ef",
+ "b4bc01eb6e59a40922ad17715cafb04b",
+ "09d178439534f4062ae687c351f66d64",
+ "644501399cf73080ac606e5cef7ca09b",
+ "0e8e968fa177204d7e73d7e04ce69ebb",
+ "1d25f9287fdf7ba48a5105f1529b7e75",
+ "02cacccf3752451763a6a6e2e784494f",
+ "6044a1416d53e324ddc012d2e7763339",
+ "57ac6e8f3ab5e943c9280043eeb174b8",
+ "d51b9d65471194d9caebc7d67e75ef10",
+ "278076495180e17c065a95ab7278539a",
+ "9dd7f324816f242be408ffeb0c673732",
+#if CONFIG_SMOOTH_HV
+ "f520c4a20acfa0bea1d253c6f0f040fd",
+ "85f38df809df2c2d7c8b4a157a65cd44",
+#endif
+ };
+ if (!strcmp(block_name, "intra8x8")) {
+ TestIntraPred(block_name, pred_funcs, kSignatures8x8, 8, 8);
+ }
+ if (!strcmp(block_name, "intra8x4")) {
+ TestIntraPred(block_name, pred_funcs, kSignatures8x4, 8, 4);
+ }
+ if (!strcmp(block_name, "intra8x16")) {
+ TestIntraPred(block_name, pred_funcs, kSignatures8x16, 8, 16);
+ }
}
-void TestIntraPred16(AvxPredFunc const *pred_funcs) {
- static const char *const kSignatures[kNumAv1IntraFuncs] = {
- "b40dbb555d5d16a043dc361e6694fe53",
- "fb08118cee3b6405d64c1fd68be878c6",
- "6c190f341475c837cc38c2e566b64875",
- "db5c34ccbe2c7f595d9b08b0dc2c698c",
- "a62cbfd153a1f0b9fed13e62b8408a7a",
- "143df5b4c89335e281103f610f5052e4",
- "404944b521d16f6edd160feeeb31ff35",
- "7841fae7d4d47b519322e6a03eeed9dc",
- "f6ebed3f71cbcf8d6d0516ce87e11093",
- "3cc480297dbfeed01a1c2d78dd03d0c5",
- "fbd607f15da218c5390a5b183b634a10",
- "f7063ccbc29f87303d5c3d0555b08944",
-#if CONFIG_ALT_INTRA
- "7adcaaa3554eb71a81fc48cb9043984b",
- "c0acea4397c1b4d54a21bbcec5731dff",
-#if CONFIG_SMOOTH_HV
- "f15b8712f0f064e98a7d804d3074afa7",
- "01a09cdb8edd06d840c84643032fc02f",
+void TestIntraPred16(const char *block_name, AvxPredFunc const *pred_funcs) {
+ static const char *const kSignatures16x16[kNumAv1IntraFuncs] = {
+ "50971c07ce26977d30298538fffec619",
+ "527a6b9e0dc5b21b98cf276305432bef",
+ "7eff2868f80ebc2c43a4f367281d80f7",
+ "67cd60512b54964ef6aff1bd4816d922",
+ "48371c87dc95c08a33b2048f89cf6468",
+ "b0acf2872ee411d7530af6d2625a7084",
+ "31d901ab2289d1e61e704e40240382a7",
+ "dae208f3dca583529cff49b73f7c4183",
+ "7af66a2f4c8e0b4908e40f047e60c47c",
+ "125e3ab6ab9bc961f183ec366a7afa88",
+ "ff230677e800977757d14b85a9eba404",
+ "eb42dc39140515dd4f3ab1afe6c3e71b",
+ "93d6b5352b571805ab16a55e1bbed86a",
+ "03764e4c0aebbc180e4e2c68fb06df2b",
+#if CONFIG_SMOOTH_HV
+ "bb6c74c9076c9f266ab11fb57060d8e6",
+ "0c5162bc28489756ddb847b5678e6f07",
#endif // CONFIG_SMOOTH_HV
-#else
- "b8a41aa968ec108af447af4217cba91b",
-#endif // CONFIG_ALT_INTRA
};
- TestIntraPred("Intra16", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs,
- kSignatures, 16, 16 * 16 * kNumAv1IntraFuncs);
+ static const char *const kSignatures16x8[kNumAv1IntraFuncs] = {
+ "b4cbdbdf10ce13300b4063a3daf99e04",
+ "3731e1e6202064a9d0604d7c293ecee4",
+ "6c856188c4256a06452f0d5d70cac436",
+ "1f2192b4c8c497589484ea7bf9c944e8",
+ "84011bd4b7f565119d06787840e333a0",
+ "0e48949f7a6aa36f0d76b5d01f91124a",
+ "58114c06f6b9d8285e5020c7afd834ab",
+ "e37afe84a8b3c5e0f048d4652ecbe09e",
+ "c216348473fb029b45f8fb4f2862a7bd",
+ "0b7385155dcef742cc456d5741ae93a3",
+ "d55fadb221f0ea20266e57cd413e7b94",
+ "9bd6eb226c7e169b8d53cf70aea98b3a",
+ "60eff8064634b6c73b10681356baeee9",
+ "1559aeb081a9c0c71111d6093c2ff9fd",
+#if CONFIG_SMOOTH_HV
+ "c15479b739713773e5cabb748451987b",
+ "72e33ec12c9b67aea26d8d005fb82de2",
+#endif
+ };
+ static const char *const kSignatures16x32[kNumAv1IntraFuncs] = {
+ "abe5233d189cdbf79424721571bbaa7b",
+ "282759f81e3cfb2e2d396fe406b72a8b",
+ "e2224926c264f6f174cbc3167a233168",
+ "6814e85c2b33f8c9415d62e80394b47b",
+ "99cbbb60459c08a3061d72c4e4f6276a",
+ "1d1567d40b8e816f8c1f71e576fe0f87",
+ "5e989f9c748a0d2cd8c4ebf9d3fe1278",
+ "7135a2f419452a3a192a35156f68b019",
+ "06e10af5a726d2c81b8f8c708204f9fb",
+ "c0882f0e7ba1ffa0aeef6d5c751df6de",
+ "8477429e17d39a423f30e2082f651549",
+ "ba35068a30c2d1d10901e4bfabd02a11",
+ "36fdd371b624a075814d497c4832ec85",
+ "8ab8da61b727442b6ff692b40d0df018",
+#if CONFIG_SMOOTH_HV
+ "e35a10ad7fdf2327e821504a90f6a6eb",
+ "1f7211e727dc1de7d6a55d082fbdd821",
+#endif
+ };
+ if (!strcmp(block_name, "intra16x16")) {
+ TestIntraPred(block_name, pred_funcs, kSignatures16x16, 16, 16);
+ }
+ if (!strcmp(block_name, "intra16x8")) {
+ TestIntraPred(block_name, pred_funcs, kSignatures16x8, 16, 8);
+ }
+ if (!strcmp(block_name, "intra16x32")) {
+ TestIntraPred(block_name, pred_funcs, kSignatures16x32, 16, 32);
+ }
}
-void TestIntraPred32(AvxPredFunc const *pred_funcs) {
- static const char *const kSignatures[kNumAv1IntraFuncs] = {
- "558541656d84f9ae7896db655826febe",
- "b3587a1f9a01495fa38c8cd3c8e2a1bf",
- "4c6501e64f25aacc55a2a16c7e8f0255",
+void TestIntraPred32(const char *block_name, AvxPredFunc const *pred_funcs) {
+ static const char *const kSignatures32x32[kNumAv1IntraFuncs] = {
+ "a0a618c900e65ae521ccc8af789729f2",
+ "985aaa7c72b4a6c2fb431d32100cf13a",
+ "10662d09febc3ca13ee4e700120daeb5",
"b3b01379ba08916ef6b1b35f7d9ad51c",
- "0f1eb38b6cbddb3d496199ef9f329071",
- "911c06efb9ed1c3b4c104b232b55812f",
- "b4f9f177a8a259514f039cfb403a61e3",
- "0a6d584a44f8db9aa7ade2e2fdb9fc9e",
- "b01c9076525216925f3456f034fb6eee",
- "d267e20ad9e5cd2915d1a47254d3d149",
- "3c45418137114cb6cef4c7a7baf4855c",
- "d520125ebd512c63c301bf67fea8e059",
-#if CONFIG_ALT_INTRA
- "297e8fbb5d33c29b12b228fa9d7c40a4",
- "31b9296d70dd82238c87173e6d5e65fd",
-#if CONFIG_SMOOTH_HV
- "f1041f77a34e86aaf30ea779ba84a2e8",
- "83e2b744a6a3d82321744442b1db945c",
+ "9f4261755795af97e34679c333ec7004",
+ "bc2c9da91ad97ef0d1610fb0a9041657",
+ "f524b1a7e31c7bb9bfb2487fac3e16d8",
+ "4039bb7da0f6860090d3c57b5c85468f",
+ "b29fff7b61804e68383e3a609b33da58",
+ "e1aa5e49067fd8dba66c2eb8d07b7a89",
+ "db217e7891581cf93895ef5974bebb21",
+ "beb6cdc52b52c8976b4d2407ec8d2313",
+ "ef1653982b69e1f64bee3759f3e1ec45",
+ "1a51a675deba2c83282142eb48d3dc3d",
+#if CONFIG_SMOOTH_HV
+ "866c224746dc260cda861a7b1b383fb3",
+ "cea23799fc3526e1b6a6ff02b42b82af",
#endif // CONFIG_SMOOTH_HV
-#else
- "9e1370c6d42e08d357d9612c93a71cfc",
-#endif // CONFIG_ALT_INTRA
};
- TestIntraPred("Intra32", pred_funcs, kAv1IntraPredNames, kNumAv1IntraFuncs,
- kSignatures, 32, 32 * 32 * kNumAv1IntraFuncs);
+ static const char *const kSignatures32x16[kNumAv1IntraFuncs] = {
+ "d1aeb8d5fdcfd3307922af01a798a4dc",
+ "b0bcb514ebfbee065faea9d34c12ae75",
+ "d6a18c63b4e909871c0137ca652fad23",
+ "fd047f2fc1b8ffb95d0eeef3e8796a45",
+ "645ab60779ea348fd93c81561c31bab9",
+ "4409633c9db8dff41ade4292a3a56e7f",
+ "b9b2935b2287a9a461ac5c11251ac706",
+ "43b05f808c0ac4fe8accd84d293b0488",
+ "1d2cb43872d20c205ffb185102bcd22a",
+ "2c1551b5e99592fd21053b5d14e397d9",
+ "cd499ef0dd41e2e38d5dac3319dfdd97",
+ "cd2610426637003f3b5d3984cb3320d5",
+ "5e36a11e069b31c2a739f3a9c7b37c24",
+ "e83b9483d702cfae496991c3c7fa92c0",
+#if CONFIG_SMOOTH_HV
+ "12f6ddf98c7f30a277307f1ea935b030",
+ "354321d6c32bbdb0739e4fa2acbf41e1",
+#endif
+ };
+ if (!strcmp(block_name, "intra32x32")) {
+ TestIntraPred(block_name, pred_funcs, kSignatures32x32, 32, 32);
+ }
+ if (!strcmp(block_name, "intra32x16")) {
+ TestIntraPred(block_name, pred_funcs, kSignatures32x16, 32, 16);
+ }
}
} // namespace
// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
// to |test_func|. The test name is 'arch.DISABLED_test_func', e.g.,
// C.DISABLED_TestIntraPred4. The DISABLED_ prefix keeps these speed tests out
// of the default gtest run; use --gtest_also_run_disabled_tests to run them.
-#define INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, v, h, \
- d45e, d135, d117, d153, d207e, d63e, tm, smooth, \
- smooth_v, smooth_h) \
- TEST(arch, test_func) { \
- static const AvxPredFunc aom_intra_pred[] = { \
- dc, dc_left, dc_top, dc_128, v, h, d45e, d135, \
- d117, d153, d207e, d63e, tm, smooth, smooth_v, smooth_h \
- }; \
- test_func(aom_intra_pred); \
+#define INTRA_PRED_TEST(arch, test_func, blk, dc, dc_left, dc_top, dc_128, v, \
+ h, d45e, d135, d117, d153, d207e, d63e, tm, smooth, \
+ smooth_v, smooth_h) \
+ TEST(arch, DISABLED_##test_func) { \
+ static const AvxPredFunc aom_intra_pred[] = { \
+ dc, dc_left, dc_top, dc_128, v, h, d45e, d135, \
+ d117, d153, d207e, d63e, tm, smooth, smooth_v, smooth_h \
+ }; \
+ test_func(blk, aom_intra_pred); \
}
// -----------------------------------------------------------------------------
// 4x4
-#if CONFIG_ALT_INTRA
-#define tm_pred_func aom_paeth_predictor_4x4_c
-#define smooth_pred_func aom_smooth_predictor_4x4_c
#if CONFIG_SMOOTH_HV
#define smooth_v_pred_func aom_smooth_v_predictor_4x4_c
#define smooth_h_pred_func aom_smooth_h_predictor_4x4_c
@@ -231,95 +388,91 @@ void TestIntraPred32(AvxPredFunc const *pred_funcs) {
#define smooth_v_pred_func NULL
#define smooth_h_pred_func NULL
#endif // CONFIG_SMOOTH_HV
-#else
-#define tm_pred_func aom_tm_predictor_4x4_c
-#define smooth_pred_func NULL
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(C, TestIntraPred4, aom_dc_predictor_4x4_c,
+INTRA_PRED_TEST(C_1, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_c,
aom_dc_left_predictor_4x4_c, aom_dc_top_predictor_4x4_c,
aom_dc_128_predictor_4x4_c, aom_v_predictor_4x4_c,
aom_h_predictor_4x4_c, aom_d45e_predictor_4x4_c,
aom_d135_predictor_4x4_c, aom_d117_predictor_4x4_c,
aom_d153_predictor_4x4_c, aom_d207e_predictor_4x4_c,
- aom_d63e_predictor_4x4_c, tm_pred_func, smooth_pred_func,
- smooth_v_pred_func, smooth_h_pred_func)
+ aom_d63e_predictor_4x4_c, aom_paeth_predictor_4x4_c,
+ aom_smooth_predictor_4x4_c, smooth_v_pred_func,
+ smooth_h_pred_func)
-#undef tm_pred_func
-#undef smooth_pred_func
#undef smooth_v_pred_func
#undef smooth_h_pred_func
-#if HAVE_SSE2
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_smooth_v_predictor_4x8_c
+#define smooth_h_pred_func aom_smooth_h_predictor_4x8_c
#else
-#define tm_pred_func aom_tm_predictor_4x4_sse2
-#endif // CONFIG_ALT_INTRA
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+INTRA_PRED_TEST(C_2, TestIntraPred4, "intra4x8", aom_dc_predictor_4x8_c,
+ aom_dc_left_predictor_4x8_c, aom_dc_top_predictor_4x8_c,
+ aom_dc_128_predictor_4x8_c, aom_v_predictor_4x8_c,
+ aom_h_predictor_4x8_c, aom_d45e_predictor_4x8_c,
+ aom_d135_predictor_4x8_c, aom_d117_predictor_4x8_c,
+ aom_d153_predictor_4x8_c, aom_d207e_predictor_4x8_c,
+ aom_d63e_predictor_4x8_c, aom_paeth_predictor_4x8_c,
+ aom_smooth_predictor_4x8_c, smooth_v_pred_func,
+ smooth_h_pred_func)
-INTRA_PRED_TEST(SSE2, TestIntraPred4, aom_dc_predictor_4x4_sse2,
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2_1, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_sse2,
aom_dc_left_predictor_4x4_sse2, aom_dc_top_predictor_4x4_sse2,
aom_dc_128_predictor_4x4_sse2, aom_v_predictor_4x4_sse2,
aom_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_2, TestIntraPred4, "intra4x8", aom_dc_predictor_4x8_sse2,
+ aom_dc_left_predictor_4x8_sse2, aom_dc_top_predictor_4x8_sse2,
+ aom_dc_128_predictor_4x8_sse2, aom_v_predictor_4x8_sse2,
+ aom_h_predictor_4x8_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL)
#endif // HAVE_SSE2
#if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, aom_d153_predictor_4x4_ssse3, NULL,
- aom_d63e_predictor_4x4_ssse3, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSSE3_1, TestIntraPred4, "intra4x4", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_4x4_ssse3,
+ NULL, aom_d63e_predictor_4x4_ssse3,
+ aom_paeth_predictor_4x4_ssse3, aom_smooth_predictor_4x4_ssse3,
+ NULL, NULL)
+INTRA_PRED_TEST(SSSE3_2, TestIntraPred4, "intra4x8", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_4x8_ssse3, aom_smooth_predictor_4x8_ssse3,
+ NULL, NULL)
#endif // HAVE_SSSE3
#if HAVE_DSPR2
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
-#else
-#define tm_pred_func aom_tm_predictor_4x4_dspr2
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(DSPR2, TestIntraPred4, aom_dc_predictor_4x4_dspr2, NULL, NULL,
- NULL, NULL, aom_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
- NULL, NULL, tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+INTRA_PRED_TEST(DSPR2, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_dspr2,
+ NULL, NULL, NULL, NULL, aom_h_predictor_4x4_dspr2, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
#endif // HAVE_DSPR2
#if HAVE_NEON
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
-#else
-#define tm_pred_func aom_tm_predictor_4x4_neon
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(NEON, TestIntraPred4, aom_dc_predictor_4x4_neon,
+INTRA_PRED_TEST(NEON, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_neon,
aom_dc_left_predictor_4x4_neon, aom_dc_top_predictor_4x4_neon,
aom_dc_128_predictor_4x4_neon, aom_v_predictor_4x4_neon,
aom_h_predictor_4x4_neon, NULL, aom_d135_predictor_4x4_neon,
- NULL, NULL, NULL, NULL, tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
#endif // HAVE_NEON
#if HAVE_MSA
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
-#else
-#define tm_pred_func aom_tm_predictor_4x4_msa
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(MSA, TestIntraPred4, aom_dc_predictor_4x4_msa,
+INTRA_PRED_TEST(MSA, TestIntraPred4, "intra4x4", aom_dc_predictor_4x4_msa,
aom_dc_left_predictor_4x4_msa, aom_dc_top_predictor_4x4_msa,
aom_dc_128_predictor_4x4_msa, aom_v_predictor_4x4_msa,
aom_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
#endif // HAVE_MSA
// -----------------------------------------------------------------------------
// 8x8
-#if CONFIG_ALT_INTRA
-#define tm_pred_func aom_paeth_predictor_8x8_c
-#define smooth_pred_func aom_smooth_predictor_8x8_c
#if CONFIG_SMOOTH_HV
#define smooth_v_pred_func aom_smooth_v_predictor_8x8_c
#define smooth_h_pred_func aom_smooth_h_predictor_8x8_c
@@ -327,91 +480,114 @@ INTRA_PRED_TEST(MSA, TestIntraPred4, aom_dc_predictor_4x4_msa,
#define smooth_v_pred_func NULL
#define smooth_h_pred_func NULL
#endif // CONFIG_SMOOTH_HV
-#else
-#define tm_pred_func aom_tm_predictor_8x8_c
-#define smooth_pred_func NULL
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(C, TestIntraPred8, aom_dc_predictor_8x8_c,
+INTRA_PRED_TEST(C_1, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_c,
aom_dc_left_predictor_8x8_c, aom_dc_top_predictor_8x8_c,
aom_dc_128_predictor_8x8_c, aom_v_predictor_8x8_c,
aom_h_predictor_8x8_c, aom_d45e_predictor_8x8_c,
aom_d135_predictor_8x8_c, aom_d117_predictor_8x8_c,
aom_d153_predictor_8x8_c, aom_d207e_predictor_8x8_c,
- aom_d63e_predictor_8x8_c, tm_pred_func, smooth_pred_func,
- smooth_v_pred_func, smooth_h_pred_func)
-#undef tm_pred_func
-#undef smooth_pred_func
+ aom_d63e_predictor_8x8_c, aom_paeth_predictor_8x8_c,
+ aom_smooth_predictor_8x8_c, smooth_v_pred_func,
+ smooth_h_pred_func)
#undef smooth_v_pred_func
#undef smooth_h_pred_func
-#if HAVE_SSE2
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_smooth_v_predictor_8x4_c
+#define smooth_h_pred_func aom_smooth_h_predictor_8x4_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+INTRA_PRED_TEST(C_2, TestIntraPred8, "intra8x4", aom_dc_predictor_8x4_c,
+ aom_dc_left_predictor_8x4_c, aom_dc_top_predictor_8x4_c,
+ aom_dc_128_predictor_8x4_c, aom_v_predictor_8x4_c,
+ aom_h_predictor_8x4_c, aom_d45e_predictor_8x4_c,
+ aom_d135_predictor_8x4_c, aom_d117_predictor_8x4_c,
+ aom_d153_predictor_8x4_c, aom_d207e_predictor_8x4_c,
+ aom_d63e_predictor_8x4_c, aom_paeth_predictor_8x4_c,
+ aom_smooth_predictor_8x4_c, smooth_v_pred_func,
+ smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_smooth_v_predictor_8x16_c
+#define smooth_h_pred_func aom_smooth_h_predictor_8x16_c
#else
-#define tm_pred_func aom_tm_predictor_8x8_sse2
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(SSE2, TestIntraPred8, aom_dc_predictor_8x8_sse2,
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+INTRA_PRED_TEST(C_3, TestIntraPred8, "intra8x16", aom_dc_predictor_8x16_c,
+ aom_dc_left_predictor_8x16_c, aom_dc_top_predictor_8x16_c,
+ aom_dc_128_predictor_8x16_c, aom_v_predictor_8x16_c,
+ aom_h_predictor_8x16_c, aom_d45e_predictor_8x16_c,
+ aom_d135_predictor_8x16_c, aom_d117_predictor_8x16_c,
+ aom_d153_predictor_8x16_c, aom_d207e_predictor_8x16_c,
+ aom_d63e_predictor_8x16_c, aom_paeth_predictor_8x16_c,
+ aom_smooth_predictor_8x16_c, smooth_v_pred_func,
+ smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2_1, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_sse2,
aom_dc_left_predictor_8x8_sse2, aom_dc_top_predictor_8x8_sse2,
aom_dc_128_predictor_8x8_sse2, aom_v_predictor_8x8_sse2,
aom_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_2, TestIntraPred8, "intra8x4", aom_dc_predictor_8x4_sse2,
+ aom_dc_left_predictor_8x4_sse2, aom_dc_top_predictor_8x4_sse2,
+ aom_dc_128_predictor_8x4_sse2, aom_v_predictor_8x4_sse2,
+ aom_h_predictor_8x4_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_3, TestIntraPred8, "intra8x16", aom_dc_predictor_8x16_sse2,
+ aom_dc_left_predictor_8x16_sse2, aom_dc_top_predictor_8x16_sse2,
+ aom_dc_128_predictor_8x16_sse2, aom_v_predictor_8x16_sse2,
+ aom_h_predictor_8x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL)
#endif // HAVE_SSE2
#if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, aom_d153_predictor_8x8_ssse3, NULL, NULL, NULL,
- NULL, NULL, NULL)
+INTRA_PRED_TEST(SSSE3_1, TestIntraPred8, "intra8x8", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_8x8_ssse3,
+ NULL, NULL, aom_paeth_predictor_8x8_ssse3,
+ aom_smooth_predictor_8x8_ssse3, NULL, NULL)
+INTRA_PRED_TEST(SSSE3_2, TestIntraPred8, "intra8x4", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_8x4_ssse3, aom_smooth_predictor_8x4_ssse3,
+ NULL, NULL)
+INTRA_PRED_TEST(SSSE3_3, TestIntraPred8, "intra8x16", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_8x16_ssse3, aom_smooth_predictor_8x16_ssse3,
+ NULL, NULL)
#endif // HAVE_SSSE3
#if HAVE_DSPR2
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
-#else
-#define tm_pred_func aom_tm_predictor_8x8_dspr2
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(DSPR2, TestIntraPred8, aom_dc_predictor_8x8_dspr2, NULL, NULL,
- NULL, NULL, aom_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
- NULL, NULL, tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+INTRA_PRED_TEST(DSPR2, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_dspr2,
+ NULL, NULL, NULL, NULL, aom_h_predictor_8x8_dspr2, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
#endif // HAVE_DSPR2
#if HAVE_NEON
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
-#else
-#define tm_pred_func aom_tm_predictor_8x8_neon
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(NEON, TestIntraPred8, aom_dc_predictor_8x8_neon,
+INTRA_PRED_TEST(NEON, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_neon,
aom_dc_left_predictor_8x8_neon, aom_dc_top_predictor_8x8_neon,
aom_dc_128_predictor_8x8_neon, aom_v_predictor_8x8_neon,
aom_h_predictor_8x8_neon, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
#endif // HAVE_NEON
#if HAVE_MSA
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
-#else
-#define tm_pred_func aom_tm_predictor_8x8_msa
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(MSA, TestIntraPred8, aom_dc_predictor_8x8_msa,
+INTRA_PRED_TEST(MSA, TestIntraPred8, "intra8x8", aom_dc_predictor_8x8_msa,
aom_dc_left_predictor_8x8_msa, aom_dc_top_predictor_8x8_msa,
aom_dc_128_predictor_8x8_msa, aom_v_predictor_8x8_msa,
aom_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
#endif // HAVE_MSA
// -----------------------------------------------------------------------------
// 16x16
-#if CONFIG_ALT_INTRA
-#define tm_pred_func aom_paeth_predictor_16x16_c
-#define smooth_pred_func aom_smooth_predictor_16x16_c
#if CONFIG_SMOOTH_HV
#define smooth_v_pred_func aom_smooth_v_predictor_16x16_c
#define smooth_h_pred_func aom_smooth_h_predictor_16x16_c
@@ -419,87 +595,130 @@ INTRA_PRED_TEST(MSA, TestIntraPred8, aom_dc_predictor_8x8_msa,
#define smooth_v_pred_func NULL
#define smooth_h_pred_func NULL
#endif // CONFIG_SMOOTH_HV
-#else
-#define tm_pred_func aom_tm_predictor_16x16_c
-#define smooth_pred_func NULL
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(C, TestIntraPred16, aom_dc_predictor_16x16_c,
+INTRA_PRED_TEST(C_1, TestIntraPred16, "intra16x16", aom_dc_predictor_16x16_c,
aom_dc_left_predictor_16x16_c, aom_dc_top_predictor_16x16_c,
aom_dc_128_predictor_16x16_c, aom_v_predictor_16x16_c,
aom_h_predictor_16x16_c, aom_d45e_predictor_16x16_c,
aom_d135_predictor_16x16_c, aom_d117_predictor_16x16_c,
aom_d153_predictor_16x16_c, aom_d207e_predictor_16x16_c,
- aom_d63e_predictor_16x16_c, tm_pred_func, smooth_pred_func,
- smooth_v_pred_func, smooth_h_pred_func)
-#undef tm_pred_func
-#undef smooth_pred_func
+ aom_d63e_predictor_16x16_c, aom_paeth_predictor_16x16_c,
+ aom_smooth_predictor_16x16_c, smooth_v_pred_func,
+ smooth_h_pred_func)
#undef smooth_v_pred_func
#undef smooth_h_pred_func
-#if HAVE_SSE2
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_smooth_v_predictor_16x8_c
+#define smooth_h_pred_func aom_smooth_h_predictor_16x8_c
#else
-#define tm_pred_func aom_tm_predictor_16x16_sse2
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(SSE2, TestIntraPred16, aom_dc_predictor_16x16_sse2,
- aom_dc_left_predictor_16x16_sse2,
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+INTRA_PRED_TEST(C_2, TestIntraPred16, "intra16x8", aom_dc_predictor_16x8_c,
+ aom_dc_left_predictor_16x8_c, aom_dc_top_predictor_16x8_c,
+ aom_dc_128_predictor_16x8_c, aom_v_predictor_16x8_c,
+ aom_h_predictor_16x8_c, aom_d45e_predictor_16x8_c,
+ aom_d135_predictor_16x8_c, aom_d117_predictor_16x8_c,
+ aom_d153_predictor_16x8_c, aom_d207e_predictor_16x8_c,
+ aom_d63e_predictor_16x8_c, aom_paeth_predictor_16x8_c,
+ aom_smooth_predictor_16x8_c, smooth_v_pred_func,
+ smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_smooth_v_predictor_16x32_c
+#define smooth_h_pred_func aom_smooth_h_predictor_16x32_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+INTRA_PRED_TEST(C_3, TestIntraPred16, "intra16x32", aom_dc_predictor_16x32_c,
+ aom_dc_left_predictor_16x32_c, aom_dc_top_predictor_16x32_c,
+ aom_dc_128_predictor_16x32_c, aom_v_predictor_16x32_c,
+ aom_h_predictor_16x32_c, aom_d45e_predictor_16x32_c,
+ aom_d135_predictor_16x32_c, aom_d117_predictor_16x32_c,
+ aom_d153_predictor_16x32_c, aom_d207e_predictor_16x32_c,
+ aom_d63e_predictor_16x32_c, aom_paeth_predictor_16x32_c,
+ aom_smooth_predictor_16x32_c, smooth_v_pred_func,
+ smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2_1, TestIntraPred16, "intra16x16",
+ aom_dc_predictor_16x16_sse2, aom_dc_left_predictor_16x16_sse2,
aom_dc_top_predictor_16x16_sse2,
aom_dc_128_predictor_16x16_sse2, aom_v_predictor_16x16_sse2,
aom_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_2, TestIntraPred16, "intra16x8",
+ aom_dc_predictor_16x8_sse2, aom_dc_left_predictor_16x8_sse2,
+ aom_dc_top_predictor_16x8_sse2, aom_dc_128_predictor_16x8_sse2,
+ aom_v_predictor_16x8_sse2, aom_h_predictor_16x8_sse2, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_3, TestIntraPred16, "intra16x32",
+ aom_dc_predictor_16x32_sse2, aom_dc_left_predictor_16x32_sse2,
+ aom_dc_top_predictor_16x32_sse2,
+ aom_dc_128_predictor_16x32_sse2, aom_v_predictor_16x32_sse2,
+ aom_h_predictor_16x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL)
#endif // HAVE_SSE2
#if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, aom_d153_predictor_16x16_ssse3, NULL, NULL,
- NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSSE3_1, TestIntraPred16, "intra16x16", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_16x16_ssse3,
+ NULL, NULL, aom_paeth_predictor_16x16_ssse3,
+ aom_smooth_predictor_16x16_ssse3, NULL, NULL)
+INTRA_PRED_TEST(SSSE3_2, TestIntraPred16, "intra16x8", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_16x8_ssse3, aom_smooth_predictor_16x8_ssse3,
+ NULL, NULL)
+INTRA_PRED_TEST(SSSE3_3, TestIntraPred16, "intra16x32", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_16x32_ssse3,
+ aom_smooth_predictor_16x32_ssse3, NULL, NULL)
#endif // HAVE_SSSE3
+#if HAVE_AVX2
+INTRA_PRED_TEST(AVX2_1, TestIntraPred16, "intra16x16", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_16x16_avx2, NULL, NULL, NULL)
+INTRA_PRED_TEST(AVX2_2, TestIntraPred16, "intra16x8", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_16x8_avx2, NULL, NULL, NULL)
+INTRA_PRED_TEST(AVX2_3, TestIntraPred16, "intra16x32", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_16x32_avx2, NULL, NULL, NULL)
+#endif // HAVE_AVX2
+
#if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred16, aom_dc_predictor_16x16_dspr2, NULL,
- NULL, NULL, NULL, aom_h_predictor_16x16_dspr2, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(DSPR2, TestIntraPred16, "intra16x16",
+ aom_dc_predictor_16x16_dspr2, NULL, NULL, NULL, NULL,
+ aom_h_predictor_16x16_dspr2, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL)
#endif // HAVE_DSPR2
#if HAVE_NEON
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
-#else
-#define tm_pred_func aom_tm_predictor_16x16_neon
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(NEON, TestIntraPred16, aom_dc_predictor_16x16_neon,
- aom_dc_left_predictor_16x16_neon,
+INTRA_PRED_TEST(NEON, TestIntraPred16, "intra16x16",
+ aom_dc_predictor_16x16_neon, aom_dc_left_predictor_16x16_neon,
aom_dc_top_predictor_16x16_neon,
aom_dc_128_predictor_16x16_neon, aom_v_predictor_16x16_neon,
aom_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
#endif // HAVE_NEON
#if HAVE_MSA
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
-#else
-#define tm_pred_func aom_tm_predictor_16x16_msa
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(MSA, TestIntraPred16, aom_dc_predictor_16x16_msa,
+INTRA_PRED_TEST(MSA, TestIntraPred16, "intra16x16", aom_dc_predictor_16x16_msa,
aom_dc_left_predictor_16x16_msa, aom_dc_top_predictor_16x16_msa,
aom_dc_128_predictor_16x16_msa, aom_v_predictor_16x16_msa,
aom_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
#endif // HAVE_MSA
// -----------------------------------------------------------------------------
// 32x32
-#if CONFIG_ALT_INTRA
-#define tm_pred_func aom_paeth_predictor_32x32_c
-#define smooth_pred_func aom_smooth_predictor_32x32_c
#if CONFIG_SMOOTH_HV
#define smooth_v_pred_func aom_smooth_v_predictor_32x32_c
#define smooth_h_pred_func aom_smooth_h_predictor_32x32_c
@@ -507,73 +726,765 @@ INTRA_PRED_TEST(MSA, TestIntraPred16, aom_dc_predictor_16x16_msa,
#define smooth_v_pred_func NULL
#define smooth_h_pred_func NULL
#endif // CONFIG_SMOOTH_HV
-#else
-#define tm_pred_func aom_tm_predictor_32x32_c
-#define smooth_pred_func NULL
-#define smooth_v_pred_func NULL
-#define smooth_h_pred_func NULL
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(C, TestIntraPred32, aom_dc_predictor_32x32_c,
+INTRA_PRED_TEST(C_1, TestIntraPred32, "intra32x32", aom_dc_predictor_32x32_c,
aom_dc_left_predictor_32x32_c, aom_dc_top_predictor_32x32_c,
aom_dc_128_predictor_32x32_c, aom_v_predictor_32x32_c,
aom_h_predictor_32x32_c, aom_d45e_predictor_32x32_c,
aom_d135_predictor_32x32_c, aom_d117_predictor_32x32_c,
aom_d153_predictor_32x32_c, aom_d207e_predictor_32x32_c,
- aom_d63e_predictor_32x32_c, tm_pred_func, smooth_pred_func,
- smooth_v_pred_func, smooth_h_pred_func)
-#undef tm_pred_func
-#undef smooth_pred_func
+ aom_d63e_predictor_32x32_c, aom_paeth_predictor_32x32_c,
+ aom_smooth_predictor_32x32_c, smooth_v_pred_func,
+ smooth_h_pred_func)
#undef smooth_v_pred_func
#undef smooth_h_pred_func
-#if HAVE_SSE2
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_smooth_v_predictor_32x16_c
+#define smooth_h_pred_func aom_smooth_h_predictor_32x16_c
#else
-#define tm_pred_func aom_tm_predictor_32x32_sse2
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(SSE2, TestIntraPred32, aom_dc_predictor_32x32_sse2,
- aom_dc_left_predictor_32x32_sse2,
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+INTRA_PRED_TEST(C_2, TestIntraPred32, "intra32x16", aom_dc_predictor_32x16_c,
+ aom_dc_left_predictor_32x16_c, aom_dc_top_predictor_32x16_c,
+ aom_dc_128_predictor_32x16_c, aom_v_predictor_32x16_c,
+ aom_h_predictor_32x16_c, aom_d45e_predictor_32x16_c,
+ aom_d135_predictor_32x16_c, aom_d117_predictor_32x16_c,
+ aom_d153_predictor_32x16_c, aom_d207e_predictor_32x16_c,
+ aom_d63e_predictor_32x16_c, aom_paeth_predictor_32x16_c,
+ aom_smooth_predictor_32x16_c, smooth_v_pred_func,
+ smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2_1, TestIntraPred32, "intra32x32",
+ aom_dc_predictor_32x32_sse2, aom_dc_left_predictor_32x32_sse2,
aom_dc_top_predictor_32x32_sse2,
aom_dc_128_predictor_32x32_sse2, aom_v_predictor_32x32_sse2,
aom_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSE2_2, TestIntraPred32, "intra32x16",
+ aom_dc_predictor_32x16_sse2, aom_dc_left_predictor_32x16_sse2,
+ aom_dc_top_predictor_32x16_sse2,
+ aom_dc_128_predictor_32x16_sse2, aom_v_predictor_32x16_sse2,
+ aom_h_predictor_32x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL)
#endif // HAVE_SSE2
#if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, aom_d153_predictor_32x32_ssse3, NULL, NULL,
- NULL, NULL, NULL, NULL)
+INTRA_PRED_TEST(SSSE3_1, TestIntraPred32, "intra32x32", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_32x32_ssse3,
+ NULL, NULL, aom_paeth_predictor_32x32_ssse3,
+ aom_smooth_predictor_32x32_ssse3, NULL, NULL)
+INTRA_PRED_TEST(SSSE3_2, TestIntraPred32, "intra32x16", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_32x16_ssse3,
+ aom_smooth_predictor_32x16_ssse3, NULL, NULL)
#endif // HAVE_SSSE3
+#if HAVE_AVX2
+INTRA_PRED_TEST(AVX2_1, TestIntraPred32, "intra32x32",
+ aom_dc_predictor_32x32_avx2, aom_dc_left_predictor_32x32_avx2,
+ aom_dc_top_predictor_32x32_avx2,
+ aom_dc_128_predictor_32x32_avx2, aom_v_predictor_32x32_avx2,
+ aom_h_predictor_32x32_avx2, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_32x32_avx2, NULL, NULL, NULL)
+INTRA_PRED_TEST(AVX2_2, TestIntraPred32, "intra32x16",
+ aom_dc_predictor_32x16_avx2, aom_dc_left_predictor_32x16_avx2,
+ aom_dc_top_predictor_32x16_avx2,
+ aom_dc_128_predictor_32x16_avx2, aom_v_predictor_32x16_avx2,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_paeth_predictor_32x16_avx2, NULL, NULL, NULL)
+#endif // HAVE_AVX2
+
#if HAVE_NEON
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
-#else
-#define tm_pred_func aom_tm_predictor_32x32_neon
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(NEON, TestIntraPred32, aom_dc_predictor_32x32_neon,
- aom_dc_left_predictor_32x32_neon,
+INTRA_PRED_TEST(NEON, TestIntraPred32, "intra32x32",
+ aom_dc_predictor_32x32_neon, aom_dc_left_predictor_32x32_neon,
aom_dc_top_predictor_32x32_neon,
aom_dc_128_predictor_32x32_neon, aom_v_predictor_32x32_neon,
aom_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
#endif // HAVE_NEON
#if HAVE_MSA
-#if CONFIG_ALT_INTRA
-#define tm_pred_func NULL
-#else
-#define tm_pred_func aom_tm_predictor_32x32_msa
-#endif // CONFIG_ALT_INTRA
-INTRA_PRED_TEST(MSA, TestIntraPred32, aom_dc_predictor_32x32_msa,
+INTRA_PRED_TEST(MSA, TestIntraPred32, "intra32x32", aom_dc_predictor_32x32_msa,
aom_dc_left_predictor_32x32_msa, aom_dc_top_predictor_32x32_msa,
aom_dc_128_predictor_32x32_msa, aom_v_predictor_32x32_msa,
aom_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL, NULL,
- tm_pred_func, NULL, NULL, NULL)
-#undef tm_pred_func
+ NULL, NULL, NULL, NULL)
#endif // HAVE_MSA
+// -----------------------------------------------------------------------------
+// High Bitdepth
+#if CONFIG_HIGHBITDEPTH
+namespace {
+
+typedef void (*AvxHighbdPredFunc)(uint16_t *dst, ptrdiff_t y_stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd);
+
+typedef IntraPredTestMem<uint16_t> Av1HighbdIntraPredTestMem;
+
+void TestHighbdIntraPred(const char name[], AvxHighbdPredFunc const *pred_funcs,
+ const char *const signatures[], int block_width,
+ int block_height) {
+ const int num_pixels_per_test =
+ block_width * block_height * kNumAv1IntraFuncs;
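+  // Same normalization as the low bitdepth test: ~2e10 pixels in total.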
+ const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
+ Av1HighbdIntraPredTestMem intra_pred_test_mem;
+ const uint16_t *const above = intra_pred_test_mem.above_mem + 16;
+ const int bd = 12;
+
+ intra_pred_test_mem.Init(block_width, bd);
+
+ for (int k = 0; k < kNumAv1IntraFuncs; ++k) {
+ if (pred_funcs[k] == NULL) continue;
+ memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src,
+ sizeof(intra_pred_test_mem.src));
+ aom_usec_timer timer;
+ aom_usec_timer_start(&timer);
+ for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
+ pred_funcs[k](intra_pred_test_mem.src, kBPS, above,
+ intra_pred_test_mem.left, bd);
+ }
+ libaom_test::ClearSystemState();
+ aom_usec_timer_mark(&timer);
+ const int elapsed_time =
+ static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
+ CheckMd5Signature(name, signatures, intra_pred_test_mem.src,
+ sizeof(intra_pred_test_mem.src), elapsed_time, k);
+ }
+}
+
+void TestHighbdIntraPred4(const char *block_name,
+ AvxHighbdPredFunc const *pred_funcs) {
+ static const char *const kSignatures4x4[kNumAv1IntraFuncs] = {
+ "11f74af6c5737df472f3275cbde062fa",
+ "51bea056b6447c93f6eb8f6b7e8f6f71",
+ "27e97f946766331795886f4de04c5594",
+ "53ab15974b049111fb596c5168ec7e3f",
+ "f0b640bb176fbe4584cf3d32a9b0320a",
+ "729783ca909e03afd4b47111c80d967b",
+ "d631a8544ccc87702db3e98fac494657",
+ "293fc903254a33754133314c6cdba81f",
+ "f8074d704233e73dfd35b458c6092374",
+ "aa6363d08544a1ec4da33d7a0be5640d",
+ "0bdc21a3acdebc393bc2c22e71bbeada",
+ "a48f7a484ba4ad3916055c7160665b56",
+ "6e30009c45474a22032678b1bd579c8f",
+ "e57cba016d808aa8a35619df2a65f049",
+#if CONFIG_SMOOTH_HV
+ "55a6c37f39afcbbf5abca4a985b96459",
+ "a623d45b37dafec1f8a75c4c5218913d",
+#endif // CONFIG_SMOOTH_HV
+ };
+ static const char *const kSignatures4x8[kNumAv1IntraFuncs] = {
+ "22d519b796d59644043466320e4ccd14",
+ "09513a738c49b3f9542d27f34abbe1d5",
+ "807ae5e8813443ff01e71be6efacfb69",
+ "cbfa18d0293430b6e9708b0be1fd2394",
+ "346c354c34ec7fa780b576db355dab88",
+ "f97dae85c35359632380b09ca98d611e",
+ "aed1beef71de33856c814ff7d63dd9db",
+ "49c47c04dd3d23d6fc5cc32bf9d40ae4",
+ "a24aade6e22b323ee28c8bf08aa2d234",
+ "aefef502f9e144e71cd27dc7383b3c28",
+ "b284ae5277b85ebdd16b5952149f7458",
+ "8dc5791167271f6f347582e07379f580",
+ "698ae351d8896d89ed9e4e67b6e53eda",
+ "dcc197034a9c45a3d8238bf085835f4e",
+#if CONFIG_SMOOTH_HV
+ "7a35e2c42ffdc2efc2d6d1d75a100fc7",
+ "41ab6cebd4516c87a91b2a593e2c2506",
+#endif
+ };
+
+ if (!strcmp(block_name, "Hbd Intra4x4")) {
+ TestHighbdIntraPred(block_name, pred_funcs, kSignatures4x4, 4, 4);
+ }
+ if (!strcmp(block_name, "Hbd Intra4x8")) {
+ TestHighbdIntraPred(block_name, pred_funcs, kSignatures4x8, 4, 8);
+ }
+}
+
+void TestHighbdIntraPred8(const char *block_name,
+ AvxHighbdPredFunc const *pred_funcs) {
+ static const char *const kSignatures8x8[kNumAv1IntraFuncs] = {
+ "03da8829fe94663047fd108c5fcaa71d",
+ "ecdb37b8120a2d3a4c706b016bd1bfd7",
+ "1d4543ed8d2b9368cb96898095fe8a75",
+ "f791c9a67b913cbd82d9da8ecede30e2",
+ "065c70646f4dbaff913282f55a45a441",
+ "51f87123616662ef7c35691497dfd0ba",
+ "4f53cf8e5f43894dc0759f43c7081f60",
+ "9ffe186a6bc7db95275f1bbddd6f7aba",
+ "a3258a2eae2e2bd55cb8f71351b22998",
+ "8d909f0a2066e39b3216092c6289ece4",
+ "6751f60655aba44aff78aaaf4e967377",
+ "d31a449872fab968a8d41de578338780",
+ "85c01ba03df68f9ece7bd3fa0f8980e6",
+ "ad19b7dac092f56df6d054e1f67f21e7",
+#if CONFIG_SMOOTH_HV
+ "0edc415b5dd7299f7a34fb9f71d31d78",
+ "2bc8ec19e9f4b77a64b8a0a1f6aec7e7",
+#endif // CONFIG_SMOOTH_HV
+ };
+ static const char *const kSignatures8x4[kNumAv1IntraFuncs] = {
+ "d58cd4c4bf3b7bbaa5db5e1a5622ec78",
+ "6e572c35aa782d00cafcb99e9ea047ea",
+ "e8c22a3702b416dc9ab974505afbed09",
+ "aaa4e4762a795aad7ad74de0c662c4e4",
+ "a19f9101967383c3dcbd516dc317a291",
+ "9ab8cb91f1a595b9ebe3fe8de58031aa",
+ "c6c7d65264397d4d31e378e1f1cfd921",
+ "5804158e463ff794b6b8a623f5d2c10d",
+ "c342cdeb39aae4c4f7be10e057029298",
+ "c1bbbcfe4b25f6b8eca6ad2f7ee793d3",
+ "98d1dab8b949859b9c65298ee9f105f8",
+ "396e803aaf6d7a03a231edc48b396051",
+ "2cf9021d5f1169268699807ee118b65f",
+ "ee9605fcbd6fb871f1c5cd81a6989327",
+#if CONFIG_SMOOTH_HV
+ "0edc415b5dd7299f7a34fb9f71d31d78",
+ "2bc8ec19e9f4b77a64b8a0a1f6aec7e7",
+#endif  // CONFIG_SMOOTH_HV
+ };
+ static const char *const kSignatures8x16[kNumAv1IntraFuncs] = {
+ "4562de1d0336610880fdd5685498a9ec",
+ "16310fa7076394f16fc85c4b149d89c9",
+ "0e94af88e1dc573b6f0f499cddd1f530",
+ "dfd245ee20d091c67809160340365aa9",
+ "d3562504327f70c096c5be23fd8a3747",
+ "601b853558502acbb5135eadd2da117a",
+ "e83f9a8bc16b507d2ed0b6b31a25d6f5",
+ "fc8427d942246e8cba81247bb294afb5",
+ "89cde712e4c1ef675ea156ad679c62c7",
+ "0a68c2b28c3b171ad797cf76a7058f10",
+ "e70724010e12d8f374cedd3910ceb0d5",
+ "ad7987e91267503ba6fd3e8be42eb48c",
+ "3c624345a723a1b2b1bea05a6a08bc99",
+ "2a9c781de609e0184cc7ab442050f4e5",
+#if CONFIG_SMOOTH_HV
+ "0ddc5035c22252747126b61fc238c74d",
+ "e43f5d83bab759af69c7b6773fc8f9b2",
+#endif  // CONFIG_SMOOTH_HV
+ };
+ if (!strcmp(block_name, "Hbd Intra8x8")) {
+ TestHighbdIntraPred(block_name, pred_funcs, kSignatures8x8, 8, 8);
+ }
+ if (!strcmp(block_name, "Hbd Intra8x4")) {
+ TestHighbdIntraPred(block_name, pred_funcs, kSignatures8x4, 8, 4);
+ }
+ if (!strcmp(block_name, "Hbd Intra8x16")) {
+ TestHighbdIntraPred(block_name, pred_funcs, kSignatures8x16, 8, 16);
+ }
+}
+
+void TestHighbdIntraPred16(const char *block_name,
+ AvxHighbdPredFunc const *pred_funcs) {
+ static const char *const kSignatures16x16[kNumAv1IntraFuncs] = {
+ "e33cb3f56a878e2fddb1b2fc51cdd275",
+ "c7bff6f04b6052c8ab335d726dbbd52d",
+ "d0b0b47b654a9bcc5c6008110a44589b",
+ "78f5da7b10b2b9ab39f114a33b6254e9",
+ "c78e31d23831abb40d6271a318fdd6f3",
+ "90d1347f4ec9198a0320daecb6ff90b8",
+ "e38e12830e2ee5a01a064ec5998d5948",
+ "cf28bd387b81ad3e5f1a1c779a4b70a0",
+ "24c304330431ddeaf630f6ce94af2eac",
+ "91a329798036bf64e8e00a87b131b8b1",
+ "e536338d1a8ee192b9e591855db1a222",
+ "54ecd47737f71c62d24e3779585113f2",
+ "e63ded54ab3d0e8728b6f24d4f01e53f",
+ "35ce21fbe0ea114c089fc3489a78155d",
+#if CONFIG_SMOOTH_HV
+ "f277f6ef8e4d717f1f0dfe2706ac197d",
+ "e8014d3f41256976c02e0f1e622ba2b9",
+#endif // CONFIG_SMOOTH_HV
+ };
+ static const char *const kSignatures16x8[kNumAv1IntraFuncs] = {
+ "a57d6b5a9bfd30c29591d8717ace9c51",
+ "f5907ba97ee6c53e339e953fc8d845ee",
+ "ea3aa727913ce45af06f89dd1808db5f",
+ "408af4f23e48d14b48ee35ae094fcd18",
+ "85c41cbcb5d744f7961e8950026fbffe",
+ "8a4e588a837638887ba671f8d4910485",
+ "caae3cc3d419bbd28aa389dbe4febee1",
+ "ea67fb80d71b6471467c79662af1186c",
+ "c83f7252412dd1ad2fc6af848e7f6be8",
+ "f45af3d697f42f1b9b8def4e46bac78c",
+ "dca4a2aaf5f63db387e264ba5963943a",
+ "d01b1bcc50b4b66c1231142eae628cd3",
+ "b792d8826b67a21757ea7097cff9e05b",
+ "f94ce7101bb87fd3bb9312112527dbf4",
+#if CONFIG_SMOOTH_HV
+ "688c6660a6dc6fa61fa1aa38e708c209",
+ "0cdf641b4f81d69509c92ae0b93ef5ff",
+#endif  // CONFIG_SMOOTH_HV
+ };
+ static const char *const kSignatures16x32[kNumAv1IntraFuncs] = {
+ "aee4b3b0e3cc02d48e2c40d77f807927",
+ "8baef2b2e789f79c8df9d90ad10f34a4",
+ "038c38ee3c4f090bb8d736eab136aafc",
+ "1a3de2aaeaffd68a9fd6c7f6557b83f3",
+ "385c6e0ea29421dd81011a2934641e26",
+ "6cf96c285d1a2d4787f955dad715b08c",
+ "21f82421fda1c3afca8baca0dc048a52",
+ "eac3734852c99a051f6d15a921d9e7b9",
+ "c81f7ffec79508bf78d0f2c67d8abe96",
+ "14b8c62304f65a06653b9b35dfe12d97",
+ "e0893310042511275ae04e5186ee5326",
+ "b4f05903a6191093be719794417ac6fd",
+ "2d7f75dcd73b9528c8396279ff09ff3a",
+ "5a63cd1841e4ed470e4ca5ef845f2281",
+#if CONFIG_SMOOTH_HV
+ "610d899ca945fbead33287d4335a8b32",
+ "6bafaad81fce37be46730187e78d8b11",
+#endif  // CONFIG_SMOOTH_HV
+ };
+ if (!strcmp(block_name, "Hbd Intra16x16")) {
+ TestHighbdIntraPred(block_name, pred_funcs, kSignatures16x16, 16, 16);
+ }
+ if (!strcmp(block_name, "Hbd Intra16x8")) {
+ TestHighbdIntraPred(block_name, pred_funcs, kSignatures16x8, 16, 8);
+ }
+ if (!strcmp(block_name, "Hbd Intra16x32")) {
+ TestHighbdIntraPred(block_name, pred_funcs, kSignatures16x32, 16, 32);
+ }
+}
+
+void TestHighbdIntraPred32(const char *block_name,
+ AvxHighbdPredFunc const *pred_funcs) {
+ static const char *const kSignatures32x32[kNumAv1IntraFuncs] = {
+ "a3e8056ba7e36628cce4917cd956fedd",
+ "cc7d3024fe8748b512407edee045377e",
+ "2aab0a0f330a1d3e19b8ecb8f06387a3",
+ "a547bc3fb7b06910bf3973122a426661",
+ "26f712514da95042f93d6e8dc8e431dc",
+ "bb08c6e16177081daa3d936538dbc2e3",
+ "4e10f10b082a5b4265080c102d34eb47",
+ "42867c8553285e94ee8e4df7abafbda8",
+ "6496bdee96100667833f546e1be3d640",
+ "2ebfa25bf981377e682e580208504300",
+ "1788695b10a6f82ae1a56686dcbcd0a9",
+ "c3b9c506604a7132bbb5f4e97bdb03f0",
+ "84bf83f94a51b33654ca940c6f8bc057",
+ "7168b03fc31bf29596a344d6a35d007c",
+#if CONFIG_SMOOTH_HV
+ "b073a70d3672f1282236994f5d12e94b",
+ "c51607aebad5dcb3c1e3b58ef9e5b84e",
+#endif // CONFIG_SMOOTH_HV
+ };
+ static const char *const kSignatures32x16[kNumAv1IntraFuncs] = {
+ "290b23c9f5a1de7905bfa71a942da29b",
+ "701e7b82593c66da5052fc4b6afd79ce",
+ "4da828c5455cd246735a663fbb204989",
+ "e3fbeaf234efece8dbd752b77226200c",
+ "4d1d8c969f05155a7e7e84cf7aad021b",
+ "c22e4877c2c946d5bdc0d542e29e70cf",
+ "ffd86b234d65c2e1386a5b5b5c188a69",
+ "50aaaa7d90e300b635ab18cdd73e189b",
+ "a945dc7429df168e2169d81b58a15859",
+ "66725070d7fad02dee78730ba0843e19",
+ "33d873cb05d45df2af4ff59033833db7",
+ "0dd783695b69271f65d56f5516fa6dc0",
+ "8ac1ce815e7780500f842b0beb0bb980",
+ "9fee2e2502b507f25bfad30a55b0b610",
+#if CONFIG_SMOOTH_HV
+ "4ced9c212ec6f9956e27f68a91b59fef",
+ "4a7a0b93f138bb0863e4e465b01ec0b1",
+#endif  // CONFIG_SMOOTH_HV
+ };
+ if (!strcmp(block_name, "Hbd Intra32x32")) {
+ TestHighbdIntraPred(block_name, pred_funcs, kSignatures32x32, 32, 32);
+ }
+ if (!strcmp(block_name, "Hbd Intra32x16")) {
+ TestHighbdIntraPred(block_name, pred_funcs, kSignatures32x16, 32, 16);
+ }
+}
+
+} // namespace
+
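+// Defines one (disabled-by-default) speed test per architecture and block
+// size. The DISABLED_ prefix keeps these long-running benchmarks out of a
+// normal test run; execute them explicitly via
+// --gtest_also_run_disabled_tests plus a --gtest_filter.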
+#define HIGHBD_INTRA_PRED_TEST(arch, test_func, block_size, dc, dc_left, \
+ dc_top, dc_128, v, h, d45e, d135, d117, d153, \
+ d207e, d63e, tm, smooth, smooth_v, smooth_h) \
+ TEST(arch, DISABLED_##test_func) { \
+ static const AvxHighbdPredFunc aom_intra_pred[] = { \
+ dc, dc_left, dc_top, dc_128, v, h, d45e, d135, \
+ d117, d153, d207e, d63e, tm, smooth, smooth_v, smooth_h \
+ }; \
+ test_func(block_size, aom_intra_pred); \
+ }
+
+// -----------------------------------------------------------------------------
+// 4x4
+
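+// When the smooth_v/smooth_h predictors are compiled out, NULL is passed so
+// the harness skips those table slots.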
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_highbd_smooth_v_predictor_4x4_c
+#define smooth_h_pred_func aom_highbd_smooth_h_predictor_4x4_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+HIGHBD_INTRA_PRED_TEST(
+ C_1, TestHighbdIntraPred4, "Hbd Intra4x4", aom_highbd_dc_predictor_4x4_c,
+ aom_highbd_dc_left_predictor_4x4_c, aom_highbd_dc_top_predictor_4x4_c,
+ aom_highbd_dc_128_predictor_4x4_c, aom_highbd_v_predictor_4x4_c,
+ aom_highbd_h_predictor_4x4_c, aom_highbd_d45e_predictor_4x4_c,
+ aom_highbd_d135_predictor_4x4_c, aom_highbd_d117_predictor_4x4_c,
+ aom_highbd_d153_predictor_4x4_c, aom_highbd_d207e_predictor_4x4_c,
+ aom_highbd_d63e_predictor_4x4_c, aom_highbd_paeth_predictor_4x4_c,
+ aom_highbd_smooth_predictor_4x4_c, smooth_v_pred_func, smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if HAVE_SSE2
+HIGHBD_INTRA_PRED_TEST(
+ SSE2_1, TestHighbdIntraPred4, "Hbd Intra4x4",
+ aom_highbd_dc_predictor_4x4_sse2, aom_highbd_dc_left_predictor_4x4_sse2,
+ aom_highbd_dc_top_predictor_4x4_sse2, aom_highbd_dc_128_predictor_4x4_sse2,
+ aom_highbd_v_predictor_4x4_sse2, aom_highbd_h_predictor_4x4_sse2,
+ aom_highbd_d45e_predictor_4x4_sse2, aom_highbd_d135_predictor_4x4_sse2,
+ aom_highbd_d117_predictor_4x4_sse2, aom_highbd_d153_predictor_4x4_sse2,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+
+HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred4, "Hbd Intra4x8",
+ aom_highbd_dc_predictor_4x8_sse2,
+ aom_highbd_dc_left_predictor_4x8_sse2,
+ aom_highbd_dc_top_predictor_4x8_sse2,
+ aom_highbd_dc_128_predictor_4x8_sse2,
+ aom_highbd_v_predictor_4x8_sse2,
+ aom_highbd_h_predictor_4x8_sse2,
+ aom_highbd_d45e_predictor_4x8_sse2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+#endif  // HAVE_SSE2
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_highbd_smooth_v_predictor_4x8_c
+#define smooth_h_pred_func aom_highbd_smooth_h_predictor_4x8_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+HIGHBD_INTRA_PRED_TEST(
+ C_2, TestHighbdIntraPred4, "Hbd Intra4x8", aom_highbd_dc_predictor_4x8_c,
+ aom_highbd_dc_left_predictor_4x8_c, aom_highbd_dc_top_predictor_4x8_c,
+ aom_highbd_dc_128_predictor_4x8_c, aom_highbd_v_predictor_4x8_c,
+ aom_highbd_h_predictor_4x8_c, aom_highbd_d45e_predictor_4x8_c,
+ aom_highbd_d135_predictor_4x8_c, aom_highbd_d117_predictor_4x8_c,
+ aom_highbd_d153_predictor_4x8_c, aom_highbd_d207e_predictor_4x8_c,
+ aom_highbd_d63e_predictor_4x8_c, aom_highbd_paeth_predictor_4x8_c,
+ aom_highbd_smooth_predictor_4x8_c, smooth_v_pred_func, smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+// -----------------------------------------------------------------------------
+// 8x8
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_highbd_smooth_v_predictor_8x8_c
+#define smooth_h_pred_func aom_highbd_smooth_h_predictor_8x8_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+HIGHBD_INTRA_PRED_TEST(
+ C_1, TestHighbdIntraPred8, "Hbd Intra8x8", aom_highbd_dc_predictor_8x8_c,
+ aom_highbd_dc_left_predictor_8x8_c, aom_highbd_dc_top_predictor_8x8_c,
+ aom_highbd_dc_128_predictor_8x8_c, aom_highbd_v_predictor_8x8_c,
+ aom_highbd_h_predictor_8x8_c, aom_highbd_d45e_predictor_8x8_c,
+ aom_highbd_d135_predictor_8x8_c, aom_highbd_d117_predictor_8x8_c,
+ aom_highbd_d153_predictor_8x8_c, aom_highbd_d207e_predictor_8x8_c,
+ aom_highbd_d63e_predictor_8x8_c, aom_highbd_paeth_predictor_8x8_c,
+ aom_highbd_smooth_predictor_8x8_c, smooth_v_pred_func, smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if HAVE_SSE2
+HIGHBD_INTRA_PRED_TEST(SSE2_1, TestHighbdIntraPred8, "Hbd Intra8x8",
+ aom_highbd_dc_predictor_8x8_sse2,
+ aom_highbd_dc_left_predictor_8x8_sse2,
+ aom_highbd_dc_top_predictor_8x8_sse2,
+ aom_highbd_dc_128_predictor_8x8_sse2,
+ aom_highbd_v_predictor_8x8_sse2,
+ aom_highbd_h_predictor_8x8_sse2,
+ aom_highbd_d45e_predictor_8x8_sse2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred8, "Hbd Intra8x4",
+ aom_highbd_dc_predictor_8x4_sse2,
+ aom_highbd_dc_left_predictor_8x4_sse2,
+ aom_highbd_dc_top_predictor_8x4_sse2,
+ aom_highbd_dc_128_predictor_8x4_sse2,
+ aom_highbd_v_predictor_8x4_sse2,
+ aom_highbd_h_predictor_8x4_sse2,
+ aom_highbd_d45e_predictor_8x4_sse2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(SSE2_3, TestHighbdIntraPred8, "Hbd Intra8x16",
+ aom_highbd_dc_predictor_8x16_sse2,
+ aom_highbd_dc_left_predictor_8x16_sse2,
+ aom_highbd_dc_top_predictor_8x16_sse2,
+ aom_highbd_dc_128_predictor_8x16_sse2,
+ aom_highbd_v_predictor_8x16_sse2,
+ aom_highbd_h_predictor_8x16_sse2,
+ aom_highbd_d45e_predictor_8x16_sse2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred8, "Hbd Intra8x8", NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL,
+ aom_highbd_d135_predictor_8x8_ssse3,
+ aom_highbd_d117_predictor_8x8_ssse3,
+ aom_highbd_d153_predictor_8x8_ssse3, NULL, NULL, NULL,
+ NULL, NULL, NULL)
+#endif  // HAVE_SSSE3
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_highbd_smooth_v_predictor_8x4_c
+#define smooth_h_pred_func aom_highbd_smooth_h_predictor_8x4_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+HIGHBD_INTRA_PRED_TEST(
+ C_2, TestHighbdIntraPred8, "Hbd Intra8x4", aom_highbd_dc_predictor_8x4_c,
+ aom_highbd_dc_left_predictor_8x4_c, aom_highbd_dc_top_predictor_8x4_c,
+ aom_highbd_dc_128_predictor_8x4_c, aom_highbd_v_predictor_8x4_c,
+ aom_highbd_h_predictor_8x4_c, aom_highbd_d45e_predictor_8x4_c,
+ aom_highbd_d135_predictor_8x4_c, aom_highbd_d117_predictor_8x4_c,
+ aom_highbd_d153_predictor_8x4_c, aom_highbd_d207e_predictor_8x4_c,
+ aom_highbd_d63e_predictor_8x4_c, aom_highbd_paeth_predictor_8x4_c,
+ aom_highbd_smooth_predictor_8x4_c, smooth_v_pred_func, smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_highbd_smooth_v_predictor_8x16_c
+#define smooth_h_pred_func aom_highbd_smooth_h_predictor_8x16_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+HIGHBD_INTRA_PRED_TEST(
+ C_3, TestHighbdIntraPred8, "Hbd Intra8x16", aom_highbd_dc_predictor_8x16_c,
+ aom_highbd_dc_left_predictor_8x16_c, aom_highbd_dc_top_predictor_8x16_c,
+ aom_highbd_dc_128_predictor_8x16_c, aom_highbd_v_predictor_8x16_c,
+ aom_highbd_h_predictor_8x16_c, aom_highbd_d45e_predictor_8x16_c,
+ aom_highbd_d135_predictor_8x16_c, aom_highbd_d117_predictor_8x16_c,
+ aom_highbd_d153_predictor_8x16_c, aom_highbd_d207e_predictor_8x16_c,
+ aom_highbd_d63e_predictor_8x16_c, aom_highbd_paeth_predictor_8x16_c,
+ aom_highbd_smooth_predictor_8x16_c, smooth_v_pred_func, smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+// -----------------------------------------------------------------------------
+// 16x16
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_highbd_smooth_v_predictor_16x16_c
+#define smooth_h_pred_func aom_highbd_smooth_h_predictor_16x16_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+HIGHBD_INTRA_PRED_TEST(
+ C_1, TestHighbdIntraPred16, "Hbd Intra16x16",
+ aom_highbd_dc_predictor_16x16_c, aom_highbd_dc_left_predictor_16x16_c,
+ aom_highbd_dc_top_predictor_16x16_c, aom_highbd_dc_128_predictor_16x16_c,
+ aom_highbd_v_predictor_16x16_c, aom_highbd_h_predictor_16x16_c,
+ aom_highbd_d45e_predictor_16x16_c, aom_highbd_d135_predictor_16x16_c,
+ aom_highbd_d117_predictor_16x16_c, aom_highbd_d153_predictor_16x16_c,
+ aom_highbd_d207e_predictor_16x16_c, aom_highbd_d63e_predictor_16x16_c,
+ aom_highbd_paeth_predictor_16x16_c, aom_highbd_smooth_predictor_16x16_c,
+ smooth_v_pred_func, smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if HAVE_SSE2
+HIGHBD_INTRA_PRED_TEST(SSE2_1, TestHighbdIntraPred16, "Hbd Intra16x16",
+ aom_highbd_dc_predictor_16x16_sse2,
+ aom_highbd_dc_left_predictor_16x16_sse2,
+ aom_highbd_dc_top_predictor_16x16_sse2,
+ aom_highbd_dc_128_predictor_16x16_sse2,
+ aom_highbd_v_predictor_16x16_sse2,
+ aom_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred16, "Hbd Intra16x8",
+ aom_highbd_dc_predictor_16x8_sse2,
+ aom_highbd_dc_left_predictor_16x8_sse2,
+ aom_highbd_dc_top_predictor_16x8_sse2,
+ aom_highbd_dc_128_predictor_16x8_sse2,
+ aom_highbd_v_predictor_16x8_sse2,
+ aom_highbd_h_predictor_16x8_sse2, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(SSE2_3, TestHighbdIntraPred16, "Hbd Intra16x32",
+ aom_highbd_dc_predictor_16x32_sse2,
+ aom_highbd_dc_left_predictor_16x32_sse2,
+ aom_highbd_dc_top_predictor_16x32_sse2,
+ aom_highbd_dc_128_predictor_16x32_sse2,
+ aom_highbd_v_predictor_16x32_sse2,
+ aom_highbd_h_predictor_16x32_sse2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+HIGHBD_INTRA_PRED_TEST(SSSE3_1, TestHighbdIntraPred16, "Hbd Intra16x16", NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_highbd_d135_predictor_16x16_ssse3,
+ aom_highbd_d117_predictor_16x16_ssse3,
+ aom_highbd_d153_predictor_16x16_ssse3, NULL, NULL, NULL,
+ NULL, NULL, NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+HIGHBD_INTRA_PRED_TEST(AVX2_1, TestHighbdIntraPred16, "Hbd Intra16x16", NULL,
+ NULL, NULL, NULL, NULL, NULL,
+ aom_highbd_d45e_predictor_16x16_avx2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+
+HIGHBD_INTRA_PRED_TEST(AVX2_2, TestHighbdIntraPred16, "Hbd Intra16x8", NULL,
+ NULL, NULL, NULL, NULL, NULL,
+ aom_highbd_d45e_predictor_16x8_avx2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+
+HIGHBD_INTRA_PRED_TEST(AVX2_3, TestHighbdIntraPred16, "Hbd Intra16x32", NULL,
+ NULL, NULL, NULL, NULL, NULL,
+ aom_highbd_d45e_predictor_16x32_avx2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+#endif  // HAVE_AVX2
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_highbd_smooth_v_predictor_16x8_c
+#define smooth_h_pred_func aom_highbd_smooth_h_predictor_16x8_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+HIGHBD_INTRA_PRED_TEST(
+ C_2, TestHighbdIntraPred16, "Hbd Intra16x8", aom_highbd_dc_predictor_16x8_c,
+ aom_highbd_dc_left_predictor_16x8_c, aom_highbd_dc_top_predictor_16x8_c,
+ aom_highbd_dc_128_predictor_16x8_c, aom_highbd_v_predictor_16x8_c,
+ aom_highbd_h_predictor_16x8_c, aom_highbd_d45e_predictor_16x8_c,
+ aom_highbd_d135_predictor_16x8_c, aom_highbd_d117_predictor_16x8_c,
+ aom_highbd_d153_predictor_16x8_c, aom_highbd_d207e_predictor_16x8_c,
+ aom_highbd_d63e_predictor_16x8_c, aom_highbd_paeth_predictor_16x8_c,
+ aom_highbd_smooth_predictor_16x8_c, smooth_v_pred_func, smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_highbd_smooth_v_predictor_16x32_c
+#define smooth_h_pred_func aom_highbd_smooth_h_predictor_16x32_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+HIGHBD_INTRA_PRED_TEST(
+ C_3, TestHighbdIntraPred16, "Hbd Intra16x32",
+ aom_highbd_dc_predictor_16x32_c, aom_highbd_dc_left_predictor_16x32_c,
+ aom_highbd_dc_top_predictor_16x32_c, aom_highbd_dc_128_predictor_16x32_c,
+ aom_highbd_v_predictor_16x32_c, aom_highbd_h_predictor_16x32_c,
+ aom_highbd_d45e_predictor_16x32_c, aom_highbd_d135_predictor_16x32_c,
+ aom_highbd_d117_predictor_16x32_c, aom_highbd_d153_predictor_16x32_c,
+ aom_highbd_d207e_predictor_16x32_c, aom_highbd_d63e_predictor_16x32_c,
+ aom_highbd_paeth_predictor_16x32_c, aom_highbd_smooth_predictor_16x32_c,
+ smooth_v_pred_func, smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+// -----------------------------------------------------------------------------
+// 32x32
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_highbd_smooth_v_predictor_32x32_c
+#define smooth_h_pred_func aom_highbd_smooth_h_predictor_32x32_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+HIGHBD_INTRA_PRED_TEST(
+ C_1, TestHighbdIntraPred32, "Hbd Intra32x32",
+ aom_highbd_dc_predictor_32x32_c, aom_highbd_dc_left_predictor_32x32_c,
+ aom_highbd_dc_top_predictor_32x32_c, aom_highbd_dc_128_predictor_32x32_c,
+ aom_highbd_v_predictor_32x32_c, aom_highbd_h_predictor_32x32_c,
+ aom_highbd_d45e_predictor_32x32_c, aom_highbd_d135_predictor_32x32_c,
+ aom_highbd_d117_predictor_32x32_c, aom_highbd_d153_predictor_32x32_c,
+ aom_highbd_d207e_predictor_32x32_c, aom_highbd_d63e_predictor_32x32_c,
+ aom_highbd_paeth_predictor_32x32_c, aom_highbd_smooth_predictor_32x32_c,
+ smooth_v_pred_func, smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+
+#if HAVE_SSE2
+HIGHBD_INTRA_PRED_TEST(SSE2_1, TestHighbdIntraPred32, "Hbd Intra32x32",
+ aom_highbd_dc_predictor_32x32_sse2,
+ aom_highbd_dc_left_predictor_32x32_sse2,
+ aom_highbd_dc_top_predictor_32x32_sse2,
+ aom_highbd_dc_128_predictor_32x32_sse2,
+ aom_highbd_v_predictor_32x32_sse2,
+ aom_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+HIGHBD_INTRA_PRED_TEST(SSE2_2, TestHighbdIntraPred32, "Hbd Intra32x16",
+ aom_highbd_dc_predictor_32x16_sse2,
+ aom_highbd_dc_left_predictor_32x16_sse2,
+ aom_highbd_dc_top_predictor_32x16_sse2,
+ aom_highbd_dc_128_predictor_32x16_sse2,
+ aom_highbd_v_predictor_32x16_sse2,
+ aom_highbd_h_predictor_32x16_sse2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+HIGHBD_INTRA_PRED_TEST(SSSE3_1, TestHighbdIntraPred32, "Hbd Intra32x32", NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL,
+ aom_highbd_d135_predictor_32x32_ssse3,
+ aom_highbd_d117_predictor_32x32_ssse3,
+ aom_highbd_d153_predictor_32x32_ssse3, NULL, NULL, NULL,
+ NULL, NULL, NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_AVX2
+HIGHBD_INTRA_PRED_TEST(AVX2_1, TestHighbdIntraPred32, "Hbd Intra32x32", NULL,
+ NULL, NULL, NULL, NULL, NULL,
+ aom_highbd_d45e_predictor_32x32_avx2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+
+HIGHBD_INTRA_PRED_TEST(AVX2_2, TestHighbdIntraPred32, "Hbd Intra32x16", NULL,
+ NULL, NULL, NULL, NULL, NULL,
+ aom_highbd_d45e_predictor_32x16_avx2, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+#endif  // HAVE_AVX2
+
+#if CONFIG_SMOOTH_HV
+#define smooth_v_pred_func aom_highbd_smooth_v_predictor_32x16_c
+#define smooth_h_pred_func aom_highbd_smooth_h_predictor_32x16_c
+#else
+#define smooth_v_pred_func NULL
+#define smooth_h_pred_func NULL
+#endif // CONFIG_SMOOTH_HV
+
+HIGHBD_INTRA_PRED_TEST(
+ C_2, TestHighbdIntraPred32, "Hbd Intra32x16",
+ aom_highbd_dc_predictor_32x16_c, aom_highbd_dc_left_predictor_32x16_c,
+ aom_highbd_dc_top_predictor_32x16_c, aom_highbd_dc_128_predictor_32x16_c,
+ aom_highbd_v_predictor_32x16_c, aom_highbd_h_predictor_32x16_c,
+ aom_highbd_d45e_predictor_32x16_c, aom_highbd_d135_predictor_32x16_c,
+ aom_highbd_d117_predictor_32x16_c, aom_highbd_d153_predictor_32x16_c,
+ aom_highbd_d207e_predictor_32x16_c, aom_highbd_d63e_predictor_32x16_c,
+ aom_highbd_paeth_predictor_32x16_c, aom_highbd_smooth_predictor_32x16_c,
+ smooth_v_pred_func, smooth_h_pred_func)
+#undef smooth_v_pred_func
+#undef smooth_h_pred_func
+#endif // CONFIG_HIGHBITDEPTH
+
#include "test/test_libaom.cc"
diff --git a/third_party/aom/test/warp_filter_test_util.cc b/third_party/aom/test/warp_filter_test_util.cc
index ab3c153e7..47ce6c371 100644
--- a/third_party/aom/test/warp_filter_test_util.cc
+++ b/third_party/aom/test/warp_filter_test_util.cc
@@ -21,9 +21,9 @@ namespace AV1WarpFilter {
::testing::internal::ParamGenerator<WarpTestParam> BuildParams(
warp_affine_func filter) {
const WarpTestParam params[] = {
- make_tuple(4, 4, 50000, filter), make_tuple(8, 8, 50000, filter),
- make_tuple(64, 64, 1000, filter), make_tuple(4, 16, 20000, filter),
- make_tuple(32, 8, 10000, filter),
+ make_tuple(4, 4, 100, filter), make_tuple(8, 8, 100, filter),
+ make_tuple(64, 64, 100, filter), make_tuple(4, 16, 100, filter),
+ make_tuple(32, 8, 100, filter),
};
return ::testing::ValuesIn(params);
}
@@ -113,34 +113,76 @@ void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
int32_t mat[8];
int16_t alpha, beta, gamma, delta;
ConvolveParams conv_params = get_conv_params(0, 0, 0);
-
- // Generate an input block and extend its borders horizontally
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) input[i * stride + j] = rnd_.Rand8();
- for (i = 0; i < h; ++i) {
- memset(input + i * stride - border, input[i * stride], border);
- memset(input + i * stride + w, input[i * stride + (w - 1)], border);
- }
+#if CONFIG_CONVOLVE_ROUND
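+  // 32-bit scratch destinations for the no-round path; the C and optimized
+  // outputs are compared element-wise after each call pair.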
+ int32_t *dsta = new int32_t[output_n];
+ int32_t *dstb = new int32_t[output_n];
+#endif
for (i = 0; i < num_iters; ++i) {
+ // Generate an input block and extend its borders horizontally
+ for (int r = 0; r < h; ++r)
+ for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand8();
+ for (int r = 0; r < h; ++r) {
+ memset(input + r * stride - border, input[r * stride], border);
+ memset(input + r * stride + w, input[r * stride + (w - 1)], border);
+ }
+#if CONFIG_CONVOLVE_ROUND
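+    // Flip a coin each iteration between the normal rounding path and the
+    // no-round path that accumulates into the 32-bit destinations.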
+ const int use_no_round = rnd_.Rand8() & 1;
+#endif
for (sub_x = 0; sub_x < 2; ++sub_x)
for (sub_y = 0; sub_y < 2; ++sub_y) {
generate_model(mat, &alpha, &beta, &gamma, &delta);
+#if CONFIG_CONVOLVE_ROUND
+ if (use_no_round) {
+ // Prepare two copies of the destination
+ for (j = 0; j < out_w * out_h; ++j) {
+ int32_t v = rnd_.Rand16();
+ dsta[j] = v;
+ dstb[j] = v;
+ }
+ conv_params = get_conv_params_no_round(0, 0, 0, dsta, out_w);
+ } else {
+ conv_params = get_conv_params(0, 0, 0);
+ }
+#endif
av1_warp_affine_c(mat, input, w, h, stride, output, 32, 32, out_w,
out_h, out_w, sub_x, sub_y, &conv_params, alpha, beta,
gamma, delta);
+#if CONFIG_CONVOLVE_ROUND
+ if (use_no_round) {
+ conv_params = get_conv_params_no_round(0, 0, 0, dstb, out_w);
+ }
+#endif
test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma, delta);
+#if CONFIG_CONVOLVE_ROUND
+ if (use_no_round) {
+ for (j = 0; j < out_w * out_h; ++j)
+ ASSERT_EQ(dsta[j], dstb[j])
+ << "Pixel mismatch at index " << j << " = (" << (j % out_w)
+ << ", " << (j / out_w) << ") on iteration " << i;
+ } else {
+ for (j = 0; j < out_w * out_h; ++j)
+ ASSERT_EQ(output[j], output2[j])
+ << "Pixel mismatch at index " << j << " = (" << (j % out_w)
+ << ", " << (j / out_w) << ") on iteration " << i;
+ }
+#else
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
+#endif  // CONFIG_CONVOLVE_ROUND
}
}
delete[] input_;
delete[] output;
delete[] output2;
+#if CONFIG_CONVOLVE_ROUND
+ delete[] dsta;
+ delete[] dstb;
+#endif
}
} // namespace AV1WarpFilter
@@ -149,14 +191,14 @@ namespace AV1HighbdWarpFilter {
::testing::internal::ParamGenerator<HighbdWarpTestParam> GetDefaultParams() {
const HighbdWarpTestParam defaultParams[] = {
- make_tuple(4, 4, 50000, 8), make_tuple(8, 8, 50000, 8),
- make_tuple(64, 64, 1000, 8), make_tuple(4, 16, 20000, 8),
- make_tuple(32, 8, 10000, 8), make_tuple(4, 4, 50000, 10),
- make_tuple(8, 8, 50000, 10), make_tuple(64, 64, 1000, 10),
- make_tuple(4, 16, 20000, 10), make_tuple(32, 8, 10000, 10),
- make_tuple(4, 4, 50000, 12), make_tuple(8, 8, 50000, 12),
- make_tuple(64, 64, 1000, 12), make_tuple(4, 16, 20000, 12),
- make_tuple(32, 8, 10000, 12),
+ make_tuple(4, 4, 100, 8), make_tuple(8, 8, 100, 8),
+ make_tuple(64, 64, 100, 8), make_tuple(4, 16, 100, 8),
+ make_tuple(32, 8, 100, 8), make_tuple(4, 4, 100, 10),
+ make_tuple(8, 8, 100, 10), make_tuple(64, 64, 100, 10),
+ make_tuple(4, 16, 100, 10), make_tuple(32, 8, 100, 10),
+ make_tuple(4, 4, 100, 12), make_tuple(8, 8, 100, 12),
+ make_tuple(64, 64, 100, 12), make_tuple(4, 16, 100, 12),
+ make_tuple(32, 8, 100, 12),
};
return ::testing::ValuesIn(defaultParams);
}
@@ -250,39 +292,82 @@ void AV1HighbdWarpFilterTest::RunCheckOutput(
int32_t mat[8];
int16_t alpha, beta, gamma, delta;
ConvolveParams conv_params = get_conv_params(0, 0, 0);
-
- // Generate an input block and extend its borders horizontally
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) input[i * stride + j] = rnd_.Rand16() & mask;
- for (i = 0; i < h; ++i) {
- for (j = 0; j < border; ++j) {
- input[i * stride - border + j] = input[i * stride];
- input[i * stride + w + j] = input[i * stride + (w - 1)];
- }
- }
+#if CONFIG_CONVOLVE_ROUND
+ int32_t *dsta = new int32_t[output_n];
+ int32_t *dstb = new int32_t[output_n];
+#endif
for (i = 0; i < num_iters; ++i) {
+ // Generate an input block and extend its borders horizontally
+ for (int r = 0; r < h; ++r)
+ for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask;
+ for (int r = 0; r < h; ++r) {
+ for (int c = 0; c < border; ++c) {
+ input[r * stride - border + c] = input[r * stride];
+ input[r * stride + w + c] = input[r * stride + (w - 1)];
+ }
+ }
+#if CONFIG_CONVOLVE_ROUND
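+    // As in the lowbd test above, randomly exercise the no-round path on
+    // half of the iterations.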
+ const int use_no_round = rnd_.Rand8() & 1;
+#endif
for (sub_x = 0; sub_x < 2; ++sub_x)
for (sub_y = 0; sub_y < 2; ++sub_y) {
generate_model(mat, &alpha, &beta, &gamma, &delta);
-
+#if CONFIG_CONVOLVE_ROUND
+ if (use_no_round) {
+ // Prepare two copies of the destination
+ for (j = 0; j < out_w * out_h; ++j) {
+ int32_t v = rnd_.Rand16();
+ dsta[j] = v;
+ dstb[j] = v;
+ }
+ conv_params = get_conv_params_no_round(0, 0, 0, dsta, out_w);
+ } else {
+ conv_params = get_conv_params(0, 0, 0);
+ }
+#endif
av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
out_w, out_h, out_w, sub_x, sub_y, bd,
&conv_params, alpha, beta, gamma, delta);
+#if CONFIG_CONVOLVE_ROUND
+ if (use_no_round) {
+ // TODO(angiebird): Change this to test_impl once we have SIMD
+ // implementation
+ conv_params = get_conv_params_no_round(0, 0, 0, dstb, out_w);
+ }
+#endif
test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
out_w, sub_x, sub_y, bd, &conv_params, alpha, beta, gamma,
delta);
+#if CONFIG_CONVOLVE_ROUND
+ if (use_no_round) {
+ for (j = 0; j < out_w * out_h; ++j)
+ ASSERT_EQ(dsta[j], dstb[j])
+ << "Pixel mismatch at index " << j << " = (" << (j % out_w)
+ << ", " << (j / out_w) << ") on iteration " << i;
+ } else {
+ for (j = 0; j < out_w * out_h; ++j)
+ ASSERT_EQ(output[j], output2[j])
+ << "Pixel mismatch at index " << j << " = (" << (j % out_w)
+ << ", " << (j / out_w) << ") on iteration " << i;
+ }
+#else
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
+#endif  // CONFIG_CONVOLVE_ROUND
}
}
delete[] input_;
delete[] output;
delete[] output2;
+#if CONFIG_CONVOLVE_ROUND
+ delete[] dsta;
+ delete[] dstb;
+#endif
}
} // namespace AV1HighbdWarpFilter
#endif // CONFIG_HIGHBITDEPTH
diff --git a/third_party/aom/test/webm_video_source.h b/third_party/aom/test/webm_video_source.h
index 286f69cbf..b6c998042 100644
--- a/third_party/aom/test/webm_video_source.h
+++ b/third_party/aom/test/webm_video_source.h
@@ -41,8 +41,8 @@ class WebMVideoSource : public CompressedVideoSource {
virtual void Begin() {
aom_ctx_->file = OpenTestDataFile(file_name_);
- ASSERT_TRUE(aom_ctx_->file != NULL) << "Input file open failed. Filename: "
- << file_name_;
+ ASSERT_TRUE(aom_ctx_->file != NULL)
+ << "Input file open failed. Filename: " << file_name_;
ASSERT_EQ(file_is_webm(webm_ctx_, aom_ctx_), 1) << "file is not WebM";
diff --git a/third_party/aom/test/y4m_video_source.h b/third_party/aom/test/y4m_video_source.h
index 2279d7970..f70c30be6 100644
--- a/third_party/aom/test/y4m_video_source.h
+++ b/third_party/aom/test/y4m_video_source.h
@@ -35,8 +35,8 @@ class Y4mVideoSource : public VideoSource {
virtual void OpenSource() {
CloseSource();
input_file_ = OpenTestDataFile(file_name_);
- ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
- << file_name_;
+ ASSERT_TRUE(input_file_ != NULL)
+ << "Input file open failed. Filename: " << file_name_;
}
virtual void ReadSourceToStart() {
diff --git a/third_party/aom/test/yuv_video_source.h b/third_party/aom/test/yuv_video_source.h
index 9ff76a8d8..88cabd5bb 100644
--- a/third_party/aom/test/yuv_video_source.h
+++ b/third_party/aom/test/yuv_video_source.h
@@ -44,8 +44,8 @@ class YUVVideoSource : public VideoSource {
virtual void Begin() {
if (input_file_) fclose(input_file_);
input_file_ = OpenTestDataFile(file_name_);
- ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
- << file_name_;
+ ASSERT_TRUE(input_file_ != NULL)
+ << "Input file open failed. Filename: " << file_name_;
if (start_)
fseek(input_file_, static_cast<unsigned>(raw_size_) * start_, SEEK_SET);
diff --git a/third_party/aom/third_party/fastfeat/README.libvpx b/third_party/aom/third_party/fastfeat/README.libvpx
index 2edd6e7bf..1e58a303b 100644
--- a/third_party/aom/third_party/fastfeat/README.libvpx
+++ b/third_party/aom/third_party/fastfeat/README.libvpx
@@ -36,3 +36,4 @@ functions. To see how to use the individual functions, see fast.c
Local Modifications:
Add lines to turn off clang formatting for these files
Remove Fast 10, 11 and 12
+Convert tabs to spaces
diff --git a/third_party/aom/third_party/fastfeat/fast_9.c b/third_party/aom/third_party/fastfeat/fast_9.c
index 36aee19fe..ec167a953 100644
--- a/third_party/aom/third_party/fastfeat/fast_9.c
+++ b/third_party/aom/third_party/fastfeat/fast_9.c
@@ -7,5904 +7,5904 @@ typedef unsigned char byte;
int fast9_corner_score(const byte* p, const int pixel[], int bstart)
{
- int bmin = bstart;
- int bmax = 255;
- int b = (bmax + bmin)/2;
+ int bmin = bstart;
+ int bmax = 255;
+ int b = (bmax + bmin)/2;
- /*Compute the score using binary search*/
- for(;;)
- {
- int cb = *p + b;
- int c_b= *p - b;
+ /*Compute the score using binary search*/
+ for(;;)
+ {
+ int cb = *p + b;
+ int c_b= *p - b;
- if( p[pixel[0]] > cb)
- if( p[pixel[1]] > cb)
- if( p[pixel[2]] > cb)
- if( p[pixel[3]] > cb)
+ if( p[pixel[0]] > cb)
+ if( p[pixel[1]] > cb)
+ if( p[pixel[2]] > cb)
+ if( p[pixel[3]] > cb)
if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[7]] < c_b)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[14]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[15]] < c_b)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[15]] > cb)
goto is_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[6]] < c_b)
- if( p[pixel[15]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[14]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[7]] < c_b)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
goto is_a_corner;
- else
+ else
goto is_not_a_corner;
+ else if( p[pixel[14]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
goto is_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[6]] < c_b)
+ if( p[pixel[15]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
goto is_a_corner;
- else
+ else
goto is_not_a_corner;
+ else if( p[pixel[13]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[5]] < c_b)
- if( p[pixel[14]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
goto is_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
+ else if( p[pixel[13]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[12]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
+ goto is_not_a_corner;
+ else if( p[pixel[5]] < c_b)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
else
- goto is_not_a_corner;
- else
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
+ else if( p[pixel[12]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[13]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[14]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[6]] < c_b)
- goto is_a_corner;
+ goto is_not_a_corner;
+ else if( p[pixel[14]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[6]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
+ goto is_not_a_corner;
+ else
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
goto is_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[12]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[6]] < c_b)
- goto is_a_corner;
+ goto is_not_a_corner;
+ else if( p[pixel[12]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[6]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else if( p[pixel[4]] < c_b)
- if( p[pixel[13]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- goto is_a_corner;
+ if( p[pixel[13]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- goto is_a_corner;
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[11]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
+ else if( p[pixel[11]] < c_b)
+ if( p[pixel[5]] < c_b)
if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[12]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[13]] < c_b)
if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[5]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- goto is_a_corner;
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- goto is_a_corner;
+ goto is_not_a_corner;
+ else
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[11]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
+ else if( p[pixel[11]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[5]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[3]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[3]] < c_b)
if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else if( p[pixel[10]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[4]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else if( p[pixel[10]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- goto is_a_corner;
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[4]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[13]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else if( p[pixel[2]] < c_b)
- if( p[pixel[9]] > cb)
+ goto is_not_a_corner;
+ else if( p[pixel[2]] < c_b)
+ if( p[pixel[9]] > cb)
if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
+ if( p[pixel[3]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else if( p[pixel[9]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[9]] < c_b)
if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[3]] < c_b)
- goto is_a_corner;
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[3]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- if( p[pixel[9]] > cb)
+ goto is_not_a_corner;
+ else
+ if( p[pixel[9]] > cb)
if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
+ if( p[pixel[3]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else if( p[pixel[9]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[9]] < c_b)
if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[3]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[3]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[12]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
- else if( p[pixel[1]] < c_b)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
+ else if( p[pixel[1]] < c_b)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[3]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[2]] > cb)
+ if( p[pixel[3]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[2]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
- else if( p[pixel[8]] < c_b)
- if( p[pixel[7]] < c_b)
+ else if( p[pixel[8]] < c_b)
+ if( p[pixel[7]] < c_b)
if( p[pixel[9]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[3]] < c_b)
- if( p[pixel[2]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[3]] < c_b)
+ if( p[pixel[2]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[3]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[2]] > cb)
+ if( p[pixel[3]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[2]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
- else if( p[pixel[8]] < c_b)
- if( p[pixel[7]] < c_b)
+ else if( p[pixel[8]] < c_b)
+ if( p[pixel[7]] < c_b)
if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[3]] < c_b)
- if( p[pixel[2]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[11]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[3]] < c_b)
+ if( p[pixel[2]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[11]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else if( p[pixel[0]] < c_b)
- if( p[pixel[1]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[7]] > cb)
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else if( p[pixel[0]] < c_b)
+ if( p[pixel[1]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[7]] > cb)
if( p[pixel[9]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[2]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[6]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[3]] > cb)
+ if( p[pixel[2]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
- else if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
+ else if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[3]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[2]] < c_b)
+ if( p[pixel[3]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[2]] < c_b)
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else if( p[pixel[1]] < c_b)
- if( p[pixel[2]] > cb)
- if( p[pixel[9]] > cb)
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else if( p[pixel[1]] < c_b)
+ if( p[pixel[2]] > cb)
+ if( p[pixel[9]] > cb)
if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[3]] > cb)
- goto is_a_corner;
+ if( p[pixel[8]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[3]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else if( p[pixel[9]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[9]] < c_b)
if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
+ if( p[pixel[3]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
- else if( p[pixel[2]] < c_b)
- if( p[pixel[3]] > cb)
+ else if( p[pixel[2]] < c_b)
+ if( p[pixel[3]] > cb)
if( p[pixel[10]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[4]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else if( p[pixel[3]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[3]] < c_b)
if( p[pixel[4]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[14]] > cb)
- goto is_a_corner;
+ if( p[pixel[13]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[5]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[11]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- goto is_a_corner;
+ goto is_not_a_corner;
+ else if( p[pixel[13]] < c_b)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[12]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
+ else
goto is_not_a_corner;
+ else if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[5]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else if( p[pixel[4]] < c_b)
- if( p[pixel[5]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[6]] > cb)
- goto is_a_corner;
+ if( p[pixel[5]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[6]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[14]] < c_b)
- if( p[pixel[12]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
+ goto is_not_a_corner;
+ else if( p[pixel[14]] < c_b)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[13]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[15]] < c_b)
goto is_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[6]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
+ if( p[pixel[6]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[5]] < c_b)
- if( p[pixel[6]] > cb)
- if( p[pixel[15]] < c_b)
- if( p[pixel[13]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
+ goto is_not_a_corner;
+ else if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[15]] < c_b)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
+ else if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[6]] < c_b)
- if( p[pixel[7]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
+ goto is_not_a_corner;
+ else if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
+ else if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
+ else if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ goto is_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[13]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
+ if( p[pixel[15]] < c_b)
goto is_a_corner;
- else
+ else
goto is_not_a_corner;
+ else
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[6]] > cb)
- goto is_a_corner;
+ if( p[pixel[13]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
goto is_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[14]] > cb)
- goto is_a_corner;
+ if( p[pixel[12]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[6]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- goto is_a_corner;
+ goto is_not_a_corner;
+ else if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- if( p[pixel[10]] < c_b)
- goto is_a_corner;
+ goto is_not_a_corner;
+ else
+ if( p[pixel[11]] > cb)
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[5]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ if( p[pixel[10]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
if( p[pixel[10]] > cb)
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- goto is_a_corner;
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[4]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[13]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[9]] > cb)
+ goto is_not_a_corner;
+ else
+ if( p[pixel[9]] > cb)
if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[3]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[12]] > cb)
- goto is_a_corner;
+ if( p[pixel[8]] > cb)
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[3]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[12]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else if( p[pixel[9]] < c_b)
+ goto is_not_a_corner;
+ else if( p[pixel[9]] < c_b)
if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
+ if( p[pixel[3]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
- else
- if( p[pixel[8]] > cb)
- if( p[pixel[7]] > cb)
+ else
+ if( p[pixel[8]] > cb)
+ if( p[pixel[7]] > cb)
if( p[pixel[9]] > cb)
- if( p[pixel[10]] > cb)
- if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[2]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[11]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[10]] > cb)
+ if( p[pixel[6]] > cb)
+ if( p[pixel[5]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[3]] > cb)
+ if( p[pixel[2]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[11]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
- else if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
+ else if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[3]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
+ if( p[pixel[2]] < c_b)
+ if( p[pixel[3]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[6]] < c_b)
+ if( p[pixel[7]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[2]] < c_b)
- if( p[pixel[3]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[6]] < c_b)
- if( p[pixel[7]] < c_b)
- goto is_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- if( p[pixel[7]] > cb)
- if( p[pixel[8]] > cb)
- if( p[pixel[9]] > cb)
+ goto is_not_a_corner;
+ else
+ if( p[pixel[7]] > cb)
+ if( p[pixel[8]] > cb)
+ if( p[pixel[9]] > cb)
if( p[pixel[6]] > cb)
- if( p[pixel[5]] > cb)
- if( p[pixel[4]] > cb)
- if( p[pixel[3]] > cb)
- if( p[pixel[2]] > cb)
- if( p[pixel[1]] > cb)
- goto is_a_corner;
- else
- if( p[pixel[10]] > cb)
- goto is_a_corner;
+ if( p[pixel[5]] > cb)
+ if( p[pixel[4]] > cb)
+ if( p[pixel[3]] > cb)
+ if( p[pixel[2]] > cb)
+ if( p[pixel[1]] > cb)
+ goto is_a_corner;
+ else
+ if( p[pixel[10]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- goto is_a_corner;
+ if( p[pixel[10]] > cb)
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- goto is_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
+ else
if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- goto is_a_corner;
+ if( p[pixel[11]] > cb)
+ if( p[pixel[12]] > cb)
+ if( p[pixel[13]] > cb)
+ if( p[pixel[14]] > cb)
+ if( p[pixel[15]] > cb)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] > cb)
- if( p[pixel[11]] > cb)
- if( p[pixel[12]] > cb)
- if( p[pixel[13]] > cb)
- if( p[pixel[14]] > cb)
- if( p[pixel[15]] > cb)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else if( p[pixel[7]] < c_b)
- if( p[pixel[8]] < c_b)
- if( p[pixel[9]] < c_b)
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else if( p[pixel[7]] < c_b)
+ if( p[pixel[8]] < c_b)
+ if( p[pixel[9]] < c_b)
if( p[pixel[6]] < c_b)
- if( p[pixel[5]] < c_b)
- if( p[pixel[4]] < c_b)
- if( p[pixel[3]] < c_b)
- if( p[pixel[2]] < c_b)
- if( p[pixel[1]] < c_b)
- goto is_a_corner;
- else
- if( p[pixel[10]] < c_b)
- goto is_a_corner;
+ if( p[pixel[5]] < c_b)
+ if( p[pixel[4]] < c_b)
+ if( p[pixel[3]] < c_b)
+ if( p[pixel[2]] < c_b)
+ if( p[pixel[1]] < c_b)
+ goto is_a_corner;
+ else
+ if( p[pixel[10]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- goto is_a_corner;
+ if( p[pixel[10]] < c_b)
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- goto is_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
+ else
if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- goto is_a_corner;
+ if( p[pixel[11]] < c_b)
+ if( p[pixel[12]] < c_b)
+ if( p[pixel[13]] < c_b)
+ if( p[pixel[14]] < c_b)
+ if( p[pixel[15]] < c_b)
+ goto is_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- if( p[pixel[10]] < c_b)
- if( p[pixel[11]] < c_b)
- if( p[pixel[12]] < c_b)
- if( p[pixel[13]] < c_b)
- if( p[pixel[14]] < c_b)
- if( p[pixel[15]] < c_b)
- goto is_a_corner;
- else
- goto is_not_a_corner;
- else
goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
- else
- goto is_not_a_corner;
+ goto is_not_a_corner;
else
- goto is_not_a_corner;
- else
+ goto is_not_a_corner;
+ else
goto is_not_a_corner;
+ else
+ goto is_not_a_corner;
- is_a_corner:
- bmin=b;
- goto end_if;
+is_a_corner:
+ bmin=b;
+ goto end_if;
- is_not_a_corner:
- bmax=b;
- goto end_if;
+is_not_a_corner:
+ bmax=b;
+ goto end_if;
- end_if:
+end_if:
- if(bmin == bmax - 1 || bmin == bmax)
- return bmin;
- b = (bmin + bmax) / 2;
- }
+ if(bmin == bmax - 1 || bmin == bmax)
+ return bmin;
+ b = (bmin + bmax) / 2;
+ }
}
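
The function ending above is machine-generated: the nested comparisons encode the FAST-9 segment test (an arc of at least 9 contiguous circle pixels all brighter than *p + b or all darker than *p - b), and the is_a_corner/is_not_a_corner labels drive a bisection over the threshold b. A minimal sketch of that bisection follows, with the generated tree replaced by a naive reference predicate; is_corner_at and corner_score_sketch are illustrative names, not part of this file.

typedef unsigned char byte;

/* Naive reference form of the segment test that the generated tree
 * above encodes: does some arc of 9 contiguous circle pixels lie
 * entirely above *p + b, or entirely below *p - b? */
static int is_corner_at(const byte* p, const int pixel[16], int b)
{
    int cb  = *p + b;
    int c_b = *p - b;
    int start, k;
    for(start = 0; start < 16; start++)
    {
        int bright = 1, dark = 1;
        for(k = 0; k < 9; k++)
        {
            int v = p[pixel[(start + k) & 15]];
            bright &= (v > cb);
            dark   &= (v < c_b);
        }
        if(bright || dark)
            return 1;
    }
    return 0;
}

/* Sketch of the bisection performed by fast9_corner_score: the score is
 * the largest b at which the segment test still passes, found by
 * shrinking [bmin, bmax] exactly as the labels above do. */
static int corner_score_sketch(const byte* p, const int pixel[16], int bstart)
{
    int bmin = bstart;            /* assumed to pass the test */
    int bmax = 255;               /* assumed to fail it */
    int b = (bmax + bmin) / 2;
    for(;;)
    {
        if(is_corner_at(p, pixel, b))
            bmin = b;             /* still a corner: raise the threshold */
        else
            bmax = b;             /* no longer a corner: lower it */
        if(bmin == bmax - 1 || bmin == bmax)
            return bmin;
        b = (bmin + bmax) / 2;
    }
}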
static void make_offsets(int pixel[], int row_stride)
{
- pixel[0] = 0 + row_stride * 3;
- pixel[1] = 1 + row_stride * 3;
- pixel[2] = 2 + row_stride * 2;
- pixel[3] = 3 + row_stride * 1;
- pixel[4] = 3 + row_stride * 0;
- pixel[5] = 3 + row_stride * -1;
- pixel[6] = 2 + row_stride * -2;
- pixel[7] = 1 + row_stride * -3;
- pixel[8] = 0 + row_stride * -3;
- pixel[9] = -1 + row_stride * -3;
- pixel[10] = -2 + row_stride * -2;
- pixel[11] = -3 + row_stride * -1;
- pixel[12] = -3 + row_stride * 0;
- pixel[13] = -3 + row_stride * 1;
- pixel[14] = -2 + row_stride * 2;
- pixel[15] = -1 + row_stride * 3;
+ pixel[0] = 0 + row_stride * 3;
+ pixel[1] = 1 + row_stride * 3;
+ pixel[2] = 2 + row_stride * 2;
+ pixel[3] = 3 + row_stride * 1;
+ pixel[4] = 3 + row_stride * 0;
+ pixel[5] = 3 + row_stride * -1;
+ pixel[6] = 2 + row_stride * -2;
+ pixel[7] = 1 + row_stride * -3;
+ pixel[8] = 0 + row_stride * -3;
+ pixel[9] = -1 + row_stride * -3;
+ pixel[10] = -2 + row_stride * -2;
+ pixel[11] = -3 + row_stride * -1;
+ pixel[12] = -3 + row_stride * 0;
+ pixel[13] = -3 + row_stride * 1;
+ pixel[14] = -2 + row_stride * 2;
+ pixel[15] = -1 + row_stride * 3;
}
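
The sixteen offsets trace a Bresenham circle of radius 3 around the candidate pixel p. With y growing downward (positive row_stride), the indices land as:

            9  8  7
        10         6
      11             5
      12      p      4
      13             3
        14         2
           15  0  1

An equivalent table-driven construction, shown only to make the geometry explicit (fast9_dx, fast9_dy, and make_offsets_from_table are illustrative names, not part of this file):

static const int fast9_dx[16] =
    { 0, 1, 2, 3, 3, 3, 2, 1, 0, -1, -2, -3, -3, -3, -2, -1 };
static const int fast9_dy[16] =
    { 3, 3, 2, 1, 0, -1, -2, -3, -3, -3, -2, -1, 0, 1, 2, 3 };

static void make_offsets_from_table(int pixel[16], int row_stride)
{
    int k;
    for(k = 0; k < 16; k++)
        pixel[k] = fast9_dx[k] + row_stride * fast9_dy[k];
}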
int* fast9_score(const byte* i, int stride, xy* corners, int num_corners, int b)
{
- int* scores = (int*)malloc(sizeof(int)* num_corners);
- int n;
+ int* scores = (int*)malloc(sizeof(int)* num_corners);
+ int n;
- int pixel[16];
- make_offsets(pixel, stride);
+ int pixel[16];
+ make_offsets(pixel, stride);
- for(n=0; n < num_corners; n++)
- scores[n] = fast9_corner_score(i + corners[n].y*stride + corners[n].x, pixel, b);
+ for(n=0; n < num_corners; n++)
+ scores[n] = fast9_corner_score(i + corners[n].y*stride + corners[n].x, pixel, b);
- return scores;
+ return scores;
}
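
Both fast9_score above and fast9_detect below hand back malloc'd buffers that the caller must free; neither checks the allocation result. A typical driver, sketched only from the two signatures in this file (the image buffer, dimensions, and threshold value 20 are placeholders):

    int num_corners = 0;
    xy* corners = fast9_detect(img, width, height,
                               width /* stride */, 20 /* threshold b */,
                               &num_corners);
    int* scores = fast9_score(img, width, corners, num_corners, 20);
    /* ... nonmax suppression or direct use of corners[0..num_corners-1] ... */
    free(scores);
    free(corners);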
xy* fast9_detect(const byte* im, int xsize, int ysize, int stride, int b, int* ret_num_corners)
{
- int num_corners=0;
- xy* ret_corners;
- int rsize=512;
- int pixel[16];
- int x, y;
+ int num_corners=0;
+ xy* ret_corners;
+ int rsize=512;
+ int pixel[16];
+ int x, y;
- ret_corners = (xy*)malloc(sizeof(xy)*rsize);
- make_offsets(pixel, stride);
+ ret_corners = (xy*)malloc(sizeof(xy)*rsize);
+ make_offsets(pixel, stride);
- for(y=3; y < ysize - 3; y++)
- for(x=3; x < xsize - 3; x++)
- {
- const byte* p = im + y*stride + x;
+ for(y=3; y < ysize - 3; y++)
+ for(x=3; x < xsize - 3; x++)
+ {
+ const byte* p = im + y*stride + x;
- int cb = *p + b;
- int c_b= *p - b;
- if(p[pixel[0]] > cb)
- if(p[pixel[1]] > cb)
+ int cb = *p + b;
+ int c_b= *p - b;
+ if(p[pixel[0]] > cb)
+ if(p[pixel[1]] > cb)
if(p[pixel[2]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else if(p[pixel[7]] < c_b)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else if(p[pixel[14]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
+ if(p[pixel[3]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ {}
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else if(p[pixel[6]] < c_b)
- if(p[pixel[15]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] > cb)
{}
- else
- continue;
+ else
+ continue;
+ else if(p[pixel[7]] < c_b)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else if(p[pixel[14]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
{}
- else
+ else
+ continue;
+ else
continue;
+ else if(p[pixel[6]] < c_b)
+ if(p[pixel[15]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else if(p[pixel[13]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
{}
- else
+ else
+ continue;
+ else
continue;
+ else if(p[pixel[13]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[5]] < c_b)
- if(p[pixel[14]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
+ continue;
+ else if(p[pixel[5]] < c_b)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[15]] > cb)
{}
- else
+ else
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
continue;
+ else if(p[pixel[12]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[13]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[12]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
continue;
+ else if(p[pixel[14]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[6]] < c_b)
+ {}
+ else
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[14]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[6]] < c_b)
- {}
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[15]] < c_b)
- {}
- else
continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
+ continue;
+ else
continue;
+ else if(p[pixel[12]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[6]] < c_b)
+ {}
+ else
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
+ continue;
+ else if(p[pixel[4]] < c_b)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
{}
- else
- continue;
+ else
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[12]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
+ continue;
+ else if(p[pixel[11]] < c_b)
+ if(p[pixel[5]] < c_b)
if(p[pixel[6]] < c_b)
- {}
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[12]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[15]] < c_b)
- {}
- else
continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[4]] < c_b)
- if(p[pixel[13]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- {}
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- {}
+ continue;
+ else if(p[pixel[13]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[5]] < c_b)
+ {}
+ else
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[11]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
+ if(p[pixel[5]] < c_b)
if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- {}
- else
- if(p[pixel[14]] < c_b)
- {}
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ continue;
else
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- {}
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- {}
+ continue;
+ else if(p[pixel[11]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[5]] < c_b)
+ {}
+ else
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
+ continue;
+ else
continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[11]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- {}
- else
- if(p[pixel[14]] < c_b)
- {}
+ else if(p[pixel[3]] < c_b)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[3]] < c_b)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
+ else if(p[pixel[10]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[4]] < c_b)
+ {}
+ else
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[10]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- {}
- else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
continue;
- else
- continue;
else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[10]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- {}
+ continue;
+ else if(p[pixel[10]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[4]] < c_b)
+ {}
+ else
+ if(p[pixel[13]] < c_b)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
continue;
- else
- continue;
else
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else if(p[pixel[2]] < c_b)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
+ if(p[pixel[3]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- continue;
+ continue;
else
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
+ continue;
+ else if(p[pixel[9]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[3]] < c_b)
+ {}
+ else
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
+ continue;
+ else
continue;
- else
- continue;
else
- continue;
- else if(p[pixel[9]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[3]] < c_b)
- {}
+ continue;
+ else
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
+ if(p[pixel[3]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
+ else
+ continue;
+ else if(p[pixel[9]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[3]] < c_b)
+ {}
+ else
+ if(p[pixel[12]] < c_b)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
+ continue;
+ else if(p[pixel[1]] < c_b)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ if(p[pixel[3]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
+ if(p[pixel[2]] > cb)
+ if(p[pixel[3]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- continue;
- else if(p[pixel[9]] < c_b)
+ continue;
+ else if(p[pixel[8]] < c_b)
if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
+ if(p[pixel[9]] < c_b)
if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[3]] < c_b)
- {}
- else
- if(p[pixel[12]] < c_b)
- {}
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[3]] < c_b)
+ if(p[pixel[2]] < c_b)
+ {}
+ else
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
+ continue;
+ else
continue;
- else if(p[pixel[1]] < c_b)
+ else
if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ if(p[pixel[3]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
+ if(p[pixel[2]] > cb)
+ if(p[pixel[3]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- if(p[pixel[2]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
+ continue;
else if(p[pixel[8]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[3]] < c_b)
- if(p[pixel[2]] < c_b)
- {}
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[9]] < c_b)
if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[3]] < c_b)
+ if(p[pixel[2]] < c_b)
+ {}
+ else
+ if(p[pixel[11]] < c_b)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
+ continue;
+ else if(p[pixel[0]] < c_b)
+ if(p[pixel[1]] > cb)
if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
+ if(p[pixel[7]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[3]] > cb)
+ if(p[pixel[2]] > cb)
+ {}
+ else
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- if(p[pixel[2]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
+ continue;
else if(p[pixel[8]] < c_b)
- if(p[pixel[7]] < c_b)
if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[3]] < c_b)
- if(p[pixel[2]] < c_b)
- {}
- else
- if(p[pixel[11]] < c_b)
- {}
- else
- continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
- else
- continue;
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ if(p[pixel[3]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
- else
- continue;
+ if(p[pixel[2]] < c_b)
+ if(p[pixel[3]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else if(p[pixel[0]] < c_b)
- if(p[pixel[1]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[7]] > cb)
+ continue;
+ else if(p[pixel[1]] < c_b)
+ if(p[pixel[2]] > cb)
if(p[pixel[9]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[2]] > cb)
- {}
- else
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- {}
- else
- continue;
+ if(p[pixel[6]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[3]] > cb)
+ {}
+ else
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
+ continue;
+ else if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ if(p[pixel[3]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- if(p[pixel[2]] < c_b)
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[1]] < c_b)
- if(p[pixel[2]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
+ continue;
+ else if(p[pixel[2]] < c_b)
+ if(p[pixel[3]] > cb)
if(p[pixel[10]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[3]] > cb)
- {}
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[4]] > cb)
+ {}
+ else
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
continue;
- else
- continue;
- else
+ else
continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
+ else if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
continue;
- else
- continue;
else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[2]] < c_b)
- if(p[pixel[3]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- {}
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
+ else if(p[pixel[3]] < c_b)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[5]] > cb)
+ {}
+ else
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
continue;
- else
- continue;
- else
+ else
continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
+ else if(p[pixel[13]] < c_b)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[12]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
continue;
+ else if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[3]] < c_b)
- if(p[pixel[4]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
+ if(p[pixel[5]] > cb)
if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- {}
- else
- if(p[pixel[14]] > cb)
- {}
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ continue;
else
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[11]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- {}
+ else if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[6]] > cb)
+ {}
+ else
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[4]] < c_b)
- if(p[pixel[5]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
+ else if(p[pixel[14]] < c_b)
if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
if(p[pixel[6]] > cb)
- {}
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[13]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[15]] > cb)
- {}
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[14]] < c_b)
- if(p[pixel[12]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
+ else if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[15]] < c_b)
{}
- else
- continue;
+ else
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[6]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[5]] < c_b)
- if(p[pixel[6]] > cb)
- if(p[pixel[15]] < c_b)
- if(p[pixel[13]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
+ if(p[pixel[6]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[6]] < c_b)
- if(p[pixel[7]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
+ continue;
+ else if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[15]] < c_b)
if(p[pixel[13]] > cb)
- if(p[pixel[15]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
{}
- else
- continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[13]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] > cb)
if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
{}
- else
+ else
+ continue;
+ else
continue;
+ else if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ {}
else
- continue;
- else
- continue;
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
else
- continue;
- else
- continue;
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[6]] > cb)
- {}
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[15]] > cb)
- {}
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
+ else if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
{}
- else
- continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- {}
- else
- if(p[pixel[14]] > cb)
- {}
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[6]] > cb)
+ {}
+ else
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ continue;
else
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- {}
+ else if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- {}
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- {}
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[5]] > cb)
+ {}
+ else
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
continue;
- else
- continue;
else
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
- continue;
+ else if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- if(p[pixel[9]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[8]] > cb)
if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[3]] > cb)
- {}
- else
- if(p[pixel[12]] > cb)
- {}
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[4]] > cb)
+ {}
+ else
+ if(p[pixel[13]] > cb)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ continue;
else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
continue;
- else
- continue;
else
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
+ else if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ if(p[pixel[9]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
continue;
- else
- continue;
- else
+ else
continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
+ else
+ continue;
+ else
+ if(p[pixel[9]] > cb)
+ if(p[pixel[7]] > cb)
+ if(p[pixel[8]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[3]] > cb)
+ {}
+ else
+ if(p[pixel[12]] > cb)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
+ continue;
else
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- if(p[pixel[8]] < c_b)
- {}
- else
- continue;
- else
- continue;
+ continue;
+ else if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ if(p[pixel[3]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ if(p[pixel[8]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
+ continue;
+ else
continue;
- else
- continue;
else
- continue;
- else
- continue;
- else
+ continue;
+ else
if(p[pixel[8]] > cb)
- if(p[pixel[7]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[10]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[2]] > cb)
- {}
- else
- if(p[pixel[11]] > cb)
- {}
- else
- continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- {}
- else
- continue;
+ if(p[pixel[7]] > cb)
+ if(p[pixel[9]] > cb)
+ if(p[pixel[10]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[3]] > cb)
+ if(p[pixel[2]] > cb)
+ {}
+ else
+ if(p[pixel[11]] > cb)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
continue;
- else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- continue;
- else
- continue;
+ continue;
else if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ if(p[pixel[3]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
+ if(p[pixel[2]] < c_b)
+ if(p[pixel[3]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[7]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- if(p[pixel[2]] < c_b)
- if(p[pixel[3]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[7]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- if(p[pixel[7]] > cb)
+ continue;
+ else
+ if(p[pixel[7]] > cb)
if(p[pixel[8]] > cb)
- if(p[pixel[9]] > cb)
- if(p[pixel[6]] > cb)
- if(p[pixel[5]] > cb)
- if(p[pixel[4]] > cb)
- if(p[pixel[3]] > cb)
- if(p[pixel[2]] > cb)
- if(p[pixel[1]] > cb)
- {}
- else
- if(p[pixel[10]] > cb)
- {}
+ if(p[pixel[9]] > cb)
+ if(p[pixel[6]] > cb)
+ if(p[pixel[5]] > cb)
+ if(p[pixel[4]] > cb)
+ if(p[pixel[3]] > cb)
+ if(p[pixel[2]] > cb)
+ if(p[pixel[1]] > cb)
+ {}
+ else
+ if(p[pixel[10]] > cb)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- {}
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- {}
+ if(p[pixel[10]] > cb)
+ if(p[pixel[11]] > cb)
+ if(p[pixel[12]] > cb)
+ if(p[pixel[13]] > cb)
+ if(p[pixel[14]] > cb)
+ if(p[pixel[15]] > cb)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- {}
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- if(p[pixel[10]] > cb)
- if(p[pixel[11]] > cb)
- if(p[pixel[12]] > cb)
- if(p[pixel[13]] > cb)
- if(p[pixel[14]] > cb)
- if(p[pixel[15]] > cb)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- continue;
- else if(p[pixel[7]] < c_b)
+ continue;
+ else if(p[pixel[7]] < c_b)
if(p[pixel[8]] < c_b)
- if(p[pixel[9]] < c_b)
- if(p[pixel[6]] < c_b)
- if(p[pixel[5]] < c_b)
- if(p[pixel[4]] < c_b)
- if(p[pixel[3]] < c_b)
- if(p[pixel[2]] < c_b)
- if(p[pixel[1]] < c_b)
- {}
- else
- if(p[pixel[10]] < c_b)
- {}
+ if(p[pixel[9]] < c_b)
+ if(p[pixel[6]] < c_b)
+ if(p[pixel[5]] < c_b)
+ if(p[pixel[4]] < c_b)
+ if(p[pixel[3]] < c_b)
+ if(p[pixel[2]] < c_b)
+ if(p[pixel[1]] < c_b)
+ {}
+ else
+ if(p[pixel[10]] < c_b)
+ {}
+ else
+ continue;
+ else
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- {}
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
- else
- continue;
+ continue;
else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- {}
+ if(p[pixel[10]] < c_b)
+ if(p[pixel[11]] < c_b)
+ if(p[pixel[12]] < c_b)
+ if(p[pixel[13]] < c_b)
+ if(p[pixel[14]] < c_b)
+ if(p[pixel[15]] < c_b)
+ {}
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
+ else
+ continue;
else
- continue;
- else
- continue;
+ continue;
else
- continue;
- else
- continue;
- else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- {}
- else
- continue;
- else
continue;
- else
- continue;
- else
- continue;
- else
- continue;
else
- if(p[pixel[10]] < c_b)
- if(p[pixel[11]] < c_b)
- if(p[pixel[12]] < c_b)
- if(p[pixel[13]] < c_b)
- if(p[pixel[14]] < c_b)
- if(p[pixel[15]] < c_b)
- {}
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
- continue;
- else
continue;
- else
- continue;
else
- continue;
- else
+ continue;
+ else
continue;
- if(num_corners == rsize)
- {
- rsize*=2;
- ret_corners = (xy*)realloc(ret_corners, sizeof(xy)*rsize);
- }
- ret_corners[num_corners].x = x;
- ret_corners[num_corners].y = y;
- num_corners++;
+ if(num_corners == rsize)
+ {
+ rsize*=2;
+ ret_corners = (xy*)realloc(ret_corners, sizeof(xy)*rsize);
+ }
+ ret_corners[num_corners].x = x;
+ ret_corners[num_corners].y = y;
+ num_corners++;
- }
+ }
- *ret_num_corners = num_corners;
- return ret_corners;
+ *ret_num_corners = num_corners;
+ return ret_corners;
}
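
The generated tree above is libaom's import of the FAST-9 corner detector: a candidate pixel is accepted when at least 9 contiguous pixels on the 16-pixel Bresenham circle around it are all brighter than cb or all darker than c_b (the two thresholds derived from the centre pixel), and the unrolled if/else cascade hard-codes every position such an arc can start at. A minimal loop-based sketch of the same decision, reusing the diff's names (p, pixel[16], cb, c_b) but none of its learned branch ordering:

    /* Sketch only: equivalent FAST-9 segment test as a loop. pixel[]
     * holds the 16 offsets of the Bresenham circle around p. */
    static int fast9_is_corner(const unsigned char *p, const int pixel[16],
                               int cb, int c_b) {
      int bright = 0, dark = 0, i;
      for (i = 0; i < 32; ++i) { /* two laps so wrapping arcs are counted */
        const int v = p[pixel[i & 15]];
        bright = (v > cb) ? bright + 1 : 0;
        dark = (v < c_b) ? dark + 1 : 0;
        if (bright >= 9 || dark >= 9) return 1;
      }
      return 0;
    }

The tree form exists purely for speed; once a corner survives it, the realloc block above grows ret_corners geometrically and records the (x, y) position.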
diff --git a/third_party/aom/third_party/googletest/README.libaom b/third_party/aom/third_party/googletest/README.libaom
index a53d7e008..9784dd51b 100644
--- a/third_party/aom/third_party/googletest/README.libaom
+++ b/third_party/aom/third_party/googletest/README.libaom
@@ -22,3 +22,5 @@ Local Modifications:
CONTRIBUTORS
LICENSE
README.md
+- Suppress unsigned overflow instrumentation in the LCG
+ https://github.com/google/googletest/pull/1066
diff --git a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h
index 0094ed507..da57e65d3 100644
--- a/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h
+++ b/third_party/aom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h
@@ -985,6 +985,19 @@ using ::std::tuple_size;
# define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
#endif // __clang__
+// A function-level attribute to disable UndefinedBehaviorSanitizer's (defined)
+// unsigned integer overflow instrumentation.
+#if defined(__clang__)
+# if defined(__has_attribute) && __has_attribute(no_sanitize)
+# define GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_ \
+ __attribute__((no_sanitize("unsigned-integer-overflow")))
+# else
+# define GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_
+# endif // defined(__has_attribute) && __has_attribute(no_sanitize)
+#else
+# define GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_
+#endif // __clang__
+
namespace testing {
class Message;
diff --git a/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc b/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc
index d882ab2e3..5a8932c73 100644
--- a/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc
+++ b/third_party/aom/third_party/googletest/src/googletest/src/gtest.cc
@@ -308,6 +308,7 @@ namespace internal {
// Generates a random number from [0, range), using a Linear
// Congruential Generator (LCG). Crashes if 'range' is 0 or greater
// than kMaxRange.
+GTEST_ATTRIBUTE_NO_SANITIZE_UNSIGNED_OVERFLOW_
UInt32 Random::Generate(UInt32 range) {
// These constants are the same as are used in glibc's rand(3).
state_ = (1103515245U*state_ + 12345U) % kMaxRange;
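
The LCG's 32-bit multiply wraps modulo 2^32, which is well defined for unsigned arithmetic but is exactly what UBSan's optional unsigned-integer-overflow check reports; the attribute added above silences that instrumentation for this one function. A self-contained sketch of the same pattern, with an illustrative macro name rather than gtest's:

    #include <stdint.h>

    #if defined(__clang__) && defined(__has_attribute)
    #if __has_attribute(no_sanitize)
    #define NO_SANITIZE_U32_WRAP \
      __attribute__((no_sanitize("unsigned-integer-overflow")))
    #endif
    #endif
    #ifndef NO_SANITIZE_U32_WRAP
    #define NO_SANITIZE_U32_WRAP /* no-op on other compilers */
    #endif

    /* glibc-style LCG: the multiply is meant to wrap mod 2^32. */
    NO_SANITIZE_U32_WRAP
    static uint32_t lcg_next(uint32_t *state) {
      *state = 1103515245U * (*state) + 12345U;
      return *state;
    }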
diff --git a/third_party/aom/third_party/libwebm/README.libaom b/third_party/aom/third_party/libwebm/README.libaom
index e9c4046c2..df543c44c 100644
--- a/third_party/aom/third_party/libwebm/README.libaom
+++ b/third_party/aom/third_party/libwebm/README.libaom
@@ -1,5 +1,5 @@
URL: https://chromium.googlesource.com/webm/libwebm
-Version: a97c484bfd6b5de4b1b61efe33089b55d810b412
+Version: 7baf4cb898f5e39fcdca2d4583fd2b14f488c725
License: BSD
License File: LICENSE.txt
diff --git a/third_party/aom/third_party/libwebm/common/file_util.cc b/third_party/aom/third_party/libwebm/common/file_util.cc
index 6dab146dd..618ffc087 100644
--- a/third_party/aom/third_party/libwebm/common/file_util.cc
+++ b/third_party/aom/third_party/libwebm/common/file_util.cc
@@ -17,6 +17,7 @@
#include <cstring>
#include <fstream>
#include <ios>
+#include <string>
namespace libwebm {
@@ -41,7 +42,12 @@ std::string GetTempFileName() {
return temp_file_name;
#else
char tmp_file_name[_MAX_PATH];
+#if defined _MSC_VER || defined MINGW_HAS_SECURE_API
errno_t err = tmpnam_s(tmp_file_name);
+#else
+ char* fname_pointer = tmpnam(tmp_file_name);
+ errno_t err = (fname_pointer == &tmp_file_name[0]) ? 0 : -1;
+#endif
if (err == 0) {
return std::string(tmp_file_name);
}
@@ -65,6 +71,15 @@ uint64_t GetFileSize(const std::string& file_name) {
return file_size;
}
+bool GetFileContents(const std::string& file_name, std::string* contents) {
+ std::ifstream file(file_name.c_str());
+ *contents = std::string(static_cast<size_t>(GetFileSize(file_name)), 0);
+ if (file.good() && contents->size()) {
+ file.read(&(*contents)[0], contents->size());
+ }
+ return !file.fail();
+}
+
TempFileDeleter::TempFileDeleter() { file_name_ = GetTempFileName(); }
TempFileDeleter::~TempFileDeleter() {
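
The new GetFileContents above sizes its buffer from GetFileSize and then reads the whole file in one call. The same size-then-read pattern in plain C, as a hedged illustration (the names here are ours, not libwebm's; the real helper fills a std::string and returns bool):

    #include <stdio.h>
    #include <stdlib.h>

    /* Reads a whole file into a malloc'd, NUL-terminated buffer;
     * caller frees. Returns NULL on any error. */
    static char *read_file(const char *name, size_t *out_size) {
      FILE *f = fopen(name, "rb");
      long size;
      char *buf = NULL;
      if (f == NULL) return NULL;
      if (fseek(f, 0, SEEK_END) == 0 && (size = ftell(f)) >= 0 &&
          fseek(f, 0, SEEK_SET) == 0) {
        buf = (char *)malloc((size_t)size + 1);
        if (buf != NULL && fread(buf, 1, (size_t)size, f) == (size_t)size) {
          buf[size] = '\0';
          *out_size = (size_t)size;
        } else {
          free(buf);
          buf = NULL;
        }
      }
      fclose(f);
      return buf;
    }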
diff --git a/third_party/aom/third_party/libwebm/common/file_util.h b/third_party/aom/third_party/libwebm/common/file_util.h
index ed89ef3f7..a87373464 100644
--- a/third_party/aom/third_party/libwebm/common/file_util.h
+++ b/third_party/aom/third_party/libwebm/common/file_util.h
@@ -22,6 +22,9 @@ std::string GetTempFileName();
// Returns size of file specified by |file_name|, or 0 upon failure.
uint64_t GetFileSize(const std::string& file_name);
+// Gets the contents of |file_name| as a string. Returns false on error.
+bool GetFileContents(const std::string& file_name, std::string* contents);
+
// Manages life of temporary file specified at time of construction. Deletes
// file upon destruction.
class TempFileDeleter {
diff --git a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.cc b/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.cc
index 30252bc9d..27e831023 100644
--- a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.cc
+++ b/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxer.cc
@@ -8,6 +8,8 @@
#include "mkvmuxer/mkvmuxer.h"
+#include <stdint.h>
+
#include <cfloat>
#include <climits>
#include <cstdio>
@@ -2667,7 +2669,7 @@ bool Cluster::QueueOrWriteFrame(const Frame* const frame) {
// and write it if it is okay to do so, i.e., no other track has a held-back
// frame with timestamp <= the timestamp of the frame in question.
std::vector<std::list<Frame*>::iterator> frames_to_erase;
- for (std::list<Frame *>::iterator
+ for (std::list<Frame*>::iterator
current_track_iterator = stored_frames_[track_number].begin(),
end = --stored_frames_[track_number].end();
current_track_iterator != end; ++current_track_iterator) {
diff --git a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc b/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
index bd98b1104..355d4e22b 100644
--- a/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
+++ b/third_party/aom/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
@@ -10,6 +10,7 @@
#ifdef __ANDROID__
#include <fcntl.h>
+#include <unistd.h>
#endif
#include <cassert>
diff --git a/third_party/aom/third_party/libwebm/mkvparser/mkvparser.cc b/third_party/aom/third_party/libwebm/mkvparser/mkvparser.cc
index 37f230d0a..1eeaa1365 100644
--- a/third_party/aom/third_party/libwebm/mkvparser/mkvparser.cc
+++ b/third_party/aom/third_party/libwebm/mkvparser/mkvparser.cc
@@ -5035,6 +5035,10 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
double value = 0;
const long long value_parse_status =
UnserializeFloat(reader, read_pos, child_size, value);
+ if (value < -FLT_MAX || value > FLT_MAX ||
+ (value > 0.0 && value < FLT_MIN)) {
+ return false;
+ }
mm_ptr->luminance_max = static_cast<float>(value);
if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 ||
mm_ptr->luminance_max > 9999.99) {
@@ -5044,6 +5048,10 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
double value = 0;
const long long value_parse_status =
UnserializeFloat(reader, read_pos, child_size, value);
+ if (value < -FLT_MAX || value > FLT_MAX ||
+ (value > 0.0 && value < FLT_MIN)) {
+ return false;
+ }
mm_ptr->luminance_min = static_cast<float>(value);
if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 ||
mm_ptr->luminance_min > 999.9999) {
@@ -7903,6 +7911,10 @@ long Block::Parse(const Cluster* pCluster) {
return E_FILE_FORMAT_INVALID;
curr.len = static_cast<long>(frame_size);
+ // Check if size + curr.len could overflow.
+ if (size > LLONG_MAX - curr.len) {
+ return E_FILE_FORMAT_INVALID;
+ }
size += curr.len; // contribution of this frame
--frame_count;
@@ -7981,6 +7993,10 @@ long long Block::GetTime(const Cluster* pCluster) const {
const long long scale = pInfo->GetTimeCodeScale();
assert(scale >= 1);
+ // Check if tc * scale could overflow.
+ if (tc != 0 && scale > LLONG_MAX / tc) {
+ return -1;
+ }
const long long ns = tc * scale;
return ns;
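
Both mkvparser hunks apply the same rule: prove the arithmetic cannot exceed LLONG_MAX before performing it, since signed overflow is undefined behavior in C and C++. A stand-alone sketch of the two guards, simplified under the parser's own preconditions (both operands non-negative, divisor-side operand at least 1):

    #include <limits.h>

    /* Return 1 and store the result, or 0 if it would overflow. */
    static int add_ll_checked(long long a, long long b, long long *out) {
      if (a > LLONG_MAX - b) return 0; /* mirrors the size + curr.len test */
      *out = a + b;
      return 1;
    }

    static int mul_ll_checked(long long a, long long b, long long *out) {
      if (a != 0 && b > LLONG_MAX / a) return 0; /* mirrors tc * scale */
      *out = a * b;
      return 1;
    }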
diff --git a/third_party/aom/third_party/vector/vector.c b/third_party/aom/third_party/vector/vector.c
new file mode 100644
index 000000000..2f0a38e86
--- /dev/null
+++ b/third_party/aom/third_party/vector/vector.c
@@ -0,0 +1,543 @@
+/*
+The MIT License (MIT)
+Copyright (c) 2016 Peter Goldsborough
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#define __STDC_WANT_LIB_EXT1__ 1
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/vector/vector.h"
+
+int vector_setup(Vector *vector, size_t capacity, size_t element_size) {
+ assert(vector != NULL);
+
+ if (vector == NULL) return VECTOR_ERROR;
+
+ vector->size = 0;
+ vector->capacity = MAX(VECTOR_MINIMUM_CAPACITY, capacity);
+ vector->element_size = element_size;
+ vector->data = malloc(vector->capacity * element_size);
+
+ return vector->data == NULL ? VECTOR_ERROR : VECTOR_SUCCESS;
+}
+
+int vector_copy(Vector *destination, Vector *source) {
+ assert(destination != NULL);
+ assert(source != NULL);
+ assert(vector_is_initialized(source));
+ assert(!vector_is_initialized(destination));
+
+ if (destination == NULL) return VECTOR_ERROR;
+ if (source == NULL) return VECTOR_ERROR;
+ if (vector_is_initialized(destination)) return VECTOR_ERROR;
+ if (!vector_is_initialized(source)) return VECTOR_ERROR;
+
+ /* Copy ALL the data */
+ destination->size = source->size;
+ destination->capacity = source->size * 2;
+ destination->element_size = source->element_size;
+
+ /* Note that we are not necessarily allocating the same capacity */
+ destination->data = malloc(destination->capacity * source->element_size);
+ if (destination->data == NULL) return VECTOR_ERROR;
+
+ memcpy(destination->data, source->data, vector_byte_size(source));
+
+ return VECTOR_SUCCESS;
+}
+
+int vector_copy_assign(Vector *destination, Vector *source) {
+ assert(destination != NULL);
+ assert(source != NULL);
+ assert(vector_is_initialized(source));
+ assert(vector_is_initialized(destination));
+
+ if (destination == NULL) return VECTOR_ERROR;
+ if (source == NULL) return VECTOR_ERROR;
+ if (!vector_is_initialized(destination)) return VECTOR_ERROR;
+ if (!vector_is_initialized(source)) return VECTOR_ERROR;
+
+ vector_destroy(destination);
+
+ return vector_copy(destination, source);
+}
+
+int vector_move(Vector *destination, Vector *source) {
+ assert(destination != NULL);
+ assert(source != NULL);
+
+ if (destination == NULL) return VECTOR_ERROR;
+ if (source == NULL) return VECTOR_ERROR;
+
+ *destination = *source;
+ source->data = NULL;
+
+ return VECTOR_SUCCESS;
+}
+
+int vector_move_assign(Vector *destination, Vector *source) {
+ vector_swap(destination, source);
+ return vector_destroy(source);
+}
+
+int vector_swap(Vector *destination, Vector *source) {
+ void *temp;
+
+ assert(destination != NULL);
+ assert(source != NULL);
+ assert(vector_is_initialized(source));
+ assert(vector_is_initialized(destination));
+
+ if (destination == NULL) return VECTOR_ERROR;
+ if (source == NULL) return VECTOR_ERROR;
+ if (!vector_is_initialized(destination)) return VECTOR_ERROR;
+ if (!vector_is_initialized(source)) return VECTOR_ERROR;
+
+ _vector_swap(&destination->size, &source->size);
+ _vector_swap(&destination->capacity, &source->capacity);
+ _vector_swap(&destination->element_size, &source->element_size);
+
+ temp = destination->data;
+ destination->data = source->data;
+ source->data = temp;
+
+ return VECTOR_SUCCESS;
+}
+
+int vector_destroy(Vector *vector) {
+ assert(vector != NULL);
+
+ if (vector == NULL) return VECTOR_ERROR;
+
+ free(vector->data);
+ vector->data = NULL;
+
+ return VECTOR_SUCCESS;
+}
+
+/* Insertion */
+int vector_push_back(Vector *vector, void *element) {
+ assert(vector != NULL);
+ assert(element != NULL);
+
+ if (_vector_should_grow(vector)) {
+ if (_vector_adjust_capacity(vector) == VECTOR_ERROR) {
+ return VECTOR_ERROR;
+ }
+ }
+
+ _vector_assign(vector, vector->size, element);
+
+ ++vector->size;
+
+ return VECTOR_SUCCESS;
+}
+
+int vector_push_front(Vector *vector, void *element) {
+ return vector_insert(vector, 0, element);
+}
+
+int vector_insert(Vector *vector, size_t index, void *element) {
+ void *offset;
+
+ assert(vector != NULL);
+ assert(element != NULL);
+ assert(index <= vector->size);
+
+ if (vector == NULL) return VECTOR_ERROR;
+ if (element == NULL) return VECTOR_ERROR;
+ if (vector->element_size == 0) return VECTOR_ERROR;
+ if (index > vector->size) return VECTOR_ERROR;
+
+ if (_vector_should_grow(vector)) {
+ if (_vector_adjust_capacity(vector) == VECTOR_ERROR) {
+ return VECTOR_ERROR;
+ }
+ }
+
+ /* Move other elements to the right */
+ if (_vector_move_right(vector, index) == VECTOR_ERROR) {
+ return VECTOR_ERROR;
+ }
+
+ /* Insert the element */
+ offset = _vector_offset(vector, index);
+ memcpy(offset, element, vector->element_size);
+ ++vector->size;
+
+ return VECTOR_SUCCESS;
+}
+
+int vector_assign(Vector *vector, size_t index, void *element) {
+ assert(vector != NULL);
+ assert(element != NULL);
+ assert(index < vector->size);
+
+ if (vector == NULL) return VECTOR_ERROR;
+ if (element == NULL) return VECTOR_ERROR;
+ if (vector->element_size == 0) return VECTOR_ERROR;
+ if (index >= vector->size) return VECTOR_ERROR;
+
+ _vector_assign(vector, index, element);
+
+ return VECTOR_SUCCESS;
+}
+
+/* Deletion */
+int vector_pop_back(Vector *vector) {
+ assert(vector != NULL);
+ assert(vector->size > 0);
+
+ if (vector == NULL) return VECTOR_ERROR;
+ if (vector->element_size == 0) return VECTOR_ERROR;
+
+ --vector->size;
+
+#ifndef VECTOR_NO_SHRINK
+ if (_vector_should_shrink(vector)) {
+ _vector_adjust_capacity(vector);
+ }
+#endif
+
+ return VECTOR_SUCCESS;
+}
+
+int vector_pop_front(Vector *vector) { return vector_erase(vector, 0); }
+
+int vector_erase(Vector *vector, size_t index) {
+ assert(vector != NULL);
+ assert(index < vector->size);
+
+ if (vector == NULL) return VECTOR_ERROR;
+ if (vector->element_size == 0) return VECTOR_ERROR;
+ if (index >= vector->size) return VECTOR_ERROR;
+
+  /* Overwrite the erased element by shifting the tail left */
+ _vector_move_left(vector, index);
+
+#ifndef VECTOR_NO_SHRINK
+ if (--vector->size == vector->capacity / 4) {
+ _vector_adjust_capacity(vector);
+ }
+#endif
+
+ return VECTOR_SUCCESS;
+}
+
+int vector_clear(Vector *vector) { return vector_resize(vector, 0); }
+
+/* Lookup */
+void *vector_get(Vector *vector, size_t index) {
+ assert(vector != NULL);
+ assert(index < vector->size);
+
+ if (vector == NULL) return NULL;
+ if (vector->element_size == 0) return NULL;
+ if (index >= vector->size) return NULL;
+
+ return _vector_offset(vector, index);
+}
+
+const void *vector_const_get(const Vector *vector, size_t index) {
+ assert(vector != NULL);
+ assert(index < vector->size);
+
+ if (vector == NULL) return NULL;
+ if (vector->element_size == 0) return NULL;
+ if (index >= vector->size) return NULL;
+
+ return _vector_const_offset(vector, index);
+}
+
+void *vector_front(Vector *vector) { return vector_get(vector, 0); }
+
+void *vector_back(Vector *vector) {
+ return vector_get(vector, vector->size - 1);
+}
+
+/* Information */
+
+bool vector_is_initialized(const Vector *vector) {
+ return vector->data != NULL;
+}
+
+size_t vector_byte_size(const Vector *vector) {
+ return vector->size * vector->element_size;
+}
+
+size_t vector_free_space(const Vector *vector) {
+ return vector->capacity - vector->size;
+}
+
+bool vector_is_empty(const Vector *vector) { return vector->size == 0; }
+
+/* Memory management */
+int vector_resize(Vector *vector, size_t new_size) {
+  /* Shrink or grow the buffer as needed; on failure the vector is left
+     unchanged. */
+  if (new_size <= vector->capacity * VECTOR_SHRINK_THRESHOLD ||
+      new_size > vector->capacity) {
+    if (_vector_reallocate(vector, new_size * VECTOR_GROWTH_FACTOR) ==
+        VECTOR_ERROR) {
+      return VECTOR_ERROR;
+    }
+  }
+
+  vector->size = new_size;
+
+  return VECTOR_SUCCESS;
+}
+
+int vector_reserve(Vector *vector, size_t minimum_capacity) {
+ if (minimum_capacity > vector->capacity) {
+ if (_vector_reallocate(vector, minimum_capacity) == VECTOR_ERROR) {
+ return VECTOR_ERROR;
+ }
+ }
+
+ return VECTOR_SUCCESS;
+}
+
+int vector_shrink_to_fit(Vector *vector) {
+ return _vector_reallocate(vector, vector->size);
+}
+
+/* Iterators */
+Iterator vector_begin(Vector *vector) { return vector_iterator(vector, 0); }
+
+Iterator vector_end(Vector *vector) {
+ return vector_iterator(vector, vector->size);
+}
+
+Iterator vector_iterator(Vector *vector, size_t index) {
+ Iterator iterator = { NULL, 0 };
+
+ assert(vector != NULL);
+ assert(index <= vector->size);
+
+ if (vector == NULL) return iterator;
+ if (index > vector->size) return iterator;
+ if (vector->element_size == 0) return iterator;
+
+ iterator.pointer = _vector_offset(vector, index);
+ iterator.element_size = vector->element_size;
+
+ return iterator;
+}
+
+void *iterator_get(Iterator *iterator) { return iterator->pointer; }
+
+int iterator_erase(Vector *vector, Iterator *iterator) {
+ size_t index = iterator_index(vector, iterator);
+
+ if (vector_erase(vector, index) == VECTOR_ERROR) {
+ return VECTOR_ERROR;
+ }
+
+ *iterator = vector_iterator(vector, index);
+
+ return VECTOR_SUCCESS;
+}
+
+void iterator_increment(Iterator *iterator) {
+ assert(iterator != NULL);
+ // iterator->pointer += iterator->element_size;
+ iterator->pointer =
+ (unsigned char *)iterator->pointer + iterator->element_size;
+}
+
+void iterator_decrement(Iterator *iterator) {
+ assert(iterator != NULL);
+ // iterator->pointer -= iterator->element_size;
+ iterator->pointer =
+ (unsigned char *)iterator->pointer - iterator->element_size;
+}
+
+void *iterator_next(Iterator *iterator) {
+ void *current = iterator->pointer;
+ iterator_increment(iterator);
+
+ return current;
+}
+
+void *iterator_previous(Iterator *iterator) {
+ void *current = iterator->pointer;
+ iterator_decrement(iterator);
+
+ return current;
+}
+
+bool iterator_equals(Iterator *first, Iterator *second) {
+ assert(first->element_size == second->element_size);
+ return first->pointer == second->pointer;
+}
+
+bool iterator_is_before(Iterator *first, Iterator *second) {
+ assert(first->element_size == second->element_size);
+ return first->pointer < second->pointer;
+}
+
+bool iterator_is_after(Iterator *first, Iterator *second) {
+ assert(first->element_size == second->element_size);
+ return first->pointer > second->pointer;
+}
+
+size_t iterator_index(Vector *vector, Iterator *iterator) {
+ assert(vector != NULL);
+ assert(iterator != NULL);
+ // return (iterator->pointer - vector->data) / vector->element_size;
+ return ((unsigned char *)iterator->pointer - (unsigned char *)vector->data) /
+ vector->element_size;
+}
+
+/***** PRIVATE *****/
+
+bool _vector_should_grow(Vector *vector) {
+ assert(vector->size <= vector->capacity);
+ return vector->size == vector->capacity;
+}
+
+bool _vector_should_shrink(Vector *vector) {
+ assert(vector->size <= vector->capacity);
+ return vector->size == vector->capacity * VECTOR_SHRINK_THRESHOLD;
+}
+
+size_t _vector_free_bytes(const Vector *vector) {
+ return vector_free_space(vector) * vector->element_size;
+}
+
+void *_vector_offset(Vector *vector, size_t index) {
+ // return vector->data + (index * vector->element_size);
+ return (unsigned char *)vector->data + (index * vector->element_size);
+}
+
+const void *_vector_const_offset(const Vector *vector, size_t index) {
+ // return vector->data + (index * vector->element_size);
+ return (unsigned char *)vector->data + (index * vector->element_size);
+}
+
+void _vector_assign(Vector *vector, size_t index, void *element) {
+ /* Insert the element */
+ void *offset = _vector_offset(vector, index);
+ memcpy(offset, element, vector->element_size);
+}
+
+int _vector_move_right(Vector *vector, size_t index) {
+ assert(vector->size < vector->capacity);
+
+  /* The location to start moving from. */
+  void *offset = _vector_offset(vector, index);
+
+  /* How many bytes to move to the right. */
+  size_t elements_in_bytes = (vector->size - index) * vector->element_size;
+
+#ifdef __STDC_LIB_EXT1__
+ size_t right_capacity_in_bytes =
+ (vector->capacity - (index + 1)) * vector->element_size;
+
+ /* clang-format off */
+  int return_code = memmove_s(
+      (unsigned char *)offset + vector->element_size,
+      right_capacity_in_bytes,
+      offset,
+      elements_in_bytes);
+
+ /* clang-format on */
+
+ return return_code == 0 ? VECTOR_SUCCESS : VECTOR_ERROR;
+
+#else
+ // memmove(offset + vector->element_size, offset, elements_in_bytes);
+ memmove((unsigned char *)offset + vector->element_size, offset,
+ elements_in_bytes);
+ return VECTOR_SUCCESS;
+#endif
+}
+
+void _vector_move_left(Vector *vector, size_t index) {
+ size_t right_elements_in_bytes;
+ void *offset;
+
+ /* The offset into the memory */
+ offset = _vector_offset(vector, index);
+
+  /* How many bytes to move to the left */
+ right_elements_in_bytes = (vector->size - index - 1) * vector->element_size;
+
+ // memmove(offset, offset + vector->element_size, right_elements_in_bytes);
+ memmove(offset, (unsigned char *)offset + vector->element_size,
+ right_elements_in_bytes);
+}
+
+int _vector_adjust_capacity(Vector *vector) {
+ return _vector_reallocate(vector,
+ MAX(1, vector->size * VECTOR_GROWTH_FACTOR));
+}
+
+int _vector_reallocate(Vector *vector, size_t new_capacity) {
+ size_t new_capacity_in_bytes;
+ void *old;
+ assert(vector != NULL);
+
+ if (new_capacity < VECTOR_MINIMUM_CAPACITY) {
+ if (vector->capacity > VECTOR_MINIMUM_CAPACITY) {
+ new_capacity = VECTOR_MINIMUM_CAPACITY;
+ } else {
+ /* NO-OP */
+ return VECTOR_SUCCESS;
+ }
+ }
+
+ new_capacity_in_bytes = new_capacity * vector->element_size;
+ old = vector->data;
+
+ if ((vector->data = malloc(new_capacity_in_bytes)) == NULL) {
+ return VECTOR_ERROR;
+ }
+
+#ifdef __STDC_LIB_EXT1__
+  /* clang-format off */
+  if (memcpy_s(vector->data,
+               new_capacity_in_bytes,
+               old,
+               vector_byte_size(vector)) != 0) {
+    /* Restore the old buffer so the vector stays usable on failure. */
+    free(vector->data);
+    vector->data = old;
+    return VECTOR_ERROR;
+  }
+/* clang-format on */
+#else
+  memcpy(vector->data, old, vector_byte_size(vector));
+#endif
+
+ vector->capacity = new_capacity;
+
+ free(old);
+
+ return VECTOR_SUCCESS;
+}
+
+void _vector_swap(size_t *first, size_t *second) {
+ size_t temp = *first;
+ *first = *second;
+ *second = temp;
+}
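For orientation, this is how the new container is driven from calling code; the example below is illustrative only and not part of the patch. Elements are copied in and out by value, element_size bytes at a time:

    #include <stdio.h>

    #include "third_party/vector/vector.h"

    int main(void) {
      Vector v = VECTOR_INITIALIZER;
      int value = 42;

      if (vector_setup(&v, 8, sizeof(int)) != VECTOR_SUCCESS) return 1;
      vector_push_back(&v, &value); /* copies sizeof(int) bytes into v */
      printf("front: %d\n", VECTOR_GET_AS(int, &v, 0));
      return vector_destroy(&v); /* frees v.data, returns VECTOR_SUCCESS */
    }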
diff --git a/third_party/aom/third_party/vector/vector.h b/third_party/aom/third_party/vector/vector.h
new file mode 100644
index 000000000..2bf1a9a8d
--- /dev/null
+++ b/third_party/aom/third_party/vector/vector.h
@@ -0,0 +1,159 @@
+/*
+The MIT License (MIT)
+Copyright (c) 2016 Peter Goldsborough
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef VECTOR_H
+#define VECTOR_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/***** DEFINITIONS *****/
+
+#define VECTOR_MINIMUM_CAPACITY 2
+#define VECTOR_GROWTH_FACTOR 2
+#define VECTOR_SHRINK_THRESHOLD (1.0 / 4.0) /* (1 / 4) would truncate to 0 */
+
+#define VECTOR_ERROR -1
+#define VECTOR_SUCCESS 0
+
+#define VECTOR_UNINITIALIZED NULL
+#define VECTOR_INITIALIZER \
+ { 0, 0, 0, VECTOR_UNINITIALIZED }
+
+/***** STRUCTURES *****/
+
+typedef struct Vector {
+ size_t size;
+ size_t capacity;
+ size_t element_size;
+
+ void *data;
+} Vector;
+
+typedef struct Iterator {
+ void *pointer;
+ size_t element_size;
+} Iterator;
+
+/***** METHODS *****/
+
+/* Constructor */
+int vector_setup(Vector *vector, size_t capacity, size_t element_size);
+
+/* Copy Constructor */
+int vector_copy(Vector *destination, Vector *source);
+
+/* Copy Assignment */
+int vector_copy_assign(Vector *destination, Vector *source);
+
+/* Move Constructor */
+int vector_move(Vector *destination, Vector *source);
+
+/* Move Assignment */
+int vector_move_assign(Vector *destination, Vector *source);
+
+int vector_swap(Vector *destination, Vector *source);
+
+/* Destructor */
+int vector_destroy(Vector *vector);
+
+/* Insertion */
+int vector_push_back(Vector *vector, void *element);
+int vector_push_front(Vector *vector, void *element);
+int vector_insert(Vector *vector, size_t index, void *element);
+int vector_assign(Vector *vector, size_t index, void *element);
+
+/* Deletion */
+int vector_pop_back(Vector *vector);
+int vector_pop_front(Vector *vector);
+int vector_erase(Vector *vector, size_t index);
+int vector_clear(Vector *vector);
+
+/* Lookup */
+void *vector_get(Vector *vector, size_t index);
+const void *vector_const_get(const Vector *vector, size_t index);
+void *vector_front(Vector *vector);
+void *vector_back(Vector *vector);
+#define VECTOR_GET_AS(type, vector_pointer, index) \
+ *((type *)vector_get((vector_pointer), (index)))
+
+/* Information */
+bool vector_is_initialized(const Vector *vector);
+size_t vector_byte_size(const Vector *vector);
+size_t vector_free_space(const Vector *vector);
+bool vector_is_empty(const Vector *vector);
+
+/* Memory management */
+int vector_resize(Vector *vector, size_t new_size);
+int vector_reserve(Vector *vector, size_t minimum_capacity);
+int vector_shrink_to_fit(Vector *vector);
+
+/* Iterators */
+Iterator vector_begin(Vector *vector);
+Iterator vector_end(Vector *vector);
+Iterator vector_iterator(Vector *vector, size_t index);
+
+void *iterator_get(Iterator *iterator);
+#define ITERATOR_GET_AS(type, iterator) *((type *)iterator_get((iterator)))
+
+int iterator_erase(Vector *vector, Iterator *iterator);
+
+void iterator_increment(Iterator *iterator);
+void iterator_decrement(Iterator *iterator);
+
+void *iterator_next(Iterator *iterator);
+void *iterator_previous(Iterator *iterator);
+
+bool iterator_equals(Iterator *first, Iterator *second);
+bool iterator_is_before(Iterator *first, Iterator *second);
+bool iterator_is_after(Iterator *first, Iterator *second);
+
+size_t iterator_index(Vector *vector, Iterator *iterator);
+
+#define VECTOR_FOR_EACH(vector_pointer, iterator_name) \
+ for (Iterator(iterator_name) = vector_begin((vector_pointer)), \
+ end = vector_end((vector_pointer)); \
+ !iterator_equals(&(iterator_name), &end); \
+ iterator_increment(&(iterator_name)))
+
+/***** PRIVATE *****/
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+bool _vector_should_grow(Vector *vector);
+bool _vector_should_shrink(Vector *vector);
+
+size_t _vector_free_bytes(const Vector *vector);
+void *_vector_offset(Vector *vector, size_t index);
+const void *_vector_const_offset(const Vector *vector, size_t index);
+
+void _vector_assign(Vector *vector, size_t index, void *element);
+
+int _vector_move_right(Vector *vector, size_t index);
+void _vector_move_left(Vector *vector, size_t index);
+
+int _vector_adjust_capacity(Vector *vector);
+int _vector_reallocate(Vector *vector, size_t new_capacity);
+
+void _vector_swap(size_t *first, size_t *second);
+
+#endif /* VECTOR_H */
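The iterator half of the interface mirrors C++-style begin/end traversal. A short sketch of a typed traversal using the declarations above (illustrative, not from the patch):

    #include "third_party/vector/vector.h"

    /* Sums an int-typed Vector via the begin/end iterator pair. */
    static int sum_int_vector(Vector *v) {
      int total = 0;
      Iterator it = vector_begin(v);
      Iterator end = vector_end(v);
      for (; !iterator_equals(&it, &end); iterator_increment(&it)) {
        total += ITERATOR_GET_AS(int, &it);
      }
      return total;
    }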
diff --git a/third_party/aom/tools/aom_entropy_optimizer.c b/third_party/aom/tools/aom_entropy_optimizer.c
index b892cc163..962c1af36 100644
--- a/third_party/aom/tools/aom_entropy_optimizer.c
+++ b/third_party/aom/tools/aom_entropy_optimizer.c
@@ -28,7 +28,6 @@
#include "./aom_config.h"
#include "av1/common/entropymode.h"
-#if CONFIG_ALT_INTRA
#if CONFIG_SMOOTH_HV
const aom_tree_index av1_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
-DC_PRED,
@@ -70,19 +69,6 @@ const aom_tree_index av1_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
-D207_PRED, -SMOOTH_PRED, /* 9 = D207_NODE */
};
#endif // CONFIG_SMOOTH_HV
-#else
-const aom_tree_index av1_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
- -DC_PRED, 2, /* 0 = DC_NODE */
- -TM_PRED, 4, /* 1 = TM_NODE */
- -V_PRED, 6, /* 2 = V_NODE */
- 8, 12, /* 3 = COM_NODE */
- -H_PRED, 10, /* 4 = H_NODE */
- -D135_PRED, -D117_PRED, /* 5 = D135_NODE */
- -D45_PRED, 14, /* 6 = D45_NODE */
- -D63_PRED, 16, /* 7 = D63_NODE */
- -D153_PRED, -D207_PRED /* 8 = D153_NODE */
-};
-#endif // CONFIG_ALT_INTRA
#define SPACES_PER_TAB 2
@@ -90,6 +76,13 @@ typedef unsigned int aom_count_type;
// A log file recording parsed counts
static FILE *logfile; // TODO(yuec): make it a command line option
+static INLINE aom_prob get_binary_prob_new(unsigned int n0, unsigned int n1) {
+  // The "+1" terms prevent this function from generating an extreme
+  // probability when both n0 and n1 are small.
+ const unsigned int den = n0 + 1 + n1 + 1;
+ return get_prob(n0 + 1, den);
+}
+
// Optimized probabilities will be stored in probs[].
static unsigned int optimize_tree_probs(const aom_tree_index *tree,
unsigned int idx,
@@ -101,7 +94,7 @@ static unsigned int optimize_tree_probs(const aom_tree_index *tree,
const int r = tree[idx + 1];
const unsigned int right_count =
(r <= 0) ? counts[-r] : optimize_tree_probs(tree, r, counts, probs);
- probs[idx >> 1] = get_binary_prob(left_count, right_count);
+ probs[idx >> 1] = get_binary_prob_new(left_count, right_count);
return left_count + right_count;
}
@@ -127,7 +120,7 @@ static int parse_stats(aom_count_type **ct_ptr, FILE *const probsfile, int tabs,
optimize_tree_probs(tree, 0, counts1d, probs);
} else {
assert(total_modes == 2);
- probs[0] = get_binary_prob(counts1d[0], counts1d[1]);
+ probs[0] = get_binary_prob_new(counts1d[0], counts1d[1]);
}
if (tabs > 0) fprintf(probsfile, "%*c", tabs * SPACES_PER_TAB, ' ');
for (int k = 0; k < total_modes - 1; ++k) {
@@ -144,10 +137,10 @@ static int parse_stats(aom_count_type **ct_ptr, FILE *const probsfile, int tabs,
for (int k = 0; k < cts_each_dim[0]; ++k) {
if (k == cts_each_dim[0] - 1) {
fprintf(probsfile, " %3d ",
- get_binary_prob((*ct_ptr)[0], (*ct_ptr)[1]));
+ get_binary_prob_new((*ct_ptr)[0], (*ct_ptr)[1]));
} else {
fprintf(probsfile, " %3d,",
- get_binary_prob((*ct_ptr)[0], (*ct_ptr)[1]));
+ get_binary_prob_new((*ct_ptr)[0], (*ct_ptr)[1]));
}
fprintf(logfile, "%d %d\n", (*ct_ptr)[0], (*ct_ptr)[1]);
(*ct_ptr) += 2;
@@ -402,28 +395,11 @@ int main(int argc, const char **argv) {
/* Interpolation filter */
cts_each_dim[0] = SWITCHABLE_FILTER_CONTEXTS;
cts_each_dim[1] = SWITCHABLE_FILTERS;
- optimize_entropy_table(
- &fc.switchable_interp[0][0], probsfile, 2, cts_each_dim,
- av1_switchable_interp_tree, 0,
- "static const aom_prob \n"
- "default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]"
- "[SWITCHABLE_FILTERS - 1]");
optimize_cdf_table(&fc.switchable_interp[0][0], probsfile, 2, cts_each_dim,
"static const aom_cdf_prob\n"
"default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS]"
"[CDF_SIZE(SWITCHABLE_FILTERS)]");
- /* Blockzero */
- cts_each_dim[0] = TX_SIZES;
- cts_each_dim[1] = PLANE_TYPES;
- cts_each_dim[2] = REF_TYPES;
- cts_each_dim[3] = BLOCKZ_CONTEXTS;
- cts_each_dim[4] = 2;
- optimize_entropy_table(
- &fc.blockz_count[0][0][0][0][0], probsfile, 5, cts_each_dim, NULL, 1,
- "static const aom_prob av1_default_blockzero_probs[TX_SIZES]"
- "[PLANE_TYPES][REF_TYPES][BLOCKZ_CONTEXTS]");
-
/* Motion vector referencing */
cts_each_dim[0] = NEWMV_MODE_CONTEXTS;
cts_each_dim[1] = 2;
@@ -461,8 +437,7 @@ int main(int argc, const char **argv) {
"static const aom_cdf_prob "
"default_drl_cdf[DRL_MODE_CONTEXTS][CDF_SIZE(2)]");
-/* ext_inter experiment */
-#if CONFIG_EXT_INTER
+ /* ext_inter experiment */
/* New compound mode */
cts_each_dim[0] = INTER_MODE_CONTEXTS;
cts_each_dim[1] = INTER_COMPOUND_MODES;
@@ -534,7 +509,6 @@ int main(int argc, const char **argv) {
&fc.compound_interinter[0][0], probsfile, 2, cts_each_dim,
"static const aom_cdf_prob\n"
"default_compound_type_cdf[BLOCK_SIZES_ALL][CDF_SIZE(COMPOUND_TYPES)]");
-#endif
/* motion_var and warped_motion experiments */
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
@@ -559,6 +533,31 @@ int main(int argc, const char **argv) {
"static const aom_cdf_prob "
"default_obmc_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)]");
#endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_NCOBMC_ADAPT_WEIGHT
+ cts_each_dim[0] = ADAPT_OVERLAP_BLOCKS;
+ cts_each_dim[1] = MAX_NCOBMC_MODES;
+ optimize_entropy_table(
+ &fc.ncobmc_mode[0][0], probsfile, 2, cts_each_dim, av1_ncobmc_mode_tree,
+ 0,
+ "static const aom_prob default_ncobmc_mode_prob[ADAPT_OVERLAP_BLOCKS]"
+ "[MAX_NCOBMC_MODES - 1]");
+ optimize_cdf_table(&fc.ncobmc_mode[0][0], probsfile, 2, cts_each_dim,
+ "static const aom_cdf_prob\n"
+ "default_ncobmc_mode_cdf[ADAPT_OVERLAP_BLOCKS]"
+ "[CDF_SIZE(MAX_NCOBMC_MODES)]");
+#if CONFIG_WARPED_MOTION
+ cts_each_dim[0] = BLOCK_SIZES_ALL;
+ cts_each_dim[1] = OBMC_FAMILY_MODES;
+ optimize_entropy_table(
+ &fc.ncobmc[0][0], probsfile, 2, cts_each_dim, av1_ncobmc_tree, 0,
+ "static const aom_prob default_ncobmc_prob[BLOCK_SIZES_ALL]"
+ "[OBMC_FAMILY_MODES - 1]");
+ optimize_cdf_table(&fc.ncobmc[0][0], probsfile, 2, cts_each_dim,
+ "static const aom_cdf_prob\n"
+ "default_ncobmc_cdf[BLOCK_SIZES_ALL]"
+ "[CDF_SIZE(OBMC_FAMILY_MODES)]");
+#endif
+#endif // CONFIG_NCOBMC_ADAPT_WEIGHT
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
/* Intra/inter flag */
@@ -663,7 +662,7 @@ int main(int argc, const char **argv) {
#endif // CONFIG_EXT_REFS
/* Compound single ref inter mode */
-#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
+#if CONFIG_COMPOUND_SINGLEREF
cts_each_dim[0] = COMP_INTER_MODE_CONTEXTS;
cts_each_dim[1] = 2;
optimize_entropy_table(&fc.comp_inter_mode[0][0], probsfile, 2, cts_each_dim,
@@ -677,7 +676,6 @@ int main(int argc, const char **argv) {
#endif
/* Transform size */
-// TODO(yuec): av1_tx_size_tree has variable sizes, so needs special handling
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
cts_each_dim[0] = 2;
optimize_entropy_table(&fc.quarter_tx_size[0], probsfile, 1, cts_each_dim,
@@ -720,8 +718,7 @@ int main(int argc, const char **argv) {
"static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)]");
#endif
-/* delta_q experiment */
-#if CONFIG_DELTA_Q
+ /* delta_q */
cts_each_dim[0] = DELTA_Q_PROBS;
cts_each_dim[1] = 2;
optimize_entropy_table(
@@ -734,7 +731,6 @@ int main(int argc, const char **argv) {
&fc.delta_lf[0][0], probsfile, 2, cts_each_dim, NULL, 1,
"static const aom_prob default_delta_lf_probs[DELTA_LF_PROBS]");
#endif
-#endif
/* Transform type */
#if CONFIG_EXT_TX
@@ -797,6 +793,143 @@ int main(int argc, const char **argv) {
"static const aom_cdf_prob default_filter_intra_cdf[2][CDF_SIZE(2)]");
#endif
+#if CONFIG_LV_MAP
+ cts_each_dim[0] = TX_SIZES;
+ cts_each_dim[1] = PLANE_TYPES;
+ cts_each_dim[2] = NUM_BASE_LEVELS;
+ cts_each_dim[3] = COEFF_BASE_CONTEXTS;
+ cts_each_dim[4] = 2;
+ optimize_entropy_table(&fc.coeff_base[0][0][0][0][0], probsfile, 5,
+ cts_each_dim, NULL, 1,
+ "static const aom_prob "
+ "default_coeff_base[TX_SIZES][PLANE_TYPES][NUM_BASE_"
+ "LEVELS][COEFF_BASE_CONTEXTS]");
+ optimize_cdf_table(&fc.coeff_base[0][0][0][0][0], probsfile, 5, cts_each_dim,
+ "static const aom_cdf_prob "
+ "default_coeff_base_cdf[TX_SIZES][PLANE_TYPES][NUM_BASE_"
+ "LEVELS][COEFF_BASE_CONTEXTS][CDF_SIZE(2)]");
+
+ cts_each_dim[0] = TX_SIZES;
+ cts_each_dim[1] = PLANE_TYPES;
+ cts_each_dim[2] = SIG_COEF_CONTEXTS;
+ cts_each_dim[3] = 2;
+ optimize_entropy_table(
+ &fc.nz_map[0][0][0][0], probsfile, 4, cts_each_dim, NULL, 1,
+ "static const aom_prob "
+ "default_nz_map[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]");
+ optimize_cdf_table(&fc.nz_map[0][0][0][0], probsfile, 4, cts_each_dim,
+ "static const aom_cdf_prob "
+ "default_nz_map_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_"
+ "CONTEXTS][CDF_SIZE(2)]");
+
+ cts_each_dim[0] = TX_SIZES;
+ cts_each_dim[1] = PLANE_TYPES;
+ cts_each_dim[2] = EOB_COEF_CONTEXTS;
+ cts_each_dim[3] = 2;
+ optimize_entropy_table(
+ &fc.eob_flag[0][0][0][0], probsfile, 4, cts_each_dim, NULL, 1,
+ "static const aom_prob "
+ "default_eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]");
+ optimize_cdf_table(&fc.eob_flag[0][0][0][0], probsfile, 4, cts_each_dim,
+ "static const aom_cdf_prob "
+ "default_eob_flag_cdf[TX_SIZES][PLANE_TYPES][EOB_COEF_"
+ "CONTEXTS][CDF_SIZE(2)]");
+
+ cts_each_dim[0] = TX_SIZES;
+ cts_each_dim[1] = PLANE_TYPES;
+ cts_each_dim[2] = LEVEL_CONTEXTS;
+ cts_each_dim[3] = 2;
+ optimize_entropy_table(
+ &fc.coeff_lps[0][0][0][0], probsfile, 4, cts_each_dim, NULL, 1,
+ "static const aom_prob "
+ "default_coeff_lps[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]");
+ optimize_cdf_table(&fc.coeff_lps[0][0][0][0], probsfile, 4, cts_each_dim,
+ "static const aom_cdf_prob "
+ "default_coeff_lps_cdf[TX_SIZES][PLANE_TYPES][LEVEL_"
+ "CONTEXTS][CDF_SIZE(2)]");
+
+#if BR_NODE
+ cts_each_dim[0] = TX_SIZES;
+ cts_each_dim[1] = PLANE_TYPES;
+ cts_each_dim[2] = BASE_RANGE_SETS;
+ cts_each_dim[3] = LEVEL_CONTEXTS;
+ cts_each_dim[4] = 2;
+ optimize_entropy_table(&fc.coeff_br[0][0][0][0][0], probsfile, 5,
+ cts_each_dim, NULL, 1,
+ "static const aom_prob "
+ "default_coeff_br[TX_SIZES][PLANE_TYPES][BASE_RANGE_"
+ "SETS][LEVEL_CONTEXTS]");
+ optimize_cdf_table(&fc.coeff_br[0][0][0][0][0], probsfile, 5, cts_each_dim,
+ "static const aom_cdf_prob "
+ "default_coeff_br_cdf[TX_SIZES][PLANE_TYPES][BASE_RANGE_"
+ "SETS][LEVEL_CONTEXTS][CDF_SIZE(2)]");
+#endif // BR_NODE
+
+#if CONFIG_CTX1D
+ cts_each_dim[0] = TX_SIZES;
+ cts_each_dim[1] = PLANE_TYPES;
+ cts_each_dim[2] = TX_CLASSES;
+ cts_each_dim[3] = 2;
+ optimize_entropy_table(&fc.eob_mode[0][0][0][0], probsfile, 4, cts_each_dim,
+ NULL, 1,
+ "static const aom_prob "
+ "default_eob_mode[TX_SIZES][PLANE_TYPES][TX_CLASSES]");
+ optimize_cdf_table(&fc.eob_mode[0][0][0][0], probsfile, 4, cts_each_dim,
+ "static const aom_cdf_prob "
+ "default_eob_mode_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES]["
+ "CDF_SIZE(2)]");
+
+ cts_each_dim[0] = TX_SIZES;
+ cts_each_dim[1] = PLANE_TYPES;
+ cts_each_dim[2] = TX_CLASSES;
+ cts_each_dim[3] = EMPTY_LINE_CONTEXTS;
+ cts_each_dim[4] = 2;
+ optimize_entropy_table(&fc.empty_line[0][0][0][0][0], probsfile, 5,
+ cts_each_dim, NULL, 1,
+ "static const aom_prob "
+ "default_empty_line[TX_SIZES][PLANE_TYPES][TX_CLASSES]"
+ "[EMPTY_LINE_CONTEXTS]");
+ optimize_cdf_table(&fc.empty_line[0][0][0][0][0], probsfile, 5, cts_each_dim,
+ "static const aom_cdf_prob "
+ "default_empty_line_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES]"
+ "[EMPTY_LINE_CONTEXTS][CDF_SIZE(2)]");
+
+ cts_each_dim[0] = TX_SIZES;
+ cts_each_dim[1] = PLANE_TYPES;
+ cts_each_dim[2] = TX_CLASSES;
+ cts_each_dim[3] = HV_EOB_CONTEXTS;
+ cts_each_dim[4] = 2;
+ optimize_entropy_table(
+ &fc.hv_eob[0][0][0][0][0], probsfile, 5, cts_each_dim, NULL, 1,
+ "static const aom_prob "
+ "default_hv_eob[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_EOB_CONTEXTS]");
+ optimize_cdf_table(&fc.hv_eob[0][0][0][0][0], probsfile, 5, cts_each_dim,
+ "static const aom_cdf_prob "
+ "default_hv_eob_cdf[TX_SIZES][PLANE_TYPES][TX_CLASSES][HV_"
+ "EOB_CONTEXTS][CDF_SIZE(2)]");
+#endif // CONFIG_CTX1D
+#endif // CONFIG_LV_MAP
+
+/* lgt_from_pred experiment */
+#if CONFIG_LGT_FROM_PRED
+ cts_each_dim[0] = LGT_SIZES;
+ if (LGT_FROM_PRED_INTRA) {
+ cts_each_dim[1] = INTRA_MODES;
+ cts_each_dim[2] = 2;
+ optimize_entropy_table(&fc.intra_lgt[0][0][0], probsfile, 3, cts_each_dim,
+ NULL, 1,
+ "static const aom_prob default_intra_lgt_prob"
+ "[LGT_SIZES][INTRA_MODES][2]");
+ }
+ if (LGT_FROM_PRED_INTER) {
+ cts_each_dim[1] = 2;
+ optimize_entropy_table(&fc.inter_lgt[0][0], probsfile, 2, cts_each_dim,
+ NULL, 1,
+ "static const aom_prob default_inter_lgt_prob"
+ "[LGT_SIZES][2]");
+ }
+#endif // CONFIG_LGT_FROM_PRED
+
fclose(statsfile);
fclose(logfile);
fclose(probsfile);
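The new get_binary_prob_new() is ordinary add-one (Laplace) smoothing: with zero observations it yields the midpoint probability instead of a clipped extreme. A standalone sketch with clip_prob/get_prob re-derived here for illustration (the in-tree definitions in aom_dsp/prob.h may differ in detail):

    #include <stdio.h>

    typedef unsigned char aom_prob;

    static aom_prob clip_prob(int p) {
      return (p > 255) ? 255 : (p < 1) ? 1 : (aom_prob)p;
    }

    /* 8-bit probability of the left branch, rounded to nearest. */
    static aom_prob get_prob(unsigned int num, unsigned int den) {
      if (den == 0) return 128;
      return clip_prob((int)(((long long)num * 256 + (den >> 1)) / den));
    }

    static aom_prob get_binary_prob_new(unsigned int n0, unsigned int n1) {
      const unsigned int den = n0 + 1 + n1 + 1;
      return get_prob(n0 + 1, den);
    }

    int main(void) {
      printf("%u\n", get_binary_prob_new(0, 0));   /* 128: midpoint, not 1 */
      printf("%u\n", get_binary_prob_new(0, 100)); /* 3: small but nonzero */
      return 0;
    }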
diff --git a/third_party/aom/tools_common.c b/third_party/aom/tools_common.c
index d1115ac27..b7095e3f1 100644
--- a/third_party/aom/tools_common.c
+++ b/third_party/aom/tools_common.c
@@ -185,22 +185,6 @@ const AvxInterface *get_aom_decoder_by_fourcc(uint32_t fourcc) {
}
#endif // CONFIG_AV1_DECODER
-// TODO(dkovalev): move this function to aom_image.{c, h}, so it will be part
-// of aom_image_t support
-int aom_img_plane_width(const aom_image_t *img, int plane) {
- if (plane > 0 && img->x_chroma_shift > 0)
- return (img->d_w + 1) >> img->x_chroma_shift;
- else
- return img->d_w;
-}
-
-int aom_img_plane_height(const aom_image_t *img, int plane) {
- if (plane > 0 && img->y_chroma_shift > 0)
- return (img->d_h + 1) >> img->y_chroma_shift;
- else
- return img->d_h;
-}
-
void aom_img_write(const aom_image_t *img, FILE *file) {
int plane;
diff --git a/third_party/aom/tools_common.h b/third_party/aom/tools_common.h
index 4bf7fd0a6..5fc8fbafd 100644
--- a/third_party/aom/tools_common.h
+++ b/third_party/aom/tools_common.h
@@ -146,10 +146,6 @@ const AvxInterface *get_aom_decoder_by_index(int i);
const AvxInterface *get_aom_decoder_by_name(const char *name);
const AvxInterface *get_aom_decoder_by_fourcc(uint32_t fourcc);
-// TODO(dkovalev): move this function to aom_image.{c, h}, so it will be part
-// of aom_image_t support
-int aom_img_plane_width(const aom_image_t *img, int plane);
-int aom_img_plane_height(const aom_image_t *img, int plane);
void aom_img_write(const aom_image_t *img, FILE *file);
int aom_img_read(aom_image_t *img, FILE *file);
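The removed helpers presumably move under aom_image per the resolved TODO; callers that still need the computation can derive plane dimensions directly from the chroma shifts. An equivalent inline sketch (illustrative only):

    #include "aom/aom_image.h"

    /* Width of a plane after chroma subsampling; mirrors the removed
       helper. (w + 1) >> shift rounds odd luma dimensions up. */
    static int plane_width(const aom_image_t *img, int plane) {
      return (plane > 0 && img->x_chroma_shift > 0)
                 ? (int)((img->d_w + 1) >> img->x_chroma_shift)
                 : (int)img->d_w;
    }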
diff --git a/third_party/aom/y4minput.c b/third_party/aom/y4minput.c
index 191918924..e009042b2 100644
--- a/third_party/aom/y4minput.c
+++ b/third_party/aom/y4minput.c
@@ -190,26 +190,29 @@ static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
window.*/
for (x = 0; x < OC_MINI(_c_w, 2); x++) {
_dst[x] = (unsigned char)OC_CLAMPI(
- 0, (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)] + 114 * _src[x] +
- 35 * _src[OC_MINI(x + 1, _c_w - 1)] -
- 9 * _src[OC_MINI(x + 2, _c_w - 1)] +
- _src[OC_MINI(x + 3, _c_w - 1)] + 64) >>
- 7,
+ 0,
+ (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)] + 114 * _src[x] +
+ 35 * _src[OC_MINI(x + 1, _c_w - 1)] -
+ 9 * _src[OC_MINI(x + 2, _c_w - 1)] + _src[OC_MINI(x + 3, _c_w - 1)] +
+ 64) >>
+ 7,
255);
}
for (; x < _c_w - 3; x++) {
_dst[x] = (unsigned char)OC_CLAMPI(
- 0, (4 * _src[x - 2] - 17 * _src[x - 1] + 114 * _src[x] +
- 35 * _src[x + 1] - 9 * _src[x + 2] + _src[x + 3] + 64) >>
- 7,
+ 0,
+ (4 * _src[x - 2] - 17 * _src[x - 1] + 114 * _src[x] +
+ 35 * _src[x + 1] - 9 * _src[x + 2] + _src[x + 3] + 64) >>
+ 7,
255);
}
for (; x < _c_w; x++) {
_dst[x] = (unsigned char)OC_CLAMPI(
- 0, (4 * _src[x - 2] - 17 * _src[x - 1] + 114 * _src[x] +
- 35 * _src[OC_MINI(x + 1, _c_w - 1)] -
- 9 * _src[OC_MINI(x + 2, _c_w - 1)] + _src[_c_w - 1] + 64) >>
- 7,
+ 0,
+ (4 * _src[x - 2] - 17 * _src[x - 1] + 114 * _src[x] +
+ 35 * _src[OC_MINI(x + 1, _c_w - 1)] -
+ 9 * _src[OC_MINI(x + 2, _c_w - 1)] + _src[_c_w - 1] + 64) >>
+ 7,
255);
}
_dst += _c_w;
@@ -309,28 +312,31 @@ static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
for (x = 0; x < c_w; x++) {
for (y = 0; y < OC_MINI(c_h, 3); y++) {
_dst[y * c_w] = (unsigned char)OC_CLAMPI(
- 0, (tmp[0] - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] +
- 35 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w] -
- 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] +
- 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >>
- 7,
+ 0,
+ (tmp[0] - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] +
+ 35 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w] -
+ 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] +
+ 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >>
+ 7,
255);
}
for (; y < c_h - 2; y++) {
_dst[y * c_w] = (unsigned char)OC_CLAMPI(
- 0, (tmp[(y - 3) * c_w] - 9 * tmp[(y - 2) * c_w] +
- 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] -
- 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >>
- 7,
+ 0,
+ (tmp[(y - 3) * c_w] - 9 * tmp[(y - 2) * c_w] +
+ 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] -
+ 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >>
+ 7,
255);
}
for (; y < c_h; y++) {
_dst[y * c_w] = (unsigned char)OC_CLAMPI(
- 0, (tmp[(y - 3) * c_w] - 9 * tmp[(y - 2) * c_w] +
- 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] -
- 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] +
- 4 * tmp[(c_h - 1) * c_w] + 64) >>
- 7,
+ 0,
+ (tmp[(y - 3) * c_w] - 9 * tmp[(y - 2) * c_w] +
+ 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] -
+ 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] +
+ 4 * tmp[(c_h - 1) * c_w] + 64) >>
+ 7,
255);
}
_dst++;
@@ -355,10 +361,11 @@ static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
}
for (; y < c_h - 3; y++) {
_dst[y * c_w] = (unsigned char)OC_CLAMPI(
- 0, (4 * tmp[(y - 2) * c_w] - 17 * tmp[(y - 1) * c_w] +
- 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w] -
- 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >>
- 7,
+ 0,
+ (4 * tmp[(y - 2) * c_w] - 17 * tmp[(y - 1) * c_w] +
+ 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w] -
+ 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >>
+ 7,
255);
}
for (; y < c_h; y++) {
@@ -397,18 +404,20 @@ static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
for (x = 0; x < _c_w; x++) {
for (y = 0; y < OC_MINI(_c_h, 2); y += 2) {
_dst[(y >> 1) * _c_w] =
- OC_CLAMPI(0, (64 * _src[0] + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w] -
- 17 * _src[OC_MINI(2, _c_h - 1) * _c_w] +
- 3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >>
- 7,
+ OC_CLAMPI(0,
+ (64 * _src[0] + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w] -
+ 17 * _src[OC_MINI(2, _c_h - 1) * _c_w] +
+ 3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >>
+ 7,
255);
}
for (; y < _c_h - 3; y += 2) {
_dst[(y >> 1) * _c_w] =
- OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w]) -
- 17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w]) +
- 78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >>
- 7,
+ OC_CLAMPI(0,
+ (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w]) -
+ 17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w]) +
+ 78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >>
+ 7,
255);
}
for (; y < _c_h; y += 2) {
@@ -635,33 +644,38 @@ static void y4m_convert_411_420jpeg(y4m_input *_y4m, unsigned char *_dst,
4-tap Mitchell window.*/
for (x = 0; x < OC_MINI(c_w, 1); x++) {
tmp[x << 1] = (unsigned char)OC_CLAMPI(
- 0, (111 * _aux[0] + 18 * _aux[OC_MINI(1, c_w - 1)] -
- _aux[OC_MINI(2, c_w - 1)] + 64) >>
- 7,
+ 0,
+ (111 * _aux[0] + 18 * _aux[OC_MINI(1, c_w - 1)] -
+ _aux[OC_MINI(2, c_w - 1)] + 64) >>
+ 7,
255);
tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(
- 0, (47 * _aux[0] + 86 * _aux[OC_MINI(1, c_w - 1)] -
- 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >>
- 7,
+ 0,
+ (47 * _aux[0] + 86 * _aux[OC_MINI(1, c_w - 1)] -
+ 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >>
+ 7,
255);
}
for (; x < c_w - 2; x++) {
tmp[x << 1] =
- (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x] +
- 18 * _aux[x + 1] - _aux[x + 2] + 64) >>
- 7,
+ (unsigned char)OC_CLAMPI(0,
+ (_aux[x - 1] + 110 * _aux[x] +
+ 18 * _aux[x + 1] - _aux[x + 2] + 64) >>
+ 7,
255);
tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(
- 0, (-3 * _aux[x - 1] + 50 * _aux[x] + 86 * _aux[x + 1] -
- 5 * _aux[x + 2] + 64) >>
- 7,
+ 0,
+ (-3 * _aux[x - 1] + 50 * _aux[x] + 86 * _aux[x + 1] -
+ 5 * _aux[x + 2] + 64) >>
+ 7,
255);
}
for (; x < c_w; x++) {
tmp[x << 1] = (unsigned char)OC_CLAMPI(
- 0, (_aux[x - 1] + 110 * _aux[x] +
- 18 * _aux[OC_MINI(x + 1, c_w - 1)] - _aux[c_w - 1] + 64) >>
- 7,
+ 0,
+ (_aux[x - 1] + 110 * _aux[x] + 18 * _aux[OC_MINI(x + 1, c_w - 1)] -
+ _aux[c_w - 1] + 64) >>
+ 7,
255);
if ((x << 1 | 1) < dst_c_w) {
tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI(
@@ -711,27 +725,29 @@ static void y4m_convert_444_420jpeg(y4m_input *_y4m, unsigned char *_dst,
/*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
for (y = 0; y < c_h; y++) {
for (x = 0; x < OC_MINI(c_w, 2); x += 2) {
- tmp[x >> 1] =
- OC_CLAMPI(0, (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)] -
- 17 * _aux[OC_MINI(2, c_w - 1)] +
- 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >>
- 7,
- 255);
+ tmp[x >> 1] = OC_CLAMPI(0,
+ (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)] -
+ 17 * _aux[OC_MINI(2, c_w - 1)] +
+ 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >>
+ 7,
+ 255);
}
for (; x < c_w - 3; x += 2) {
- tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[x + 3]) -
- 17 * (_aux[x - 1] + _aux[x + 2]) +
- 78 * (_aux[x] + _aux[x + 1]) + 64) >>
- 7,
+ tmp[x >> 1] = OC_CLAMPI(0,
+ (3 * (_aux[x - 2] + _aux[x + 3]) -
+ 17 * (_aux[x - 1] + _aux[x + 2]) +
+ 78 * (_aux[x] + _aux[x + 1]) + 64) >>
+ 7,
255);
}
for (; x < c_w; x += 2) {
- tmp[x >> 1] = OC_CLAMPI(
- 0, (3 * (_aux[x - 2] + _aux[c_w - 1]) -
- 17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) +
- 78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >>
- 7,
- 255);
+ tmp[x >> 1] =
+ OC_CLAMPI(0,
+ (3 * (_aux[x - 2] + _aux[c_w - 1]) -
+ 17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) +
+ 78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >>
+ 7,
+ 255);
}
tmp += dst_c_w;
_aux += c_w;