From 30bfbb3f97bd64b7838bcb55c98fa698b1bcc9d2 Mon Sep 17 00:00:00 2001
From: trav90 <travawine@openmailbox.org>
Date: Sun, 4 Feb 2018 13:19:22 -0600
Subject: Update FFmpeg code to n3.2-65-gee56777

---
 media/ffvpx/libavcodec/allcodecs.c          |  28 +-
 media/ffvpx/libavcodec/avcodec.h            | 213 +++++++++++---
 media/ffvpx/libavcodec/avpacket.c           |  20 +-
 media/ffvpx/libavcodec/bsf.h                |  11 +
 media/ffvpx/libavcodec/codec_desc.c         |   2 +-
 media/ffvpx/libavcodec/dummy_funcs.c        |  28 ++
 media/ffvpx/libavcodec/h264dsp.h            |   5 +-
 media/ffvpx/libavcodec/imgconvert.c         |   8 -
 media/ffvpx/libavcodec/internal.h           |   1 +
 media/ffvpx/libavcodec/me_cmp.h             |   1 +
 media/ffvpx/libavcodec/mpegvideo.h          |   3 +-
 media/ffvpx/libavcodec/parser.c             |   5 +
 media/ffvpx/libavcodec/raw.c                |  10 +
 media/ffvpx/libavcodec/utils.c              | 145 ++++++++--
 media/ffvpx/libavcodec/version.h            |   2 +-
 media/ffvpx/libavcodec/vp9.c                |   5 +
 media/ffvpx/libavcodec/x86/h264_i386.h      |  12 +-
 media/ffvpx/libavcodec/x86/vp9dsp_init.c    |  10 +
 media/ffvpx/libavcodec/x86/vp9itxfm.asm     | 434 +++++++++++++++++++++++++++-
 media/ffvpx/libavcodec/x86/vp9lpf.asm       | 238 +++++++++------
 media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm |   2 +-
 21 files changed, 993 insertions(+), 190 deletions(-)

(limited to 'media/ffvpx/libavcodec')

diff --git a/media/ffvpx/libavcodec/allcodecs.c b/media/ffvpx/libavcodec/allcodecs.c
index 54efaad34..b592aa3b2 100644
--- a/media/ffvpx/libavcodec/allcodecs.c
+++ b/media/ffvpx/libavcodec/allcodecs.c
@@ -67,11 +67,13 @@ void avcodec_register_all(void)
     initialized = 1;
 
     /* hardware accelerators */
+    REGISTER_HWACCEL(H263_CUVID,        h263_cuvid);
     REGISTER_HWACCEL(H263_VAAPI,        h263_vaapi);
     REGISTER_HWACCEL(H263_VIDEOTOOLBOX, h263_videotoolbox);
     REGISTER_HWACCEL(H264_CUVID,        h264_cuvid);
     REGISTER_HWACCEL(H264_D3D11VA,      h264_d3d11va);
     REGISTER_HWACCEL(H264_DXVA2,        h264_dxva2);
+    REGISTER_HWACCEL(H264_MEDIACODEC,   h264_mediacodec);
     REGISTER_HWACCEL(H264_MMAL,         h264_mmal);
     REGISTER_HWACCEL(H264_QSV,          h264_qsv);
     REGISTER_HWACCEL(H264_VAAPI,        h264_vaapi);
@@ -82,12 +84,16 @@ void avcodec_register_all(void)
     REGISTER_HWACCEL(HEVC_CUVID,        hevc_cuvid);
     REGISTER_HWACCEL(HEVC_D3D11VA,      hevc_d3d11va);
     REGISTER_HWACCEL(HEVC_DXVA2,        hevc_dxva2);
+    REGISTER_HWACCEL(HEVC_MEDIACODEC,   hevc_mediacodec);
     REGISTER_HWACCEL(HEVC_QSV,          hevc_qsv);
     REGISTER_HWACCEL(HEVC_VAAPI,        hevc_vaapi);
     REGISTER_HWACCEL(HEVC_VDPAU,        hevc_vdpau);
+    REGISTER_HWACCEL(MJPEG_CUVID,       mjpeg_cuvid);
+    REGISTER_HWACCEL(MPEG1_CUVID,       mpeg1_cuvid);
     REGISTER_HWACCEL(MPEG1_XVMC,        mpeg1_xvmc);
     REGISTER_HWACCEL(MPEG1_VDPAU,       mpeg1_vdpau);
     REGISTER_HWACCEL(MPEG1_VIDEOTOOLBOX, mpeg1_videotoolbox);
+    REGISTER_HWACCEL(MPEG2_CUVID,       mpeg2_cuvid);
     REGISTER_HWACCEL(MPEG2_XVMC,        mpeg2_xvmc);
     REGISTER_HWACCEL(MPEG2_D3D11VA,     mpeg2_d3d11va);
     REGISTER_HWACCEL(MPEG2_DXVA2,       mpeg2_dxva2);
@@ -96,6 +102,8 @@ void avcodec_register_all(void)
     REGISTER_HWACCEL(MPEG2_VAAPI,       mpeg2_vaapi);
     REGISTER_HWACCEL(MPEG2_VDPAU,       mpeg2_vdpau);
     REGISTER_HWACCEL(MPEG2_VIDEOTOOLBOX, mpeg2_videotoolbox);
+    REGISTER_HWACCEL(MPEG4_CUVID,       mpeg4_cuvid);
+    REGISTER_HWACCEL(MPEG4_MEDIACODEC,  mpeg4_mediacodec);
     REGISTER_HWACCEL(MPEG4_MMAL,        mpeg4_mmal);
     REGISTER_HWACCEL(MPEG4_VAAPI,       mpeg4_vaapi);
     REGISTER_HWACCEL(MPEG4_VDPAU,       mpeg4_vdpau);
@@ -108,9 +116,11 @@ void avcodec_register_all(void)
     REGISTER_HWACCEL(VC1_MMAL,          vc1_mmal);
     REGISTER_HWACCEL(VC1_QSV,           vc1_qsv);
     REGISTER_HWACCEL(VP8_CUVID,         vp8_cuvid);
+    REGISTER_HWACCEL(VP8_MEDIACODEC,    vp8_mediacodec);
     REGISTER_HWACCEL(VP9_CUVID,         vp9_cuvid);
     REGISTER_HWACCEL(VP9_D3D11VA,       vp9_d3d11va);
     REGISTER_HWACCEL(VP9_DXVA2,         vp9_dxva2);
+    REGISTER_HWACCEL(VP9_MEDIACODEC,    vp9_mediacodec);
     REGISTER_HWACCEL(VP9_VAAPI,         vp9_vaapi);
     REGISTER_HWACCEL(WMV3_D3D11VA,      wmv3_d3d11va);
     REGISTER_HWACCEL(WMV3_DXVA2,        wmv3_dxva2);
@@ -418,7 +428,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(MACE3,             mace3);
     REGISTER_DECODER(MACE6,             mace6);
     REGISTER_DECODER(METASOUND,         metasound);
-    REGISTER_DECODER(MLP,               mlp);
+    REGISTER_ENCDEC (MLP,               mlp);
     REGISTER_DECODER(MP1,               mp1);
     REGISTER_DECODER(MP1FLOAT,          mp1float);
     REGISTER_ENCDEC (MP2,               mp2);
@@ -447,7 +457,7 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (SONIC,             sonic);
     REGISTER_ENCODER(SONIC_LS,          sonic_ls);
     REGISTER_DECODER(TAK,               tak);
-    REGISTER_DECODER(TRUEHD,            truehd);
+    REGISTER_ENCDEC (TRUEHD,            truehd);
     REGISTER_DECODER(TRUESPEECH,        truespeech);
     REGISTER_ENCDEC (TTA,               tta);
     REGISTER_DECODER(TWINVQ,            twinvq);
@@ -486,6 +496,8 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (PCM_S32BE,         pcm_s32be);
     REGISTER_ENCDEC (PCM_S32LE,         pcm_s32le);
     REGISTER_ENCDEC (PCM_S32LE_PLANAR,  pcm_s32le_planar);
+    REGISTER_ENCDEC (PCM_S64BE,         pcm_s64be);
+    REGISTER_ENCDEC (PCM_S64LE,         pcm_s64le);
     REGISTER_ENCDEC (PCM_U8,            pcm_u8);
     REGISTER_ENCDEC (PCM_U16BE,         pcm_u16be);
     REGISTER_ENCDEC (PCM_U16LE,         pcm_u16le);
@@ -585,7 +597,6 @@ void avcodec_register_all(void)
     REGISTER_DECODER(QDMC_AT,           qdmc_at);
     REGISTER_DECODER(QDM2_AT,           qdm2_at);
     REGISTER_DECODER(LIBCELT,           libcelt);
-    REGISTER_ENCODER(LIBFAAC,           libfaac);
     REGISTER_ENCDEC (LIBFDK_AAC,        libfdk_aac);
     REGISTER_ENCDEC (LIBGSM,            libgsm);
     REGISTER_ENCDEC (LIBGSM_MS,         libgsm_ms);
@@ -622,7 +633,8 @@ void avcodec_register_all(void)
 
     /* external libraries, that shouldn't be used by default if one of the
      * above is available */
-    REGISTER_ENCODER(LIBOPENH264,       libopenh264);
+    REGISTER_ENCDEC (LIBOPENH264,       libopenh264);
+    REGISTER_DECODER(H263_CUVID,        h263_cuvid);
     REGISTER_DECODER(H264_CUVID,        h264_cuvid);
     REGISTER_ENCODER(H264_NVENC,        h264_nvenc);
     REGISTER_ENCODER(H264_OMX,          h264_omx);
@@ -635,15 +647,23 @@ void avcodec_register_all(void)
     REGISTER_ENCODER(NVENC_HEVC,        nvenc_hevc);
 #endif
     REGISTER_DECODER(HEVC_CUVID,        hevc_cuvid);
+    REGISTER_DECODER(HEVC_MEDIACODEC,   hevc_mediacodec);
     REGISTER_ENCODER(HEVC_NVENC,        hevc_nvenc);
     REGISTER_ENCODER(HEVC_QSV,          hevc_qsv);
     REGISTER_ENCODER(HEVC_VAAPI,        hevc_vaapi);
     REGISTER_ENCODER(LIBKVAZAAR,        libkvazaar);
+    REGISTER_DECODER(MJPEG_CUVID,       mjpeg_cuvid);
     REGISTER_ENCODER(MJPEG_VAAPI,       mjpeg_vaapi);
+    REGISTER_DECODER(MPEG1_CUVID,       mpeg1_cuvid);
+    REGISTER_DECODER(MPEG2_CUVID,       mpeg2_cuvid);
     REGISTER_ENCODER(MPEG2_QSV,         mpeg2_qsv);
+    REGISTER_DECODER(MPEG4_CUVID,       mpeg4_cuvid);
+    REGISTER_DECODER(MPEG4_MEDIACODEC,  mpeg4_mediacodec);
     REGISTER_DECODER(VC1_CUVID,         vc1_cuvid);
     REGISTER_DECODER(VP8_CUVID,         vp8_cuvid);
+    REGISTER_DECODER(VP8_MEDIACODEC,    vp8_mediacodec);
     REGISTER_DECODER(VP9_CUVID,         vp9_cuvid);
+    REGISTER_DECODER(VP9_MEDIACODEC,    vp9_mediacodec);
 
     /* parsers */
     REGISTER_PARSER(AAC,                aac);
diff --git a/media/ffvpx/libavcodec/avcodec.h b/media/ffvpx/libavcodec/avcodec.h
index 39713ed76..e5e7f4225 100644
--- a/media/ffvpx/libavcodec/avcodec.h
+++ b/media/ffvpx/libavcodec/avcodec.h
@@ -43,7 +43,9 @@
 #include "version.h"
 
 /**
- * @defgroup libavc Encoding/Decoding Library
+ * @defgroup libavc libavcodec
+ * Encoding/Decoding Library
+ *
  * @{
  *
  * @defgroup lavc_decoding Decoding
@@ -443,9 +445,9 @@ enum AVCodecID {
     AV_CODEC_ID_PCM_S24LE_PLANAR,
     AV_CODEC_ID_PCM_S32LE_PLANAR,
     AV_CODEC_ID_PCM_S16BE_PLANAR,
-    /* new PCM "codecs" should be added right below this line starting with
-     * an explicit value of for example 0x10800
-     */
+
+    AV_CODEC_ID_PCM_S64LE = 0x10800,
+    AV_CODEC_ID_PCM_S64BE,
 
     /* various ADPCM codecs */
     AV_CODEC_ID_ADPCM_IMA_QT = 0x11000,
@@ -629,6 +631,7 @@ enum AVCodecID {
     AV_CODEC_ID_FIRST_UNKNOWN = 0x18000,           ///< A dummy ID pointing at the start of various fake codecs.
     AV_CODEC_ID_TTF = 0x18000,
 
+    AV_CODEC_ID_SCTE_35, ///< Contain timestamp estimated through PCR of program stream.
     AV_CODEC_ID_BINTEXT    = 0x18800,
     AV_CODEC_ID_XBIN,
     AV_CODEC_ID_IDF,
@@ -1033,6 +1036,16 @@ typedef struct RcOverride{
  * Audio encoder supports receiving a different number of samples in each call.
  */
 #define AV_CODEC_CAP_VARIABLE_FRAME_SIZE (1 << 16)
+/**
+ * Decoder is not a preferred choice for probing.
+ * This indicates that the decoder is not a good choice for probing.
+ * It could for example be an expensive to spin up hardware decoder,
+ * or it could simply not provide a lot of useful information about
+ * the stream.
+ * A decoder marked with this flag should only be used as last resort
+ * choice for probing.
+ */
+#define AV_CODEC_CAP_AVOID_PROBING       (1 << 17)
 /**
  * Codec is intra only.
  */
@@ -1348,6 +1361,14 @@ typedef struct AVCPBProperties {
  */
 enum AVPacketSideDataType {
     AV_PKT_DATA_PALETTE,
+
+    /**
+     * The AV_PKT_DATA_NEW_EXTRADATA is used to notify the codec or the format
+     * that the extradata buffer was changed and the receiving side should
+     * act upon it appropriately. The new extradata is embedded in the side
+     * data buffer and should be immediately used for processing the current
+     * frame or packet.
+     */
     AV_PKT_DATA_NEW_EXTRADATA,
 
     /**
@@ -1611,6 +1632,12 @@ typedef struct AVPacket {
 } AVPacket;
 #define AV_PKT_FLAG_KEY     0x0001 ///< The packet contains a keyframe
 #define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted
+/**
+ * Flag is used to discard packets which are required to maintain valid
+ * decoder state but are not required for output and should be dropped
+ * after decoding.
+ **/
+#define AV_PKT_FLAG_DISCARD   0x0004
 
 enum AVSideDataParamChangeFlags {
     AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT  = 0x0001,
@@ -2083,22 +2110,23 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     int ildct_cmp;
-#define FF_CMP_SAD    0
-#define FF_CMP_SSE    1
-#define FF_CMP_SATD   2
-#define FF_CMP_DCT    3
-#define FF_CMP_PSNR   4
-#define FF_CMP_BIT    5
-#define FF_CMP_RD     6
-#define FF_CMP_ZERO   7
-#define FF_CMP_VSAD   8
-#define FF_CMP_VSSE   9
-#define FF_CMP_NSSE   10
-#define FF_CMP_W53    11
-#define FF_CMP_W97    12
-#define FF_CMP_DCTMAX 13
-#define FF_CMP_DCT264 14
-#define FF_CMP_CHROMA 256
+#define FF_CMP_SAD          0
+#define FF_CMP_SSE          1
+#define FF_CMP_SATD         2
+#define FF_CMP_DCT          3
+#define FF_CMP_PSNR         4
+#define FF_CMP_BIT          5
+#define FF_CMP_RD           6
+#define FF_CMP_ZERO         7
+#define FF_CMP_VSAD         8
+#define FF_CMP_VSSE         9
+#define FF_CMP_NSSE         10
+#define FF_CMP_W53          11
+#define FF_CMP_W97          12
+#define FF_CMP_DCTMAX       13
+#define FF_CMP_DCT264       14
+#define FF_CMP_MEDIAN_SAD   15
+#define FF_CMP_CHROMA       256
 
     /**
      * ME diamond size & shape
@@ -2850,6 +2878,7 @@ typedef struct AVCodecContext {
 #define FF_BUG_DC_CLIP          4096
 #define FF_BUG_MS               8192 ///< Work around various bugs in Microsoft's broken decoders.
 #define FF_BUG_TRUNCATED       16384
+#define FF_BUG_IEDGE           32768
 
     /**
      * strictly follow the standard (MPEG-4, ...).
@@ -3165,6 +3194,13 @@ typedef struct AVCodecContext {
 #define FF_PROFILE_MPEG2_AAC_LOW 128
 #define FF_PROFILE_MPEG2_AAC_HE  131
 
+#define FF_PROFILE_DNXHD         0
+#define FF_PROFILE_DNXHR_LB      1
+#define FF_PROFILE_DNXHR_SQ      2
+#define FF_PROFILE_DNXHR_HQ      3
+#define FF_PROFILE_DNXHR_HQX     4
+#define FF_PROFILE_DNXHR_444     5
+
 #define FF_PROFILE_DTS         20
 #define FF_PROFILE_DTS_ES      30
 #define FF_PROFILE_DTS_96_24   40
@@ -3189,8 +3225,10 @@ typedef struct AVCodecContext {
 #define FF_PROFILE_H264_HIGH                 100
 #define FF_PROFILE_H264_HIGH_10              110
 #define FF_PROFILE_H264_HIGH_10_INTRA        (110|FF_PROFILE_H264_INTRA)
+#define FF_PROFILE_H264_MULTIVIEW_HIGH       118
 #define FF_PROFILE_H264_HIGH_422             122
 #define FF_PROFILE_H264_HIGH_422_INTRA       (122|FF_PROFILE_H264_INTRA)
+#define FF_PROFILE_H264_STEREO_HIGH          128
 #define FF_PROFILE_H264_HIGH_444             144
 #define FF_PROFILE_H264_HIGH_444_PREDICTIVE  244
 #define FF_PROFILE_H264_HIGH_444_INTRA       (244|FF_PROFILE_H264_INTRA)
@@ -3482,15 +3520,25 @@ typedef struct AVCodecContext {
     int            nb_coded_side_data;
 
     /**
-     * Encoding only.
+     * A reference to the AVHWFramesContext describing the input (for encoding)
+     * or output (decoding) frames. The reference is set by the caller and
+     * afterwards owned (and freed) by libavcodec.
+     *
+     * - decoding: This field should be set by the caller from the get_format()
+     *             callback. The previous reference (if any) will always be
+     *             unreffed by libavcodec before the get_format() call.
+     *
+     *             If the default get_buffer2() is used with a hwaccel pixel
+     *             format, then this AVHWFramesContext will be used for
+     *             allocating the frame buffers.
      *
-     * For hardware encoders configured to use a hwaccel pixel format, this
-     * field should be set by the caller to a reference to the AVHWFramesContext
-     * describing input frames. AVHWFramesContext.format must be equal to
-     * AVCodecContext.pix_fmt.
+     * - encoding: For hardware encoders configured to use a hwaccel pixel
+     *             format, this field should be set by the caller to a reference
+     *             to the AVHWFramesContext describing input frames.
+     *             AVHWFramesContext.format must be equal to
+     *             AVCodecContext.pix_fmt.
      *
-     * This field should be set before avcodec_open2() is called and is
-     * afterwards owned and managed by libavcodec.
+     *             This field should be set before avcodec_open2() is called.
      */
     AVBufferRef *hw_frames_ctx;
 
@@ -3505,6 +3553,17 @@ typedef struct AVCodecContext {
 #define FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS 1
 #endif
 
+    /**
+     * Audio only. The amount of padding (in samples) appended by the encoder to
+     * the end of the audio. I.e. this number of decoded samples must be
+     * discarded by the caller from the end of the stream to get the original
+     * audio without any trailing padding.
+     *
+     * - decoding: unused
+     * - encoding: unused
+     */
+    int trailing_padding;
+
 } AVCodecContext;
 
 AVRational av_codec_get_pkt_timebase         (const AVCodecContext *avctx);
@@ -5115,7 +5174,10 @@ AVCodecParserContext *av_parser_init(int codec_id);
  * @param poutbuf       set to pointer to parsed buffer or NULL if not yet finished.
  * @param poutbuf_size  set to size of parsed buffer or zero if not yet finished.
  * @param buf           input buffer.
- * @param buf_size      input length, to signal EOF, this should be 0 (so that the last frame can be output).
+ * @param buf_size      buffer size in bytes without the padding. I.e. the full buffer
+                        size is assumed to be buf_size + AV_INPUT_BUFFER_PADDING_SIZE.
+                        To signal EOF, this should be 0 (so that the last frame
+                        can be output).
  * @param pts           input presentation timestamp.
  * @param dts           input decoding timestamp.
  * @param pos           input byte position in stream.
@@ -5505,15 +5567,8 @@ enum AVPixelFormat avcodec_find_best_pix_fmt_of_2(enum AVPixelFormat dst_pix_fmt
                                             enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr);
 
 attribute_deprecated
-#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
-enum AVPixelFormat avcodec_find_best_pix_fmt2(const enum AVPixelFormat *pix_fmt_list,
-                                              enum AVPixelFormat src_pix_fmt,
-                                              int has_alpha, int *loss_ptr);
-#else
 enum AVPixelFormat avcodec_find_best_pix_fmt2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
                                             enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr);
-#endif
-
 
 enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
 
@@ -5881,7 +5936,8 @@ int av_bsf_init(AVBSFContext *ctx);
  * av_bsf_receive_packet() repeatedly until it returns AVERROR(EAGAIN) or
  * AVERROR_EOF.
  *
- * @param pkt the packet to filter. The bitstream filter will take ownership of
+ * @param pkt the packet to filter. pkt must contain some payload (i.e data or
+ * side data must be present in pkt). The bitstream filter will take ownership of
  * the packet and reset the contents of pkt. pkt is not touched if an error occurs.
  * This parameter may be NULL, which signals the end of the stream (i.e. no more
  * packets will be sent). That will cause the filter to output any packets it
@@ -5931,6 +5987,91 @@ void av_bsf_free(AVBSFContext **ctx);
  */
 const AVClass *av_bsf_get_class(void);
 
+/**
+ * Structure for chain/list of bitstream filters.
+ * Empty list can be allocated by av_bsf_list_alloc().
+ */
+typedef struct AVBSFList AVBSFList;
+
+/**
+ * Allocate empty list of bitstream filters.
+ * The list must be later freed by av_bsf_list_free()
+ * or finalized by av_bsf_list_finalize().
+ *
+ * @return Pointer to @ref AVBSFList on success, NULL in case of failure
+ */
+AVBSFList *av_bsf_list_alloc(void);
+
+/**
+ * Free list of bitstream filters.
+ *
+ * @param lst Pointer to pointer returned by av_bsf_list_alloc()
+ */
+void av_bsf_list_free(AVBSFList **lst);
+
+/**
+ * Append bitstream filter to the list of bitstream filters.
+ *
+ * @param lst List to append to
+ * @param bsf Filter context to be appended
+ *
+ * @return >=0 on success, negative AVERROR in case of failure
+ */
+int av_bsf_list_append(AVBSFList *lst, AVBSFContext *bsf);
+
+/**
+ * Construct new bitstream filter context given it's name and options
+ * and append it to the list of bitstream filters.
+ *
+ * @param lst      List to append to
+ * @param bsf_name Name of the bitstream filter
+ * @param options  Options for the bitstream filter, can be set to NULL
+ *
+ * @return >=0 on success, negative AVERROR in case of failure
+ */
+int av_bsf_list_append2(AVBSFList *lst, const char * bsf_name, AVDictionary **options);
+/**
+ * Finalize list of bitstream filters.
+ *
+ * This function will transform @ref AVBSFList to single @ref AVBSFContext,
+ * so the whole chain of bitstream filters can be treated as single filter
+ * freshly allocated by av_bsf_alloc().
+ * If the call is successful, @ref AVBSFList structure is freed and lst
+ * will be set to NULL. In case of failure, caller is responsible for
+ * freeing the structure by av_bsf_list_free()
+ *
+ * @param      lst Filter list structure to be transformed
+ * @param[out] bsf Pointer to be set to newly created @ref AVBSFContext structure
+ *                 representing the chain of bitstream filters
+ *
+ * @return >=0 on success, negative AVERROR in case of failure
+ */
+int av_bsf_list_finalize(AVBSFList **lst, AVBSFContext **bsf);
+
+/**
+ * Parse string describing list of bitstream filters and create single
+ * @ref AVBSFContext describing the whole chain of bitstream filters.
+ * Resulting @ref AVBSFContext can be treated as any other @ref AVBSFContext freshly
+ * allocated by av_bsf_alloc().
+ *
+ * @param      str String describing chain of bitstream filters in format
+ *                 `bsf1[=opt1=val1:opt2=val2][,bsf2]`
+ * @param[out] bsf Pointer to be set to newly created @ref AVBSFContext structure
+ *                 representing the chain of bitstream filters
+ *
+ * @return >=0 on success, negative AVERROR in case of failure
+ */
+int av_bsf_list_parse_str(const char *str, AVBSFContext **bsf);
+
+/**
+ * Get null/pass-through bitstream filter.
+ *
+ * @param[out] bsf Pointer to be set to new instance of pass-through bitstream filter
+ *
+ * @return
+ */
+int av_bsf_get_null_filter(AVBSFContext **bsf);
+
 /* memory */
 
 /**
diff --git a/media/ffvpx/libavcodec/avpacket.c b/media/ffvpx/libavcodec/avpacket.c
index 921868923..e5a8bdbe4 100644
--- a/media/ffvpx/libavcodec/avpacket.c
+++ b/media/ffvpx/libavcodec/avpacket.c
@@ -139,7 +139,8 @@ int av_grow_packet(AVPacket *pkt, int grow_by)
         pkt->buf = av_buffer_alloc(new_size);
         if (!pkt->buf)
             return AVERROR(ENOMEM);
-        memcpy(pkt->buf->data, pkt->data, pkt->size);
+        if (pkt->size > 0)
+            memcpy(pkt->buf->data, pkt->data, pkt->size);
         pkt->data = pkt->buf->data;
     }
     pkt->size += grow_by;
@@ -198,6 +199,7 @@ static int copy_packet_data(AVPacket *pkt, const AVPacket *src, int dup)
 {
     pkt->data      = NULL;
     pkt->side_data = NULL;
+    pkt->side_data_elems = 0;
     if (pkt->buf) {
         AVBufferRef *ref = av_buffer_ref(src->buf);
         if (!ref)
@@ -207,9 +209,11 @@ static int copy_packet_data(AVPacket *pkt, const AVPacket *src, int dup)
     } else {
         DUP_DATA(pkt->data, src->data, pkt->size, 1, ALLOC_BUF);
     }
-    if (pkt->side_data_elems && dup)
+    if (src->side_data_elems && dup) {
         pkt->side_data = src->side_data;
-    if (pkt->side_data_elems && !dup) {
+        pkt->side_data_elems = src->side_data_elems;
+    }
+    if (src->side_data_elems && !dup) {
         return av_copy_packet_side_data(pkt, src);
     }
     return 0;
@@ -291,16 +295,17 @@ FF_ENABLE_DEPRECATION_WARNINGS
 int av_packet_add_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
                             uint8_t *data, size_t size)
 {
+    AVPacketSideData *tmp;
     int elems = pkt->side_data_elems;
 
     if ((unsigned)elems + 1 > INT_MAX / sizeof(*pkt->side_data))
         return AVERROR(ERANGE);
 
-    pkt->side_data = av_realloc(pkt->side_data,
-                                (elems + 1) * sizeof(*pkt->side_data));
-    if (!pkt->side_data)
+    tmp = av_realloc(pkt->side_data, (elems + 1) * sizeof(*tmp));
+    if (!tmp)
         return AVERROR(ENOMEM);
 
+    pkt->side_data = tmp;
     pkt->side_data[elems].data = data;
     pkt->side_data[elems].size = size;
     pkt->side_data[elems].type = type;
@@ -581,7 +586,8 @@ int av_packet_ref(AVPacket *dst, const AVPacket *src)
         ret = packet_alloc(&dst->buf, src->size);
         if (ret < 0)
             goto fail;
-        memcpy(dst->buf->data, src->data, src->size);
+        if (src->size)
+            memcpy(dst->buf->data, src->data, src->size);
 
         dst->data = dst->buf->data;
     } else {
diff --git a/media/ffvpx/libavcodec/bsf.h b/media/ffvpx/libavcodec/bsf.h
index 3435df5d8..af035eee4 100644
--- a/media/ffvpx/libavcodec/bsf.h
+++ b/media/ffvpx/libavcodec/bsf.h
@@ -28,6 +28,17 @@
  */
 int ff_bsf_get_packet(AVBSFContext *ctx, AVPacket **pkt);
 
+/**
+ * Called by bitstream filters to get packet for filtering.
+ * The reference to packet is moved to provided packet structure.
+ *
+ * @param ctx pointer to AVBSFContext of filter
+ * @param pkt pointer to packet to move reference to
+ *
+ * @return 0>= on success, negative AVERROR in case of failure
+ */
+int ff_bsf_get_packet_ref(AVBSFContext *ctx, AVPacket *pkt);
+
 const AVClass *ff_bsf_child_class_next(const AVClass *prev);
 
 #endif /* AVCODEC_BSF_H */
diff --git a/media/ffvpx/libavcodec/codec_desc.c b/media/ffvpx/libavcodec/codec_desc.c
index 9d94b72ed..d862cc8f6 100644
--- a/media/ffvpx/libavcodec/codec_desc.c
+++ b/media/ffvpx/libavcodec/codec_desc.c
@@ -2281,7 +2281,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .id        = AV_CODEC_ID_COOK,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "cook",
-        .long_name = NULL_IF_CONFIG_SMALL("Cook / Cooker / Gecko (RealAudio G2)"),
+        .long_name = NULL_IF_CONFIG_SMALL("Cook / Cooker / Goanna (RealAudio G2)"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
diff --git a/media/ffvpx/libavcodec/dummy_funcs.c b/media/ffvpx/libavcodec/dummy_funcs.c
index a065c3576..200e1d266 100644
--- a/media/ffvpx/libavcodec/dummy_funcs.c
+++ b/media/ffvpx/libavcodec/dummy_funcs.c
@@ -62,6 +62,17 @@ AVHWAccel ff_vp8_cuvid_hwaccel;
 AVHWAccel ff_vc1_cuvid_hwaccel;
 AVHWAccel ff_hevc_cuvid_hwaccel;
 AVHWAccel ff_h264_cuvid_hwaccel;
+/* Added by FFmpeg 3.2 */
+AVHWAccel ff_h263_cuvid_hwaccel;
+AVHWAccel ff_mjpeg_cuvid_hwaccel;
+AVHWAccel ff_mpeg1_cuvid_hwaccel;
+AVHWAccel ff_mpeg2_cuvid_hwaccel;
+AVHWAccel ff_mpeg4_cuvid_hwaccel;
+AVHWAccel ff_h264_mediacodec_hwaccel;
+AVHWAccel ff_hevc_mediacodec_hwaccel;
+AVHWAccel ff_mpeg4_mediacodec_hwaccel;
+AVHWAccel ff_vp8_mediacodec_hwaccel;
+AVHWAccel ff_vp9_mediacodec_hwaccel;
 
 AVCodec ff_a64multi_encoder;
 AVCodec ff_a64multi5_encoder;
@@ -713,6 +724,23 @@ AVCodec ff_sheervideo_decoder;
 AVCodec ff_magicyuv_decoder;
 AVCodec ff_m101_decoder;
 AVCodec ff_h264_mediacodec_decoder;
+/* Added by FFmpeg 3.2 */
+AVCodec ff_vp9_mediacodec_decoder;
+AVCodec ff_vp8_mediacodec_decoder;
+AVCodec ff_mpeg4_mediacodec_decoder;
+AVCodec ff_mpeg4_cuvid_decoder;
+AVCodec ff_mpeg2_cuvid_decoder;
+AVCodec ff_mpeg1_cuvid_decoder;
+AVCodec ff_mjpeg_cuvid_decoder;
+AVCodec ff_hevc_mediacodec_decoder;
+AVCodec ff_h263_cuvid_decoder;
+AVCodec ff_libopenh264_decoder;
+AVCodec ff_pcm_s64le_decoder;
+AVCodec ff_pcm_s64le_encoder;
+AVCodec ff_pcm_s64be_decoder;
+AVCodec ff_pcm_s64be_encoder;
+AVCodec ff_truehd_encoder;
+AVCodec ff_mlp_encoder;
 
 AVCodecParser ff_aac_parser;
 AVCodecParser ff_aac_latm_parser;
diff --git a/media/ffvpx/libavcodec/h264dsp.h b/media/ffvpx/libavcodec/h264dsp.h
index 7f24376b8..bcd76abcc 100644
--- a/media/ffvpx/libavcodec/h264dsp.h
+++ b/media/ffvpx/libavcodec/h264dsp.h
@@ -28,11 +28,12 @@
 #define AVCODEC_H264DSP_H
 
 #include <stdint.h>
+#include <stddef.h>
 
-typedef void (*h264_weight_func)(uint8_t *block, int stride, int height,
+typedef void (*h264_weight_func)(uint8_t *block, ptrdiff_t stride, int height,
                                  int log2_denom, int weight, int offset);
 typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src,
-                                   int stride, int height, int log2_denom,
+                                   ptrdiff_t stride, int height, int log2_denom,
                                    int weightd, int weights, int offset);
 
 /**
diff --git a/media/ffvpx/libavcodec/imgconvert.c b/media/ffvpx/libavcodec/imgconvert.c
index 46fa7809b..1547f1896 100644
--- a/media/ffvpx/libavcodec/imgconvert.c
+++ b/media/ffvpx/libavcodec/imgconvert.c
@@ -57,19 +57,11 @@ enum AVPixelFormat avcodec_find_best_pix_fmt_of_2(enum AVPixelFormat dst_pix_fmt
     return av_find_best_pix_fmt_of_2(dst_pix_fmt1, dst_pix_fmt2, src_pix_fmt, has_alpha, loss_ptr);
 }
 
-#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
-enum AVPixelFormat avcodec_find_best_pix_fmt2(const enum AVPixelFormat *pix_fmt_list,
-                                            enum AVPixelFormat src_pix_fmt,
-                                            int has_alpha, int *loss_ptr){
-    return avcodec_find_best_pix_fmt_of_list(pix_fmt_list, src_pix_fmt, has_alpha, loss_ptr);
-}
-#else
 enum AVPixelFormat avcodec_find_best_pix_fmt2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
                                             enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr)
 {
     return avcodec_find_best_pix_fmt_of_2(dst_pix_fmt1, dst_pix_fmt2, src_pix_fmt, has_alpha, loss_ptr);
 }
-#endif
 
 enum AVPixelFormat avcodec_find_best_pix_fmt_of_list(const enum AVPixelFormat *pix_fmt_list,
                                             enum AVPixelFormat src_pix_fmt,
diff --git a/media/ffvpx/libavcodec/internal.h b/media/ffvpx/libavcodec/internal.h
index 000fe263c..35b9630b5 100644
--- a/media/ffvpx/libavcodec/internal.h
+++ b/media/ffvpx/libavcodec/internal.h
@@ -173,6 +173,7 @@ typedef struct AVCodecInternal {
     int buffer_pkt_valid; // encoding: packet without data can be valid
     AVFrame *buffer_frame;
     int draining_done;
+    int showed_multi_packet_warning;
 } AVCodecInternal;
 
 struct AVCodecDefault {
diff --git a/media/ffvpx/libavcodec/me_cmp.h b/media/ffvpx/libavcodec/me_cmp.h
index a3603ec2c..5666f59ad 100644
--- a/media/ffvpx/libavcodec/me_cmp.h
+++ b/media/ffvpx/libavcodec/me_cmp.h
@@ -76,6 +76,7 @@ typedef struct MECmpContext {
     me_cmp_func frame_skip_cmp[6]; // only width 8 used
 
     me_cmp_func pix_abs[2][4];
+    me_cmp_func median_sad[2];
 } MECmpContext;
 
 void ff_me_cmp_init_static(void);
diff --git a/media/ffvpx/libavcodec/mpegvideo.h b/media/ffvpx/libavcodec/mpegvideo.h
index a1f3d4bd9..c82fa3e1a 100644
--- a/media/ffvpx/libavcodec/mpegvideo.h
+++ b/media/ffvpx/libavcodec/mpegvideo.h
@@ -594,7 +594,8 @@ enum rc_strategy {
 { "nsse",   "Noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
 { "dct264", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT264 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
 { "dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
-{ "chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }
+{ "chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "msad",   "Sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }
 
 #ifndef FF_MPV_OFFSET
 #define FF_MPV_OFFSET(x) offsetof(MpegEncContext, x)
diff --git a/media/ffvpx/libavcodec/parser.c b/media/ffvpx/libavcodec/parser.c
index 2c8fc6904..30cfc55cb 100644
--- a/media/ffvpx/libavcodec/parser.c
+++ b/media/ffvpx/libavcodec/parser.c
@@ -182,6 +182,11 @@ int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx,
     index = s->parser->parser_parse(s, avctx, (const uint8_t **) poutbuf,
                                     poutbuf_size, buf, buf_size);
     av_assert0(index > -0x20000000); // The API does not allow returning AVERROR codes
+#define FILL(name) if(s->name > 0 && avctx->name <= 0) avctx->name = s->name
+    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+        FILL(field_order);
+    }
+
     /* update the file pointer */
     if (*poutbuf_size) {
         /* fill the data for the current frame */
diff --git a/media/ffvpx/libavcodec/raw.c b/media/ffvpx/libavcodec/raw.c
index bfa2537b5..f73a134a9 100644
--- a/media/ffvpx/libavcodec/raw.c
+++ b/media/ffvpx/libavcodec/raw.c
@@ -31,6 +31,7 @@
 const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_YUV420P, MKTAG('I', '4', '2', '0') }, /* Planar formats */
     { AV_PIX_FMT_YUV420P, MKTAG('I', 'Y', 'U', 'V') },
+    { AV_PIX_FMT_YUV420P, MKTAG('y', 'v', '1', '2') },
     { AV_PIX_FMT_YUV420P, MKTAG('Y', 'V', '1', '2') },
     { AV_PIX_FMT_YUV410P, MKTAG('Y', 'U', 'V', '9') },
     { AV_PIX_FMT_YUV410P, MKTAG('Y', 'V', 'U', '9') },
@@ -187,6 +188,14 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_GBRP16LE,     MKTAG('G', '3', 00 , 16 ) },
     { AV_PIX_FMT_GBRP16BE,     MKTAG(16 , 00 , '3', 'G') },
 
+    { AV_PIX_FMT_GBRAP,        MKTAG('G', '4', 00 ,  8 ) },
+    { AV_PIX_FMT_GBRAP10LE,    MKTAG('G', '4', 00 , 10 ) },
+    { AV_PIX_FMT_GBRAP10BE,    MKTAG(10 , 00 , '4', 'G') },
+    { AV_PIX_FMT_GBRAP12LE,    MKTAG('G', '4', 00 , 12 ) },
+    { AV_PIX_FMT_GBRAP12BE,    MKTAG(12 , 00 , '4', 'G') },
+    { AV_PIX_FMT_GBRAP16LE,    MKTAG('G', '4', 00 , 16 ) },
+    { AV_PIX_FMT_GBRAP16BE,    MKTAG(16 , 00 , '4', 'G') },
+
     { AV_PIX_FMT_XYZ12LE,      MKTAG('X', 'Y', 'Z' , 36 ) },
     { AV_PIX_FMT_XYZ12BE,      MKTAG(36 , 'Z' , 'Y', 'X') },
 
@@ -224,6 +233,7 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_ABGR,    MKTAG('A', 'B', 'G', 'R') },
     { AV_PIX_FMT_GRAY16BE,MKTAG('b', '1', '6', 'g') },
     { AV_PIX_FMT_RGB48BE, MKTAG('b', '4', '8', 'r') },
+    { AV_PIX_FMT_RGBA64BE,MKTAG('b', '6', '4', 'a') },
 
     /* vlc */
     { AV_PIX_FMT_YUV410P,     MKTAG('I', '4', '1', '0') },
diff --git a/media/ffvpx/libavcodec/utils.c b/media/ffvpx/libavcodec/utils.c
index f7adb525f..87de15fc6 100644
--- a/media/ffvpx/libavcodec/utils.c
+++ b/media/ffvpx/libavcodec/utils.c
@@ -724,6 +724,9 @@ int avcodec_default_get_buffer2(AVCodecContext *avctx, AVFrame *frame, int flags
 {
     int ret;
 
+    if (avctx->hw_frames_ctx)
+        return av_hwframe_get_buffer(avctx->hw_frames_ctx, frame, 0);
+
     if ((ret = update_frame_pool(avctx, frame)) < 0)
         return ret;
 
@@ -765,7 +768,12 @@ int ff_init_buffer_info(AVCodecContext *avctx, AVFrame *frame)
     };
 
     if (pkt) {
+        frame->pts = pkt->pts;
+#if FF_API_PKT_PTS
+FF_DISABLE_DEPRECATION_WARNINGS
         frame->pkt_pts = pkt->pts;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
         av_frame_set_pkt_pos     (frame, pkt->pos);
         av_frame_set_pkt_duration(frame, pkt->duration);
         av_frame_set_pkt_size    (frame, pkt->size);
@@ -784,8 +792,19 @@ int ff_init_buffer_info(AVCodecContext *avctx, AVFrame *frame)
             }
         }
         add_metadata_from_side_data(pkt, frame);
+
+        if (pkt->flags & AV_PKT_FLAG_DISCARD) {
+            frame->flags |= AV_FRAME_FLAG_DISCARD;
+        } else {
+            frame->flags = (frame->flags & ~AV_FRAME_FLAG_DISCARD);
+        }
     } else {
+        frame->pts = AV_NOPTS_VALUE;
+#if FF_API_PKT_PTS
+FF_DISABLE_DEPRECATION_WARNINGS
         frame->pkt_pts = AV_NOPTS_VALUE;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
         av_frame_set_pkt_pos     (frame, -1);
         av_frame_set_pkt_duration(frame, 0);
         av_frame_set_pkt_size    (frame, -1);
@@ -991,6 +1010,7 @@ int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, v
         if (ret)
             ret[i] = r;
     }
+    emms_c();
     return 0;
 }
 
@@ -1003,6 +1023,7 @@ int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2,
         if (ret)
             ret[i] = r;
     }
+    emms_c();
     return 0;
 }
 
@@ -1113,6 +1134,8 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
         av_freep(&avctx->internal->hwaccel_priv_data);
         avctx->hwaccel = NULL;
 
+        av_buffer_unref(&avctx->hw_frames_ctx);
+
         ret = avctx->get_format(avctx, choices);
 
         desc = av_pix_fmt_desc_get(ret);
@@ -1128,6 +1151,16 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
             break;
 #endif
 
+        if (avctx->hw_frames_ctx) {
+            AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+            if (hw_frames_ctx->format != ret) {
+                av_log(avctx, AV_LOG_ERROR, "Format returned from get_buffer() "
+                       "does not match the format of provided AVHWFramesContext\n");
+                ret = AV_PIX_FMT_NONE;
+                break;
+            }
+        }
+
         if (!setup_hwaccel(avctx, ret, desc->name))
             break;
 
@@ -1389,10 +1422,9 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
         avctx->thread_count = 1;
 
     if (avctx->codec->max_lowres < avctx->lowres || avctx->lowres < 0) {
-        av_log(avctx, AV_LOG_ERROR, "The maximum value for lowres supported by the decoder is %d\n",
+        av_log(avctx, AV_LOG_WARNING, "The maximum value for lowres supported by the decoder is %d\n",
                avctx->codec->max_lowres);
-        ret = AVERROR(EINVAL);
-        goto free_and_end;
+        avctx->lowres = avctx->codec->max_lowres;
     }
 
 #if FF_API_VISMV
@@ -1962,6 +1994,8 @@ int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
     ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
     av_assert0(ret <= 0);
 
+    emms_c();
+
     if (avpkt->data && avpkt->data == avctx->internal->byte_buffer) {
         needs_realloc = 0;
         if (user_pkt.data) {
@@ -1999,7 +2033,6 @@ int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
     if (ret < 0 || !*got_packet_ptr)
         av_packet_unref(avpkt);
 
-    emms_c();
     return ret;
 }
 
@@ -2023,7 +2056,7 @@ int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
  * which case the output will as well.
  *
  * @param pts the pts field of the decoded AVPacket, as passed through
- * AVFrame.pkt_pts
+ * AVFrame.pts
  * @param dts the dts field of the decoded AVPacket
  * @return one of the input values, may be AV_NOPTS_VALUE
  */
@@ -2248,7 +2281,9 @@ fail:
             if(ret == tmp.size)
                 ret = avpkt->size;
         }
-
+        if (picture->flags & AV_FRAME_FLAG_DISCARD) {
+            *got_picture_ptr = 0;
+        }
         if (*got_picture_ptr) {
             if (!avctx->refcounted_frames) {
                 int err = unrefcount_frame(avci, picture);
@@ -2259,7 +2294,7 @@ fail:
             avctx->frame_number++;
             av_frame_set_best_effort_timestamp(picture,
                                                guess_correct_pts(avctx,
-                                                                 picture->pkt_pts,
+                                                                 picture->pts,
                                                                  picture->pkt_dts));
         } else
             av_frame_unref(picture);
@@ -2332,7 +2367,7 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
             avctx->frame_number++;
             av_frame_set_best_effort_timestamp(frame,
                                                guess_correct_pts(avctx,
-                                                                 frame->pkt_pts,
+                                                                 frame->pts,
                                                                  frame->pkt_dts));
             if (frame->format == AV_SAMPLE_FMT_NONE)
                 frame->format = avctx->sample_fmt;
@@ -2353,6 +2388,13 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
             skip_reason = AV_RL8(side + 8);
             discard_reason = AV_RL8(side + 9);
         }
+
+        if ((frame->flags & AV_FRAME_FLAG_DISCARD) && *got_frame_ptr &&
+            !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
+            avctx->internal->skip_samples -= frame->nb_samples;
+            *got_frame_ptr = 0;
+        }
+
         if (avctx->internal->skip_samples > 0 && *got_frame_ptr &&
             !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
             if(frame->nb_samples <= avctx->internal->skip_samples){
@@ -2367,8 +2409,14 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
                     int64_t diff_ts = av_rescale_q(avctx->internal->skip_samples,
                                                    (AVRational){1, avctx->sample_rate},
                                                    avctx->pkt_timebase);
+                    if(frame->pts!=AV_NOPTS_VALUE)
+                        frame->pts += diff_ts;
+#if FF_API_PKT_PTS
+FF_DISABLE_DEPRECATION_WARNINGS
                     if(frame->pkt_pts!=AV_NOPTS_VALUE)
                         frame->pkt_pts += diff_ts;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
                     if(frame->pkt_dts!=AV_NOPTS_VALUE)
                         frame->pkt_dts += diff_ts;
                     if (av_frame_get_pkt_duration(frame) >= diff_ts)
@@ -2432,6 +2480,12 @@ fail:
 
     av_assert0(ret <= avpkt->size);
 
+    if (!avci->showed_multi_packet_warning &&
+        ret >= 0 && ret != avpkt->size && !(avctx->codec->capabilities & AV_CODEC_CAP_SUBFRAMES)) {
+            av_log(avctx, AV_LOG_WARNING, "Multiple frames in a packet.\n");
+        avci->showed_multi_packet_warning = 1;
+    }
+
     return ret;
 }
 
@@ -2784,6 +2838,9 @@ int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacke
     if (avctx->internal->draining)
         return AVERROR_EOF;
 
+    if (avpkt && !avpkt->size && avpkt->data)
+        return AVERROR(EINVAL);
+
     if (!avpkt || !avpkt->size) {
         avctx->internal->draining = 1;
         avpkt = NULL;
@@ -2832,7 +2889,14 @@ int attribute_align_arg avcodec_receive_frame(AVCodecContext *avctx, AVFrame *fr
     if (avctx->codec->receive_frame) {
         if (avctx->internal->draining && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
             return AVERROR_EOF;
-        return avctx->codec->receive_frame(avctx, frame);
+        ret = avctx->codec->receive_frame(avctx, frame);
+        if (ret >= 0) {
+            if (av_frame_get_best_effort_timestamp(frame) == AV_NOPTS_VALUE) {
+                av_frame_set_best_effort_timestamp(frame,
+                    guess_correct_pts(avctx, frame->pts, frame->pkt_dts));
+            }
+        }
+        return ret;
     }
 
     // Emulation via old API.
@@ -3190,7 +3254,21 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
                                 av_get_colorspace_name(enc->colorspace));
             }
 
-            if (av_log_get_level() >= AV_LOG_DEBUG &&
+            if (enc->field_order != AV_FIELD_UNKNOWN) {
+                const char *field_order = "progressive";
+                if (enc->field_order == AV_FIELD_TT)
+                    field_order = "top first";
+                else if (enc->field_order == AV_FIELD_BB)
+                    field_order = "bottom first";
+                else if (enc->field_order == AV_FIELD_TB)
+                    field_order = "top coded first (swapped)";
+                else if (enc->field_order == AV_FIELD_BT)
+                    field_order = "bottom coded first (swapped)";
+
+                av_strlcatf(detail, sizeof(detail), "%s, ", field_order);
+            }
+
+            if (av_log_get_level() >= AV_LOG_VERBOSE &&
                 enc->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED)
                 av_strlcatf(detail, sizeof(detail), "%s, ",
                             av_chroma_location_name(enc->chroma_sample_location));
@@ -3259,6 +3337,14 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
             && enc->bits_per_raw_sample != av_get_bytes_per_sample(enc->sample_fmt) * 8)
             snprintf(buf + strlen(buf), buf_size - strlen(buf),
                      " (%d bit)", enc->bits_per_raw_sample);
+        if (av_log_get_level() >= AV_LOG_VERBOSE) {
+            if (enc->initial_padding)
+                snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                         ", delay %d", enc->initial_padding);
+            if (enc->trailing_padding)
+                snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                         ", padding %d", enc->trailing_padding);
+        }
         break;
     case AVMEDIA_TYPE_DATA:
         if (av_log_get_level() >= AV_LOG_DEBUG) {
@@ -3416,6 +3502,8 @@ int av_get_exact_bits_per_sample(enum AVCodecID codec_id)
         return 32;
     case AV_CODEC_ID_PCM_F64BE:
     case AV_CODEC_ID_PCM_F64LE:
+    case AV_CODEC_ID_PCM_S64BE:
+    case AV_CODEC_ID_PCM_S64LE:
         return 64;
     default:
         return 0;
@@ -3433,6 +3521,7 @@ enum AVCodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be)
         [AV_SAMPLE_FMT_U8P ] = { AV_CODEC_ID_PCM_U8,    AV_CODEC_ID_PCM_U8    },
         [AV_SAMPLE_FMT_S16P] = { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE },
         [AV_SAMPLE_FMT_S32P] = { AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE },
+        [AV_SAMPLE_FMT_S64P] = { AV_CODEC_ID_PCM_S64LE, AV_CODEC_ID_PCM_S64BE },
         [AV_SAMPLE_FMT_FLTP] = { AV_CODEC_ID_PCM_F32LE, AV_CODEC_ID_PCM_F32BE },
         [AV_SAMPLE_FMT_DBLP] = { AV_CODEC_ID_PCM_F64LE, AV_CODEC_ID_PCM_F64BE },
     };
@@ -4104,14 +4193,15 @@ int avcodec_parameters_from_context(AVCodecParameters *par,
         par->video_delay         = codec->has_b_frames;
         break;
     case AVMEDIA_TYPE_AUDIO:
-        par->format          = codec->sample_fmt;
-        par->channel_layout  = codec->channel_layout;
-        par->channels        = codec->channels;
-        par->sample_rate     = codec->sample_rate;
-        par->block_align     = codec->block_align;
-        par->frame_size      = codec->frame_size;
-        par->initial_padding = codec->initial_padding;
-        par->seek_preroll    = codec->seek_preroll;
+        par->format           = codec->sample_fmt;
+        par->channel_layout   = codec->channel_layout;
+        par->channels         = codec->channels;
+        par->sample_rate      = codec->sample_rate;
+        par->block_align      = codec->block_align;
+        par->frame_size       = codec->frame_size;
+        par->initial_padding  = codec->initial_padding;
+        par->trailing_padding = codec->trailing_padding;
+        par->seek_preroll     = codec->seek_preroll;
         break;
     case AVMEDIA_TYPE_SUBTITLE:
         par->width  = codec->width;
@@ -4158,15 +4248,16 @@ int avcodec_parameters_to_context(AVCodecContext *codec,
         codec->has_b_frames           = par->video_delay;
         break;
     case AVMEDIA_TYPE_AUDIO:
-        codec->sample_fmt      = par->format;
-        codec->channel_layout  = par->channel_layout;
-        codec->channels        = par->channels;
-        codec->sample_rate     = par->sample_rate;
-        codec->block_align     = par->block_align;
-        codec->frame_size      = par->frame_size;
-        codec->delay           =
-        codec->initial_padding = par->initial_padding;
-        codec->seek_preroll    = par->seek_preroll;
+        codec->sample_fmt       = par->format;
+        codec->channel_layout   = par->channel_layout;
+        codec->channels         = par->channels;
+        codec->sample_rate      = par->sample_rate;
+        codec->block_align      = par->block_align;
+        codec->frame_size       = par->frame_size;
+        codec->delay            =
+        codec->initial_padding  = par->initial_padding;
+        codec->trailing_padding = par->trailing_padding;
+        codec->seek_preroll     = par->seek_preroll;
         break;
     case AVMEDIA_TYPE_SUBTITLE:
         codec->width  = par->width;
diff --git a/media/ffvpx/libavcodec/version.h b/media/ffvpx/libavcodec/version.h
index 4f6423b15..ec8837a4e 100644
--- a/media/ffvpx/libavcodec/version.h
+++ b/media/ffvpx/libavcodec/version.h
@@ -28,7 +28,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  57
-#define LIBAVCODEC_VERSION_MINOR  48
+#define LIBAVCODEC_VERSION_MINOR  64
 #define LIBAVCODEC_VERSION_MICRO 101
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
diff --git a/media/ffvpx/libavcodec/vp9.c b/media/ffvpx/libavcodec/vp9.c
index 3b721495d..1aab6ba03 100644
--- a/media/ffvpx/libavcodec/vp9.c
+++ b/media/ffvpx/libavcodec/vp9.c
@@ -3991,7 +3991,12 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
         }
         if ((res = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
             return res;
+        ((AVFrame *)frame)->pts = pkt->pts;
+#if FF_API_PKT_PTS
+FF_DISABLE_DEPRECATION_WARNINGS
         ((AVFrame *)frame)->pkt_pts = pkt->pts;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
         ((AVFrame *)frame)->pkt_dts = pkt->dts;
         for (i = 0; i < 8; i++) {
             if (s->next_refs[i].f->buf[0])
diff --git a/media/ffvpx/libavcodec/x86/h264_i386.h b/media/ffvpx/libavcodec/x86/h264_i386.h
index 4dfbc3093..19cd12838 100644
--- a/media/ffvpx/libavcodec/x86/h264_i386.h
+++ b/media/ffvpx/libavcodec/x86/h264_i386.h
@@ -91,13 +91,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
         "sub  %10, %1                           \n\t"
         "mov  %2, %0                            \n\t"
         "movl %7, %%ecx                         \n\t"
-        "add  %1, %%"REG_c"                     \n\t"
+        "add  %1, %%"FF_REG_c"                  \n\t"
         "movl %%ecx, (%0)                       \n\t"
 
         "test $1, %4                            \n\t"
         " jnz 5f                                \n\t"
 
-        "add"OPSIZE"  $4, %2                    \n\t"
+        "add"FF_OPSIZE"  $4, %2                 \n\t"
 
         "4:                                     \n\t"
         "add  $1, %1                            \n\t"
@@ -105,7 +105,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
         " jb 3b                                 \n\t"
         "mov  %2, %0                            \n\t"
         "movl %7, %%ecx                         \n\t"
-        "add  %1, %%"REG_c"                     \n\t"
+        "add  %1, %%"FF_REG_c"                  \n\t"
         "movl %%ecx, (%0)                       \n\t"
         "5:                                     \n\t"
         "add  %9, %k0                           \n\t"
@@ -116,7 +116,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
           "i"(offsetof(CABACContext, bytestream)),
           "i"(offsetof(CABACContext, bytestream_end))
           TABLES_ARG
-        : "%"REG_c, "memory"
+        : "%"FF_REG_c, "memory"
     );
     return coeff_count;
 }
@@ -183,7 +183,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "test $1, %4                            \n\t"
         " jnz 5f                                \n\t"
 
-        "add"OPSIZE"  $4, %2                    \n\t"
+        "add"FF_OPSIZE"  $4, %2                 \n\t"
 
         "4:                                     \n\t"
         "add $1, %6                             \n\t"
@@ -202,7 +202,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
           "i"(offsetof(CABACContext, bytestream)),
           "i"(offsetof(CABACContext, bytestream_end)),
           "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
-        : "%"REG_c, "memory"
+        : "%"FF_REG_c, "memory"
     );
     return coeff_count;
 }
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init.c b/media/ffvpx/libavcodec/x86/vp9dsp_init.c
index 469a66171..cc781a009 100644
--- a/media/ffvpx/libavcodec/x86/vp9dsp_init.c
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init.c
@@ -114,6 +114,8 @@ itxfm_func(idct, idct, 32, sse2);
 itxfm_func(idct, idct, 32, ssse3);
 itxfm_func(idct, idct, 32, avx);
 itxfm_func(iwht, iwht, 4, mmx);
+itxfm_func(idct, idct, 16, avx2);
+itxfm_func(idct, idct, 32, avx2);
 
 #undef itxfm_func
 #undef itxfm_funcs
@@ -124,6 +126,8 @@ void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri
 void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
                                                     int E, int I, int H)
 
+lpf_funcs(4, 8, mmxext);
+lpf_funcs(8, 8, mmxext);
 lpf_funcs(16, 16, sse2);
 lpf_funcs(16, 16, ssse3);
 lpf_funcs(16, 16, avx);
@@ -279,6 +283,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
     }
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
+        dsp->loop_filter_8[0][0] = ff_vp9_loop_filter_h_4_8_mmxext;
+        dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_mmxext;
+        dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_mmxext;
+        dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_mmxext;
         init_subpel2(4, 0, 4, put, 8, mmxext);
         init_subpel2(4, 1, 4, avg, 8, mmxext);
         init_fpel_func(4, 1,  4, avg, _8, mmxext);
@@ -382,6 +390,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
         init_fpel_func(0, 1, 64, avg, _8, avx2);
         if (ARCH_X86_64) {
 #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+            dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx2;
+            dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx2;
             init_subpel3_32_64(0, put, 8, avx2);
             init_subpel3_32_64(1, avg, 8, avx2);
 #endif
diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm.asm b/media/ffvpx/libavcodec/x86/vp9itxfm.asm
index 6d5008e33..57d6d353b 100644
--- a/media/ffvpx/libavcodec/x86/vp9itxfm.asm
+++ b/media/ffvpx/libavcodec/x86/vp9itxfm.asm
@@ -24,36 +24,36 @@
 %include "libavutil/x86/x86util.asm"
 %include "vp9itxfm_template.asm"
 
-SECTION_RODATA
+SECTION_RODATA 32
 
 %macro VP9_IDCT_COEFFS 2-3 0
 const pw_m%1_%2
-times 4 dw -%1,  %2
+times 8 dw -%1,  %2
 const pw_%2_%1
-times 4 dw  %2,  %1
+times 8 dw  %2,  %1
 
 %if %3 == 1
 const pw_m%2_m%1
-times 4 dw -%2, -%1
+times 8 dw -%2, -%1
 %if %1 != %2
 const pw_m%2_%1
-times 4 dw -%2,  %1
+times 8 dw -%2,  %1
 const pw_%1_%2
-times 4 dw  %1,  %2
+times 8 dw  %1,  %2
 %endif
 %endif
 
 %if %1 < 11585
-pw_m%1x2:   times 8 dw -%1*2
+pw_m%1x2:   times 16 dw -%1*2
 %elif %1 > 11585
-pw_%1x2:    times 8 dw  %1*2
+pw_%1x2:    times 16 dw  %1*2
 %else
 const pw_%1x2
-times 8 dw %1*2
+times 16 dw %1*2
 %endif
 
 %if %2 != %1
-pw_%2x2:    times 8 dw  %2*2
+pw_%2x2:    times 16 dw  %2*2
 %endif
 %endmacro
 
@@ -127,16 +127,33 @@ SECTION .text
 %endmacro
 
 %macro VP9_STORE_2X 5-6 dstq ; reg1, reg2, tmp1, tmp2, zero, dst
+%if mmsize == 32
+    pmovzxbw           m%3, [%6]
+    pmovzxbw           m%4, [%6+strideq]
+%else
     movh               m%3, [%6]
     movh               m%4, [%6+strideq]
     punpcklbw          m%3, m%5
     punpcklbw          m%4, m%5
+%endif
     paddw              m%3, m%1
     paddw              m%4, m%2
+%if mmsize == 32
+    packuswb           m%3, m%4
+    ; Intel...
+    vpermq             m%3, m%3, q3120
+    mova              [%6], xm%3
+    vextracti128 [%6+strideq], m%3, 1
+%elif mmsize == 16
+    packuswb           m%3, m%4
+    movh              [%6], m%3
+    movhps    [%6+strideq], m%3
+%else
     packuswb           m%3, m%5
     packuswb           m%4, m%5
     movh              [%6], m%3
     movh      [%6+strideq], m%4
+%endif
 %endmacro
 
 %macro ZERO_BLOCK 4 ; mem, stride, nnzcpl, zero_reg
@@ -1421,6 +1438,180 @@ VP9_IDCT_IDCT_16x16_ADD_XMM sse2
 VP9_IDCT_IDCT_16x16_ADD_XMM ssse3
 VP9_IDCT_IDCT_16x16_ADD_XMM avx
 
+%macro VP9_IDCT16_YMM_1D 0
+    VP9_UNPACK_MULSUB_2W_4X  1,  15, 16305,  1606, [pd_8192], 0, 4 ; t8,  t15
+    VP9_UNPACK_MULSUB_2W_4X  9,   7, 10394, 12665, [pd_8192], 0, 4 ; t9,  t14
+
+    SUMSUB_BA            w,  9,   1, 0      ; t8,  t9
+    SUMSUB_BA            w,  7,  15, 0      ; t15, t14
+
+    VP9_UNPACK_MULSUB_2W_4X 15,   1, 15137,  6270, [pd_8192], 0, 4 ; t9,  t14
+
+    VP9_UNPACK_MULSUB_2W_4X  5,  11, 14449,  7723, [pd_8192], 0, 4 ; t10, t13
+    VP9_UNPACK_MULSUB_2W_4X 13,   3,  4756, 15679, [pd_8192], 0, 4 ; t11, t12
+
+    SUMSUB_BA            w,  5,  13, 0      ; t11, t10
+    SUMSUB_BA            w, 11,   3, 0      ; t12, t13
+
+    VP9_UNPACK_MULSUB_2W_4X  3,  13, 6270, m15137, [pd_8192], 0, 4 ; t10, t13
+
+    SUMSUB_BA            w,  5,   9, 0      ; t8,  t11
+    SUMSUB_BA            w,  3,  15, 0      ; t9,  t10
+    SUMSUB_BA            w, 11,   7, 0      ; t15, t12
+    SUMSUB_BA            w, 13,   1, 0      ; t14, t13
+
+    SUMSUB_BA            w, 15,   1, 0
+    SUMSUB_BA            w,  9,   7, 0
+    pmulhrsw            m1, [pw_11585x2]    ; t10
+    pmulhrsw            m7, [pw_11585x2]    ; t11
+    pmulhrsw            m9, [pw_11585x2]    ; t12
+    pmulhrsw           m15, [pw_11585x2]    ; t13
+
+    ; even (tx8x8)
+    mova                m4, [blockq+128]
+    mova      [blockq+128], m5
+    VP9_UNPACK_MULSUB_2W_4X   4,  12, 15137,  6270, [pd_8192], 0, 5 ; t2,  t3
+    VP9_UNPACK_MULSUB_2W_4X   2,  14, 16069,  3196, [pd_8192], 0, 5 ; t4,  t7
+    VP9_UNPACK_MULSUB_2W_4X  10,   6,  9102, 13623, [pd_8192], 0, 5 ; t5,  t6
+    mova                m0, [blockq+  0]
+    SUMSUB_BA            w,   8,   0, 5
+    pmulhrsw            m8, [pw_11585x2]    ; t0
+    pmulhrsw            m0, [pw_11585x2]    ; t1
+
+    SUMSUB_BA            w,  10,   2, 5     ; t4,  t5
+    SUMSUB_BA            w,   6,  14, 5     ; t7,  t6
+    SUMSUB_BA            w,  12,   8, 5     ; t0,  t3
+    SUMSUB_BA            w,   4,   0, 5     ; t1,  t2
+
+    SUMSUB_BA            w,   2,  14, 5
+    pmulhrsw           m14, [pw_11585x2]    ; t5
+    pmulhrsw            m2, [pw_11585x2]    ; t6
+
+    SUMSUB_BA            w,   6,  12, 5     ; t0,  t7
+    SUMSUB_BA            w,   2,   4, 5     ; t1,  t6
+    SUMSUB_BA            w,  14,   0, 5     ; t2,  t5
+    SUMSUB_BA            w,  10,   8, 5     ; t3,  t4
+
+    ; final stage
+    SUMSUB_BA            w, 11,  6,  5      ; out0, out15
+    SUMSUB_BA            w, 13,  2,  5      ; out1, out14
+    SUMSUB_BA            w, 15, 14,  5      ; out2, out13
+    SUMSUB_BA            w,  9, 10,  5      ; out3, out12
+    SUMSUB_BA            w,  7,  8,  5      ; out4, out11
+    SUMSUB_BA            w,  1,  0,  5      ; out5, out10
+    SUMSUB_BA            w,  3,  4,  5      ; out6, out9
+    mova                m5, [blockq+128]
+    mova      [blockq+192], m3
+    SUMSUB_BA            w,  5, 12,  3      ; out7, out8
+
+    SWAP  0, 11,  8, 12, 10
+    SWAP  1, 13, 14,  2, 15,  6,  3,  9,  4,  7,  5
+%endmacro
+
+; this is almost identical to VP9_STORE_2X, but it does two rows
+; for slightly improved interleaving, and it omits vpermq since the
+; input is DC so all values are identical
+%macro VP9_STORE_YMM_DC_4X 6 ; reg, tmp1, tmp2, tmp3, tmp4, zero
+    mova              xm%2, [dstq]
+    mova              xm%4, [dstq+strideq*2]
+    vinserti128        m%2, m%2, [dstq+strideq], 1
+    vinserti128        m%4, m%4, [dstq+stride3q], 1
+    punpckhbw          m%3, m%2, m%6
+    punpcklbw          m%2, m%6
+    punpckhbw          m%5, m%4, m%6
+    punpcklbw          m%4, m%6
+    paddw              m%3, m%1
+    paddw              m%2, m%1
+    paddw              m%5, m%1
+    paddw              m%4, m%1
+    packuswb           m%2, m%3
+    packuswb           m%4, m%5
+    mova            [dstq], xm%2
+    mova        [dstq+strideq*2], xm%4
+    vextracti128  [dstq+strideq], m%2, 1
+    vextracti128 [dstq+stride3q], m%4, 1
+%endmacro
+
+%if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_idct_idct_16x16_add, 4, 4, 16, dst, stride, block, eob
+    cmp eobd, 1 ; faster path for when only DC is set
+    jg .idctfull
+
+    ; dc-only
+    mova                m1, [pw_11585x2]
+    vpbroadcastw        m0, [blockq]
+    pmulhrsw            m0, m1
+    pmulhrsw            m0, m1
+    pxor                m5, m5
+    pmulhrsw            m0, [pw_512]
+    movd          [blockq], xm5
+
+    DEFINE_ARGS dst, stride, stride3, cnt
+    mov               cntd, 4
+    lea           stride3q, [strideq*3]
+.loop_dc:
+    VP9_STORE_YMM_DC_4X  0, 1, 2, 3, 4, 5
+    lea               dstq, [dstq+4*strideq]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+    DEFINE_ARGS dst, stride, block, eob
+.idctfull:
+    mova                m1, [blockq+ 32]
+    mova                m2, [blockq+ 64]
+    mova                m3, [blockq+ 96]
+    mova                m5, [blockq+160]
+    mova                m6, [blockq+192]
+    mova                m7, [blockq+224]
+    mova                m8, [blockq+256]
+    mova                m9, [blockq+288]
+    mova               m10, [blockq+320]
+    mova               m11, [blockq+352]
+    mova               m12, [blockq+384]
+    mova               m13, [blockq+416]
+    mova               m14, [blockq+448]
+    mova               m15, [blockq+480]
+
+    VP9_IDCT16_YMM_1D
+    TRANSPOSE16x16W      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
+                         [blockq+192], [blockq+128], 1
+    mova      [blockq+  0], m0
+    VP9_IDCT16_YMM_1D
+
+    mova      [blockq+224], m7
+    mova      [blockq+480], m15
+    pxor               m15, m15
+
+    ; store
+    VP9_IDCT8_WRITEx2    0,  1, 6, 7, 15, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2    2,  3, 6, 7, 15, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2    4,  5, 6, 7, 15, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    mova                m6, [blockq+192]
+    mova                m7, [blockq+224]
+    SWAP                 0, 15
+    mova               m15, [blockq+480]
+    VP9_IDCT8_WRITEx2    6,  7, 1, 2, 0, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2    8,  9, 1, 2, 0, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2   10, 11, 1, 2, 0, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2   12, 13, 1, 2, 0, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+    VP9_IDCT8_WRITEx2   14, 15, 1, 2, 0, [pw_512], 6
+    lea               dstq, [dstq+2*strideq]
+
+    ; at the end of the loop, m0 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 32, 16, m0
+    RET
+%endif
+
 ;---------------------------------------------------------------------------------------------
 ; void vp9_iadst_iadst_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
 ;---------------------------------------------------------------------------------------------
@@ -1801,7 +1992,12 @@ IADST16_FN iadst, IADST16, iadst, IADST16, avx
 ;---------------------------------------------------------------------------------------------
 
 %macro VP9_IDCT32_1D 2-3 32 ; src, pass, nnzc
-%assign %%str 16*%2*%2
+%if %2 == 1
+%assign %%str mmsize
+%else
+%assign %%str 64
+%endif
+
     ; first do t0-15, this can be done identical to idct16x16
     VP9_IDCT16_1D_START %1, %3/2, 64*2, tmpq, 2*%%str, 1
 
@@ -2096,17 +2292,125 @@ IADST16_FN iadst, IADST16, iadst, IADST16, avx
     mova                 m3, [tmpq+20*%%str] ; t5
     mova                m13, [tmpq+24*%%str] ; t6
 
-    SUMSUB_BA             w,  6,  8,  10
+    SUMSUB_BA             w,  6,  8, 10
     mova    [tmpq+ 3*%%str], m8              ; t15
-    mova                m10, [tmpq+28*%%str] ; t7
     SUMSUB_BA             w,  0,  9,  8
     SUMSUB_BA             w, 15, 12,  8
     SUMSUB_BA             w, 14, 11,  8
     SUMSUB_BA             w,  1,  2,  8
     SUMSUB_BA             w,  7,  3,  8
     SUMSUB_BA             w,  5, 13,  8
+    mova                m10, [tmpq+28*%%str] ; t7
     SUMSUB_BA             w,  4, 10,  8
+%if cpuflag(avx2)
+    ; the "shitty" about this idct is that the final pass does the outermost
+    ; interleave sumsubs (t0/31, t1/30, etc) but the tN for the 16x16 need
+    ; to be sequential, which means I need to load/store half of the sumsub
+    ; intermediates back to/from memory to get a 16x16 transpose going...
+    ; This would be easier if we had more (e.g. 32) YMM regs here.
+    mova    [tmpq+ 7*%%str], m9
+    mova    [tmpq+11*%%str], m12
+    mova    [tmpq+15*%%str], m11
+    mova    [tmpq+19*%%str], m2
+    mova    [tmpq+23*%%str], m3
+    mova    [tmpq+27*%%str], m13
+    mova    [tmpq+31*%%str], m10
+    mova    [tmpq+12*%%str], m5
+
+    mova                m13, [tmpq+30*%%str] ; t8
+    mova                m12, [tmpq+26*%%str] ; t9
+    mova                m11, [tmpq+22*%%str] ; t10
+    mova                m10, [tmpq+18*%%str] ; t11
+    mova                 m9, [tmpq+17*%%str] ; t20
+    mova                 m8, [tmpq+ 1*%%str] ; t21
+    mova                 m3, [tmpq+25*%%str] ; t22
+    mova                 m2, [tmpq+ 5*%%str] ; t23
+
+    SUMSUB_BA             w,  9, 10, 5
+    SUMSUB_BA             w,  8, 11, 5
+    SUMSUB_BA             w,  3, 12, 5
+    SUMSUB_BA             w,  2, 13, 5
+    mova    [tmpq+ 1*%%str], m10
+    mova    [tmpq+ 5*%%str], m11
+    mova    [tmpq+17*%%str], m12
+    mova    [tmpq+25*%%str], m13
+
+    mova                m13, [tmpq+14*%%str] ; t12
+    mova                m12, [tmpq+10*%%str] ; t13
+    mova                m11, [tmpq+ 9*%%str] ; t18
+    mova                m10, [tmpq+13*%%str] ; t19
+
+    SUMSUB_BA             w, 11, 12, 5
+    SUMSUB_BA             w, 10, 13, 5
+    mova    [tmpq+ 9*%%str], m13
+    mova    [tmpq+13*%%str], m12
+    mova    [tmpq+10*%%str], m10
+    mova    [tmpq+14*%%str], m11
 
+    mova                m13, [tmpq+ 6*%%str] ; t14
+    mova                m12, [tmpq+ 2*%%str] ; t15
+    mova                m11, [tmpq+21*%%str] ; t16
+    mova                m10, [tmpq+29*%%str] ; t17
+    SUMSUB_BA             w, 11, 12, 5
+    SUMSUB_BA             w, 10, 13, 5
+    mova    [tmpq+21*%%str], m12
+    mova    [tmpq+29*%%str], m13
+    mova                m12, [tmpq+10*%%str]
+    mova                m13, [tmpq+14*%%str]
+
+    TRANSPOSE16x16W       6,  0, 15, 14,  1,  7,  5,  4, \
+                          2,  3,  8,  9, 12, 13, 10, 11, \
+            [tmpq+12*%%str], [tmpq+ 8*%%str], 1
+    mova    [tmpq+ 0*%%str], m6
+    mova    [tmpq+ 2*%%str], m0
+    mova    [tmpq+ 4*%%str], m15
+    mova    [tmpq+ 6*%%str], m14
+    mova    [tmpq+10*%%str], m7
+    mova    [tmpq+12*%%str], m5
+    mova    [tmpq+14*%%str], m4
+    mova    [tmpq+16*%%str], m2
+    mova    [tmpq+18*%%str], m3
+    mova    [tmpq+20*%%str], m8
+    mova    [tmpq+22*%%str], m9
+    mova    [tmpq+24*%%str], m12
+    mova    [tmpq+26*%%str], m13
+    mova    [tmpq+28*%%str], m10
+    mova    [tmpq+30*%%str], m11
+
+    mova                 m0, [tmpq+21*%%str]
+    mova                 m1, [tmpq+29*%%str]
+    mova                 m2, [tmpq+13*%%str]
+    mova                 m3, [tmpq+ 9*%%str]
+    mova                 m4, [tmpq+ 1*%%str]
+    mova                 m5, [tmpq+ 5*%%str]
+    mova                 m7, [tmpq+25*%%str]
+    mova                 m8, [tmpq+31*%%str]
+    mova                 m9, [tmpq+27*%%str]
+    mova                m10, [tmpq+23*%%str]
+    mova                m11, [tmpq+19*%%str]
+    mova                m12, [tmpq+15*%%str]
+    mova                m13, [tmpq+11*%%str]
+    mova                m14, [tmpq+ 7*%%str]
+    mova                m15, [tmpq+ 3*%%str]
+    TRANSPOSE16x16W       0,  1,  2,  3,  4,  5,  6,  7, \
+                          8,  9, 10, 11, 12, 13, 14, 15, \
+            [tmpq+17*%%str], [tmpq+ 9*%%str], 1
+    mova    [tmpq+ 1*%%str], m0
+    mova    [tmpq+ 3*%%str], m1
+    mova    [tmpq+ 5*%%str], m2
+    mova    [tmpq+ 7*%%str], m3
+    mova    [tmpq+11*%%str], m5
+    mova    [tmpq+13*%%str], m6
+    mova    [tmpq+15*%%str], m7
+    mova    [tmpq+17*%%str], m8
+    mova    [tmpq+19*%%str], m9
+    mova    [tmpq+21*%%str], m10
+    mova    [tmpq+23*%%str], m11
+    mova    [tmpq+25*%%str], m12
+    mova    [tmpq+27*%%str], m13
+    mova    [tmpq+29*%%str], m14
+    mova    [tmpq+31*%%str], m15
+%else ; !avx2
     TRANSPOSE8x8W         6, 0, 15, 14, 1, 7, 5, 4, 8
     mova    [tmpq+ 0*%%str], m6
     mova    [tmpq+ 4*%%str], m0
@@ -2175,6 +2479,7 @@ IADST16_FN iadst, IADST16, iadst, IADST16, avx
     mova    [tmpq+22*%%str], m13
     mova    [tmpq+26*%%str], m14
     mova    [tmpq+30*%%str], m15
+%endif ; avx2
 %else
     mova                 m2, [tmpq+24*%%str] ; t6
     mova                 m3, [tmpq+28*%%str] ; t7
@@ -2623,3 +2928,106 @@ cglobal vp9_idct_idct_32x32_add, 0, 6 + ARCH_X86_64 * 3, 16, 2048, dst, stride,
 VP9_IDCT_IDCT_32x32_ADD_XMM sse2
 VP9_IDCT_IDCT_32x32_ADD_XMM ssse3
 VP9_IDCT_IDCT_32x32_ADD_XMM avx
+
+; this is almost identical to VP9_STORE_2X, but it does two rows
+; for slightly improved interleaving, and it omits vpermq since the
+; input is DC so all values are identical
+%macro VP9_STORE_YMM_DC_2X2 6 ; reg, tmp1, tmp2, tmp3, tmp4, zero
+    mova               m%2, [dstq]
+    mova               m%4, [dstq+strideq]
+    punpckhbw          m%3, m%2, m%6
+    punpcklbw          m%2, m%6
+    punpckhbw          m%5, m%4, m%6
+    punpcklbw          m%4, m%6
+    paddw              m%3, m%1
+    paddw              m%2, m%1
+    paddw              m%5, m%1
+    paddw              m%4, m%1
+    packuswb           m%2, m%3
+    packuswb           m%4, m%5
+    mova  [dstq+strideq*0], m%2
+    mova  [dstq+strideq*1], m%4
+%endmacro
+
+%if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_idct_idct_32x32_add, 4, 9, 16, 2048, dst, stride, block, eob
+    cmp eobd, 135
+    jg .idctfull
+    cmp eobd, 1
+    jg .idct16x16
+
+    ; dc-only case
+    mova                m1, [pw_11585x2]
+    vpbroadcastw        m0, [blockq]
+    pmulhrsw            m0, m1
+    pmulhrsw            m0, m1
+    pxor                m5, m5
+    pmulhrsw            m0, [pw_512]
+    movd          [blockq], xm5
+
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 16
+.loop_dc:
+    VP9_STORE_YMM_DC_2X2 0, 1, 2, 3, 4, 5
+    lea               dstq, [dstq+2*strideq]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+    DEFINE_ARGS dst_bak, stride, block, cnt, dst, stride30, dst_end, stride2, tmp
+.idct16x16:
+    mov               tmpq, rsp
+    VP9_IDCT32_1D   blockq, 1, 16
+
+    mov          stride30q, strideq         ; stride
+    lea           stride2q, [strideq*2]     ; stride*2
+    shl          stride30q, 5               ; stride*32
+    mov               cntd, 2
+    sub          stride30q, stride2q        ; stride*30
+.loop2_16x16:
+    mov               dstq, dst_bakq
+    lea           dst_endq, [dstq+stride30q]
+    VP9_IDCT32_1D     tmpq, 2, 16
+    add           dst_bakq, 16
+    add               tmpq, 32
+    dec               cntd
+    jg .loop2_16x16
+
+    ; at the end of the loop, m1 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 64, 16, m1
+    RET
+
+.idctfull:
+    mov               cntd, 2
+    mov               tmpq, rsp
+.loop1_full:
+    VP9_IDCT32_1D   blockq, 1
+    add             blockq, 32
+    add               tmpq, 1024
+    dec               cntd
+    jg .loop1_full
+
+    sub             blockq, 64
+
+    mov          stride30q, strideq         ; stride
+    lea           stride2q, [strideq*2]     ; stride*2
+    shl          stride30q, 5               ; stride*32
+    mov               cntd, 2
+    mov               tmpq, rsp
+    sub          stride30q, stride2q        ; stride*30
+.loop2_full:
+    mov               dstq, dst_bakq
+    lea           dst_endq, [dstq+stride30q]
+    VP9_IDCT32_1D     tmpq, 2
+    add           dst_bakq, 16
+    add               tmpq, 32
+    dec               cntd
+    jg .loop2_full
+
+    ; at the end of the loop, m1 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 64, 32, m1
+    RET
+%endif
diff --git a/media/ffvpx/libavcodec/x86/vp9lpf.asm b/media/ffvpx/libavcodec/x86/vp9lpf.asm
index 2c4fe214d..4e7ede223 100644
--- a/media/ffvpx/libavcodec/x86/vp9lpf.asm
+++ b/media/ffvpx/libavcodec/x86/vp9lpf.asm
@@ -52,7 +52,7 @@ mask_mix48: times 8 db 0x00
 SECTION .text
 
 %macro SCRATCH 3
-%if ARCH_X86_64
+%ifdef m8
     SWAP                %1, %2
 %else
     mova              [%3], m%1
@@ -60,7 +60,7 @@ SECTION .text
 %endmacro
 
 %macro UNSCRATCH 3
-%if ARCH_X86_64
+%ifdef m8
     SWAP                %1, %2
 %else
     mova               m%1, [%3]
@@ -69,7 +69,7 @@ SECTION .text
 
 ; %1 = abs(%2-%3)
 %macro ABSSUB 4 ; dst, src1 (RO), src2 (RO), tmp
-%if ARCH_X86_64
+%ifdef m8
     psubusb             %1, %3, %2
     psubusb             %4, %2, %3
 %else
@@ -102,7 +102,7 @@ SECTION .text
 %endmacro
 
 %macro UNPACK 4
-%if ARCH_X86_64
+%ifdef m8
     punpck%1bw          %2, %3, %4
 %else
     mova                %2, %3
@@ -112,27 +112,27 @@ SECTION .text
 
 %macro FILTER_SUBx2_ADDx2 11 ; %1=dst %2=h/l %3=cache %4=stack_off %5=sub1 %6=sub2 %7=add1
                              ; %8=add2 %9=rshift, [unpack], [unpack_is_mem_on_x86_32]
-    psubw               %3, [rsp+%4+%5*32]
-    psubw               %3, [rsp+%4+%6*32]
-    paddw               %3, [rsp+%4+%7*32]
+    psubw               %3, [rsp+%4+%5*mmsize*2]
+    psubw               %3, [rsp+%4+%6*mmsize*2]
+    paddw               %3, [rsp+%4+%7*mmsize*2]
 %ifnidn %10, ""
 %if %11 == 0
     punpck%2bw          %1, %10, m0
 %else
     UNPACK          %2, %1, %10, m0
 %endif
-    mova    [rsp+%4+%8*32], %1
+    mova [rsp+%4+%8*mmsize*2], %1
     paddw               %3, %1
 %else
-    paddw               %3, [rsp+%4+%8*32]
+    paddw               %3, [rsp+%4+%8*mmsize*2]
 %endif
     psraw               %1, %3, %9
 %endmacro
 
 ; FIXME interleave l/h better (for instruction pairing)
 %macro FILTER_INIT 9 ; tmp1, tmp2, cacheL, cacheH, dstp, stack_off, filterid, mask, source
-    FILTER%7_INIT       %1, l, %3, %6 +  0
-    FILTER%7_INIT       %2, h, %4, %6 + 16
+    FILTER%7_INIT       %1, l, %3, %6 +      0
+    FILTER%7_INIT       %2, h, %4, %6 + mmsize
     packuswb            %1, %2
     MASK_APPLY          %1, %9, %8, %2
     mova                %5, %1
@@ -147,8 +147,8 @@ SECTION .text
     mova               %14, %15
 %endif
 %endif
-    FILTER_SUBx2_ADDx2  %1, l, %3, %6 +  0, %7, %8, %9, %10, %11, %14, %16
-    FILTER_SUBx2_ADDx2  %2, h, %4, %6 + 16, %7, %8, %9, %10, %11, %14, %16
+    FILTER_SUBx2_ADDx2  %1, l, %3, %6 +      0, %7, %8, %9, %10, %11, %14, %16
+    FILTER_SUBx2_ADDx2  %2, h, %4, %6 + mmsize, %7, %8, %9, %10, %11, %14, %16
     packuswb            %1, %2
 %ifnidn %13, ""
     MASK_APPLY          %1, %13, %12, %2
@@ -195,21 +195,21 @@ SECTION .text
 
 %macro FILTER6_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off
     UNPACK          %2, %1, rp3, m0                     ; p3: B->W
-    mova     [rsp+%4+0*32], %1
+    mova [rsp+%4+0*mmsize*2], %1
     paddw               %3, %1, %1                      ; p3*2
     paddw               %3, %1                          ; p3*3
     punpck%2bw          %1, m1,  m0                     ; p2: B->W
-    mova     [rsp+%4+1*32], %1
+    mova [rsp+%4+1*mmsize*2], %1
     paddw               %3, %1                          ; p3*3 + p2
     paddw               %3, %1                          ; p3*3 + p2*2
     UNPACK          %2, %1, rp1, m0                     ; p1: B->W
-    mova     [rsp+%4+2*32], %1
+    mova [rsp+%4+2*mmsize*2], %1
     paddw               %3, %1                          ; p3*3 + p2*2 + p1
     UNPACK          %2, %1, rp0, m0                     ; p0: B->W
-    mova     [rsp+%4+3*32], %1
+    mova [rsp+%4+3*mmsize*2], %1
     paddw               %3, %1                          ; p3*3 + p2*2 + p1 + p0
     UNPACK          %2, %1, rq0, m0                     ; q0: B->W
-    mova     [rsp+%4+4*32], %1
+    mova [rsp+%4+4*mmsize*2], %1
     paddw               %3, %1                          ; p3*3 + p2*2 + p1 + p0 + q0
     paddw               %3, [pw_4]                      ; p3*3 + p2*2 + p1 + p0 + q0 + 4
     psraw               %1, %3, 3                       ; (p3*3 + p2*2 + p1 + p0 + q0 + 4) >> 3
@@ -217,24 +217,24 @@ SECTION .text
 
 %macro FILTER14_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off
     punpck%2bw          %1, m2, m0                      ; p7: B->W
-    mova    [rsp+%4+ 8*32], %1
+    mova [rsp+%4+ 8*mmsize*2], %1
     psllw               %3, %1, 3                       ; p7*8
     psubw               %3, %1                          ; p7*7
     punpck%2bw          %1, m3, m0                      ; p6: B->W
-    mova    [rsp+%4+ 9*32], %1
+    mova [rsp+%4+ 9*mmsize*2], %1
     paddw               %3, %1                          ; p7*7 + p6
     paddw               %3, %1                          ; p7*7 + p6*2
     UNPACK          %2, %1, rp5, m0                     ; p5: B->W
-    mova    [rsp+%4+10*32], %1
+    mova [rsp+%4+10*mmsize*2], %1
     paddw               %3, %1                          ; p7*7 + p6*2 + p5
     UNPACK          %2, %1, rp4, m0                     ; p4: B->W
-    mova    [rsp+%4+11*32], %1
+    mova [rsp+%4+11*mmsize*2], %1
     paddw               %3, %1                          ; p7*7 + p6*2 + p5 + p4
-    paddw               %3, [rsp+%4+ 0*32]              ; p7*7 + p6*2 + p5 + p4 + p3
-    paddw               %3, [rsp+%4+ 1*32]              ; p7*7 + p6*2 + p5 + .. + p2
-    paddw               %3, [rsp+%4+ 2*32]              ; p7*7 + p6*2 + p5 + .. + p1
-    paddw               %3, [rsp+%4+ 3*32]              ; p7*7 + p6*2 + p5 + .. + p0
-    paddw               %3, [rsp+%4+ 4*32]              ; p7*7 + p6*2 + p5 + .. + p0 + q0
+    paddw               %3, [rsp+%4+ 0*mmsize*2]        ; p7*7 + p6*2 + p5 + p4 + p3
+    paddw               %3, [rsp+%4+ 1*mmsize*2]        ; p7*7 + p6*2 + p5 + .. + p2
+    paddw               %3, [rsp+%4+ 2*mmsize*2]        ; p7*7 + p6*2 + p5 + .. + p1
+    paddw               %3, [rsp+%4+ 3*mmsize*2]        ; p7*7 + p6*2 + p5 + .. + p0
+    paddw               %3, [rsp+%4+ 4*mmsize*2]        ; p7*7 + p6*2 + p5 + .. + p0 + q0
     paddw               %3, [pw_8]                      ; p7*7 + p6*2 + p5 + .. + p0 + q0 + 8
     psraw               %1, %3, 4                       ; (p7*7 + p6*2 + p5 + .. + p0 + q0 + 8) >> 4
 %endmacro
@@ -334,22 +334,24 @@ SECTION .text
 %endmacro
 
 %macro DEFINE_TRANSPOSED_P7_TO_Q7 0-1 0
-%define P3 rsp +   0 + %1
-%define P2 rsp +  16 + %1
-%define P1 rsp +  32 + %1
-%define P0 rsp +  48 + %1
-%define Q0 rsp +  64 + %1
-%define Q1 rsp +  80 + %1
-%define Q2 rsp +  96 + %1
-%define Q3 rsp + 112 + %1
-%define P7 rsp + 128 + %1
-%define P6 rsp + 144 + %1
-%define P5 rsp + 160 + %1
-%define P4 rsp + 176 + %1
-%define Q4 rsp + 192 + %1
-%define Q5 rsp + 208 + %1
-%define Q6 rsp + 224 + %1
-%define Q7 rsp + 240 + %1
+%define P3 rsp +  0*mmsize + %1
+%define P2 rsp +  1*mmsize + %1
+%define P1 rsp +  2*mmsize + %1
+%define P0 rsp +  3*mmsize + %1
+%define Q0 rsp +  4*mmsize + %1
+%define Q1 rsp +  5*mmsize + %1
+%define Q2 rsp +  6*mmsize + %1
+%define Q3 rsp +  7*mmsize + %1
+%if mmsize == 16
+%define P7 rsp +  8*mmsize + %1
+%define P6 rsp +  9*mmsize + %1
+%define P5 rsp + 10*mmsize + %1
+%define P4 rsp + 11*mmsize + %1
+%define Q4 rsp + 12*mmsize + %1
+%define Q5 rsp + 13*mmsize + %1
+%define Q6 rsp + 14*mmsize + %1
+%define Q7 rsp + 15*mmsize + %1
+%endif
 %endmacro
 
 ; ..............AB -> AAAAAAAABBBBBBBB
@@ -363,14 +365,19 @@ SECTION .text
 %endif
 %endmacro
 
-%macro LOOPFILTER 5 ; %1=v/h %2=size1 %3+%4=stack, %5=32bit stack only
+%macro LOOPFILTER 5 ; %1=v/h %2=size1 %3+%4=stack, %5=mmx/32bit stack only
+%assign %%ext 0
+%if ARCH_X86_32 || mmsize == 8
+%assign %%ext %5
+%endif
+
 %if UNIX64
-cglobal vp9_loop_filter_%1_%2_16, 5, 9, 16, %3 + %4, dst, stride, E, I, H, mstride, dst2, stride3, mstride3
+cglobal vp9_loop_filter_%1_%2_ %+ mmsize, 5, 9, 16, %3 + %4 + %%ext, dst, stride, E, I, H, mstride, dst2, stride3, mstride3
 %else
 %if WIN64
-cglobal vp9_loop_filter_%1_%2_16, 4, 8, 16, %3 + %4, dst, stride, E, I, mstride, dst2, stride3, mstride3
+cglobal vp9_loop_filter_%1_%2_ %+ mmsize, 4, 8, 16, %3 + %4 + %%ext, dst, stride, E, I, mstride, dst2, stride3, mstride3
 %else
-cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride, dst2, stride3, mstride3
+cglobal vp9_loop_filter_%1_%2_ %+ mmsize, 2, 6, 16, %3 + %4 + %%ext, dst, stride, mstride, dst2, stride3, mstride3
 %define Ed dword r2m
 %define Id dword r3m
 %endif
@@ -384,18 +391,22 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     lea              mstride3q, [mstrideq*3]
 
 %ifidn %1, h
-%if %2 > 16
+%if %2 != 16
+%if mmsize == 16
 %define movx movh
+%else
+%define movx mova
+%endif
     lea                   dstq, [dstq + 4*strideq - 4]
 %else
 %define movx movu
     lea                   dstq, [dstq + 4*strideq - 8] ; go from top center (h pos) to center left (v pos)
 %endif
-    lea                  dst2q, [dstq + 8*strideq]
 %else
     lea                   dstq, [dstq + 4*mstrideq]
-    lea                  dst2q, [dstq + 8*strideq]
 %endif
+    ; FIXME we shouldn't need two dts registers if mmsize == 8
+    lea                  dst2q, [dstq + 8*strideq]
 
     DEFINE_REAL_P7_TO_Q7
 
@@ -406,11 +417,11 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movx                    m3, [P4]
     movx                    m4, [P3]
     movx                    m5, [P2]
-%if ARCH_X86_64 || %2 != 16
+%if (ARCH_X86_64 && mmsize == 16) || %2 > 16
     movx                    m6, [P1]
 %endif
     movx                    m7, [P0]
-%if ARCH_X86_64
+%ifdef m8
     movx                    m8, [Q0]
     movx                    m9, [Q1]
     movx                   m10, [Q2]
@@ -502,7 +513,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movhps        [Q5], m6
     movhps        [Q7], m7
     DEFINE_TRANSPOSED_P7_TO_Q7
-%else ; %2 == 44/48/84/88
+%elif %2 > 16 ; %2 == 44/48/84/88
     punpcklbw        m0, m1
     punpcklbw        m2, m3
     punpcklbw        m4, m5
@@ -529,12 +540,31 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     mova           [Q1],  m5
     mova           [Q2],  m7
     mova           [Q3],  m3
+%else ; %2 == 4 || %2 == 8
+    SBUTTERFLY       bw, 0, 1, 6
+    SBUTTERFLY       bw, 2, 3, 6
+    SBUTTERFLY       bw, 4, 5, 6
+    mova [rsp+4*mmsize], m5
+    mova             m6, [P1]
+    SBUTTERFLY       bw, 6, 7, 5
+    DEFINE_TRANSPOSED_P7_TO_Q7
+    TRANSPOSE4x4W     0, 2, 4, 6, 5
+    mova           [P3], m0
+    mova           [P2], m2
+    mova           [P1], m4
+    mova           [P0], m6
+    mova             m5, [rsp+4*mmsize]
+    TRANSPOSE4x4W     1, 3, 5, 7, 0
+    mova           [Q0], m1
+    mova           [Q1], m3
+    mova           [Q2], m5
+    mova           [Q3], m7
 %endif ; %2
 %endif ; x86-32/64
 %endif ; %1 == h
 
     ; calc fm mask
-%if %2 == 16
+%if %2 == 16 || mmsize == 8
 %if cpuflag(ssse3)
     pxor                m0, m0
 %endif
@@ -552,7 +582,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     mova                m0, [pb_80]
     pxor                m2, m0
     pxor                m3, m0
-%if ARCH_X86_64
+%ifdef m8
 %ifidn %1, v
     mova                m8, [P3]
     mova                m9, [P2]
@@ -613,10 +643,10 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
 
     ; (m3: fm, m8..15: p3 p2 p1 p0 q0 q1 q2 q3)
     ; calc flat8in (if not 44_16) and hev masks
-%if %2 != 44
+%if %2 != 44 && %2 != 4
     mova                m6, [pb_81]                     ; [1 1 1 1 ...] ^ 0x80
     ABSSUB_GT           m2, rp3, rp0, m6, m5            ; abs(p3 - p0) <= 1
-%if ARCH_X86_64
+%ifdef m8
     mova                m8, [pb_80]
 %define rb80 m8
 %else
@@ -625,7 +655,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     ABSSUB_GT           m1, rp2, rp0, m6, m5, rb80      ; abs(p2 - p0) <= 1
     por                 m2, m1
     ABSSUB              m4, rp1, rp0, m5                ; abs(p1 - p0)
-%if %2 == 16
+%if %2 <= 16
 %if cpuflag(ssse3)
     pxor                m0, m0
 %endif
@@ -655,8 +685,15 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
 %endif
 %else
     mova                m6, [pb_80]
+%if %2 == 44
     movd                m7, Hd
     SPLATB_MIX          m7
+%else
+%if cpuflag(ssse3)
+    pxor                m0, m0
+%endif
+    SPLATB_REG          m7, H, m0                       ; H H H H ...
+%endif
     pxor                m7, m6
     ABSSUB              m4, rp1, rp0, m1                ; abs(p1 - p0)
     pxor                m4, m6
@@ -670,7 +707,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
 %if %2 == 16
     ; (m0: hev, m2: flat8in, m3: fm, m6: pb_81, m9..15: p2 p1 p0 q0 q1 q2 q3)
     ; calc flat8out mask
-%if ARCH_X86_64
+%ifdef m8
     mova                m8, [P7]
     mova                m9, [P6]
 %define rp7 m8
@@ -682,7 +719,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     ABSSUB_GT           m1, rp7, rp0, m6, m5            ; abs(p7 - p0) <= 1
     ABSSUB_GT           m7, rp6, rp0, m6, m5            ; abs(p6 - p0) <= 1
     por                 m1, m7
-%if ARCH_X86_64
+%ifdef m8
     mova                m8, [P5]
     mova                m9, [P4]
 %define rp5 m8
@@ -695,7 +732,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     por                 m1, m7
     ABSSUB_GT           m7, rp4, rp0, m6, m5            ; abs(p4 - p0) <= 1
     por                 m1, m7
-%if ARCH_X86_64
+%ifdef m8
     mova                m14, [Q4]
     mova                m15, [Q5]
 %define rq4 m14
@@ -708,7 +745,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     por                 m1, m7
     ABSSUB_GT           m7, rq5, rq0, m6, m5            ; abs(q5 - q0) <= 1
     por                 m1, m7
-%if ARCH_X86_64
+%ifdef m8
     mova                m14, [Q6]
     mova                m15, [Q7]
 %define rq6 m14
@@ -738,7 +775,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
 
     ; (m0: hev, [m1: flat8out], [m2: flat8in], m3: fm, m8..15: p5 p4 p1 p0 q0 q1 q6 q7)
     ; filter2()
-%if %2 != 44
+%if %2 != 44 && %2 != 4
     mova                m6, [pb_80]                     ; already in m6 if 44_16
     SCRATCH              2, 15, rsp+%3+%4
 %if %2 == 16
@@ -756,7 +793,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     paddsb              m4, m2                          ; 3*(q0 - p0) + (p1 - q1)
     paddsb              m6, m4, [pb_4]                  ; m6: f1 = clip(f + 4, 127)
     paddsb              m4, [pb_3]                      ; m4: f2 = clip(f + 3, 127)
-%if ARCH_X86_64
+%ifdef m8
     mova                m14, [pb_10]                    ; will be reused in filter4()
 %define rb10 m14
 %else
@@ -765,8 +802,8 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     SRSHIFT3B_2X        m6, m4, rb10, m7                ; f1 and f2 sign byte shift by 3
     SIGN_SUB            m7, rq0, m6, m5                 ; m7 = q0 - f1
     SIGN_ADD            m1, rp0, m4, m5                 ; m1 = p0 + f2
-%if %2 != 44
-%if ARCH_X86_64
+%if %2 != 44 && %2 != 4
+%ifdef m8
     pandn               m6, m15, m3                     ;  ~mask(in) & mask(fm)
 %else
     mova                m6, [rsp+%3+%4]
@@ -787,8 +824,8 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     paddsb              m6, m2, [pb_4]                  ; m6:  f1 = clip(f + 4, 127)
     paddsb              m2, [pb_3]                      ; m2: f2 = clip(f + 3, 127)
     SRSHIFT3B_2X        m6, m2, rb10, m4                ; f1 and f2 sign byte shift by 3
-%if %2 != 44
-%if ARCH_X86_64
+%if %2 != 44 && %2 != 4
+%ifdef m8
     pandn               m5, m15, m3                     ;               ~mask(in) & mask(fm)
 %else
     mova                m5, [rsp+%3+%4]
@@ -815,26 +852,26 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     mova                [P1], m1
     mova                [Q1], m4
 
-%if %2 != 44
+%if %2 != 44 && %2 != 4
     UNSCRATCH            2, 15, rsp+%3+%4
 %endif
 
     ; ([m1: flat8out], m2: flat8in, m3: fm, m10..13: p1 p0 q0 q1)
     ; filter6()
-%if %2 != 44
+%if %2 != 44 && %2 != 4
     pxor                m0, m0
-%if %2 > 16
+%if %2 != 16
     pand                m3, m2
 %else
     pand                m2, m3                          ;               mask(fm) & mask(in)
-%if ARCH_X86_64
+%ifdef m8
     pandn               m3, m8, m2                      ; ~mask(out) & (mask(fm) & mask(in))
 %else
     mova                m3, [rsp+%3+%4+16]
     pandn               m3, m2
 %endif
 %endif
-%if ARCH_X86_64
+%ifdef m8
     mova               m14, [P3]
     mova                m9, [Q3]
 %define rp3 m14
@@ -882,7 +919,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     pand            m1, m2                                                              ; mask(out) & (mask(fm) & mask(in))
     mova            m2, [P7]
     mova            m3, [P6]
-%if ARCH_X86_64
+%ifdef m8
     mova            m8, [P5]
     mova            m9, [P4]
 %define rp5 m8
@@ -1008,7 +1045,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movhps [Q5],  m6
     movhps [Q7],  m7
 %endif
-%elif %2 == 44
+%elif %2 == 44 || %2 == 4
     SWAP 0, 1   ; m0 = p1
     SWAP 1, 7   ; m1 = p0
     SWAP 2, 5   ; m2 = q0
@@ -1018,6 +1055,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     SBUTTERFLY  bw, 2, 3, 4
     SBUTTERFLY  wd, 0, 2, 4
     SBUTTERFLY  wd, 1, 3, 4
+%if mmsize == 16
     movd  [P7], m0
     movd  [P3], m2
     movd  [Q0], m1
@@ -1046,6 +1084,20 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movd  [P0], m2
     movd  [Q3], m1
     movd  [Q7], m3
+%else
+    movd  [P7], m0
+    movd  [P5], m2
+    movd  [P3], m1
+    movd  [P1], m3
+    psrlq   m0, 32
+    psrlq   m2, 32
+    psrlq   m1, 32
+    psrlq   m3, 32
+    movd  [P6], m0
+    movd  [P4], m2
+    movd  [P2], m1
+    movd  [P0], m3
+%endif
 %else
     ; the following code do a transpose of 8 full lines to 16 half
     ; lines (high part). It is inlined to avoid the need of a staging area
@@ -1055,12 +1107,12 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     mova                    m3, [P0]
     mova                    m4, [Q0]
     mova                    m5, [Q1]
-%if ARCH_X86_64
+%ifdef m8
     mova                    m6, [Q2]
 %endif
     mova                    m7, [Q3]
     DEFINE_REAL_P7_TO_Q7
-%if ARCH_X86_64
+%ifdef m8
     SBUTTERFLY  bw,  0,  1, 8
     SBUTTERFLY  bw,  2,  3, 8
     SBUTTERFLY  bw,  4,  5, 8
@@ -1075,27 +1127,32 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     SBUTTERFLY  dq,  3,  7, 8
 %else
     SBUTTERFLY  bw,  0,  1, 6
-    mova  [rsp+64], m1
-    mova        m6, [rsp+96]
+    mova [rsp+mmsize*4], m1
+    mova        m6, [rsp+mmsize*6]
     SBUTTERFLY  bw,  2,  3, 1
     SBUTTERFLY  bw,  4,  5, 1
     SBUTTERFLY  bw,  6,  7, 1
     SBUTTERFLY  wd,  0,  2, 1
-    mova  [rsp+96], m2
-    mova        m1, [rsp+64]
+    mova [rsp+mmsize*6], m2
+    mova        m1, [rsp+mmsize*4]
     SBUTTERFLY  wd,  1,  3, 2
     SBUTTERFLY  wd,  4,  6, 2
     SBUTTERFLY  wd,  5,  7, 2
     SBUTTERFLY  dq,  0,  4, 2
     SBUTTERFLY  dq,  1,  5, 2
+%if mmsize == 16
     movh      [Q0], m1
     movhps    [Q1], m1
-    mova        m2, [rsp+96]
+%else
+    mova      [P3], m1
+%endif
+    mova        m2, [rsp+mmsize*6]
     SBUTTERFLY  dq,  2,  6, 1
     SBUTTERFLY  dq,  3,  7, 1
 %endif
     SWAP         3, 6
     SWAP         1, 4
+%if mmsize == 16
     movh      [P7], m0
     movhps    [P6], m0
     movh      [P5], m1
@@ -1104,7 +1161,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movhps    [P2], m2
     movh      [P1], m3
     movhps    [P0], m3
-%if ARCH_X86_64
+%ifdef m8
     movh      [Q0], m4
     movhps    [Q1], m4
 %endif
@@ -1114,6 +1171,15 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
     movhps    [Q5], m6
     movh      [Q6], m7
     movhps    [Q7], m7
+%else
+    mova      [P7], m0
+    mova      [P6], m1
+    mova      [P5], m2
+    mova      [P4], m3
+    mova      [P2], m5
+    mova      [P1], m6
+    mova      [P0], m7
+%endif
 %endif
 %endif
 
@@ -1137,3 +1203,9 @@ LPF_16_VH_ALL_OPTS 44,   0, 128,  0
 LPF_16_VH_ALL_OPTS 48, 256, 128, 16
 LPF_16_VH_ALL_OPTS 84, 256, 128, 16
 LPF_16_VH_ALL_OPTS 88, 256, 128, 16
+
+INIT_MMX mmxext
+LOOPFILTER v, 4,   0,  0, 0
+LOOPFILTER h, 4,   0, 64, 0
+LOOPFILTER v, 8, 128,  0, 8
+LOOPFILTER h, 8, 128, 64, 8
diff --git a/media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm
index c15437b8b..c0888170c 100644
--- a/media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm
+++ b/media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm
@@ -78,7 +78,7 @@ SECTION .text
 %endif
 %endmacro
 
-; calulate p or q portion of flat8out
+; calculate p or q portion of flat8out
 %macro FLAT8OUT_HALF 0
     psubw               m4, m0                      ; q4-q0
     psubw               m5, m0                      ; q5-q0
-- 
cgit v1.2.3